22 #include <sys/types.h>
36 m_pAMyLocalBlock = NULL;
38 m_pARightBoloc = NULL;
40 m_nIterationCount = 0;
41 m_nEigenValueCheckInterval = 0;
44 m_bReorthogonalization =
false;
45 m_bCalcuEigenvector =
false;
48 m_pEigenValues = NULL;
49 m_pEigenVectors = NULL;
50 m_pConvergedEigenValues = NULL;
51 m_pConvergedEigenVectors = NULL;
52 m_pRangeCheckedEigenValues = NULL;
53 m_pRangeCheckedEigenVectors = NULL;
54 m_pNoneSpuriousValues = NULL;
55 m_pNoneSpuriousVectors = NULL;
56 m_pNonClustersValues = NULL;
57 m_pNonClustersVectors = NULL;
59 m_pRangecheckedIndex = NULL;
60 m_pNonSpuriousValueIndex = NULL;
61 m_pConvergedIndex = NULL;
62 m_pNonClustersValueIndex = NULL;
63 m_pCheckNonClusterValue = NULL;
69 FinalizeTemporaryArrayAndVector();
83 FinalizeTemporaryArrayAndVector();
101 FREE_MEM(m_pRangeCheckedEigenValues);
102 FREE_MEM(m_pRangeCheckedEigenVectors);
130 CKNLanczosMethod::LPEIGENVALUE_RESULT CKNLanczosMethod::DoLanczosMethod(
CKNMatrixOperation::CKNCSR *pAMatrix,
unsigned int nIterationCount,
unsigned int nEigenValueCheckInterval,
unsigned int nEigenValueCount,
double fEigenvalueMin,
double fEignevalueMax,
double fConvergenceTolerance,
bool bReorthogonalization,
bool bCalcuEigVector,
bool bWaveFunction,
double load_in_MIC,
CKNMatrixOperation::CKNCSR *pmylocalblock,
CKNMatrixOperation::CKNCSR *leftlocalblock,
CKNMatrixOperation::CKNCSR *rightlocalblock)
134 if (NULL == pAMatrix)
142 m_pAMatrix = pAMatrix;
143 m_pAMyLocalBlock = pmylocalblock;
144 m_pALeftBlock = leftlocalblock;
145 m_pARightBoloc = rightlocalblock;
147 m_nIterationCount = nIterationCount;
148 m_nEigenValueCheckInterval = nEigenValueCheckInterval;
149 m_nEigenValueCount = nEigenValueCount;
150 m_fEigenvalueMin = fEigenvalueMin;
151 m_fEignevalueMax = fEignevalueMax;
152 m_bReorthogonalization = bReorthogonalization;
153 m_bCalcuEigenvector = bCalcuEigVector;
154 m_fConvergenceTolerance = fConvergenceTolerance;
155 m_floadMIC = load_in_MIC;
157 lpRtn = LanczosIteration();
158 if( bCalcuEigVector )
159 DoResidualCheck(pAMatrix, lpRtn);
163 BuildWaveFunction(lpRtn);
176 double *pAlphaReal = NULL;
177 double *pBeta = NULL;
179 double *pWjm1 = NULL;
180 double *pWjp1 = NULL;
183 double *pCalculatedEigenVector = NULL;
190 InitLanczosIterationVariables(&pAlpha, &pAlphaReal, &pBeta, &pWj, &pWjm1, &pWjp1, &pW);
193 pAlphaReal[0] = pBeta[0] = pBeta[1] = 0;
195 #ifdef DISABLE_MPI_ROUTINE
198 #else //DISABLE_MPI_ROUTINE
212 #endif //DISABLE_MPI_ROUTINE
234 LanczosIterationLoop(lpResult, &V1, m_nIterationCount, pAlpha, pAlphaReal, pBeta, pWj, pWjm1, pWjp1);
238 FinalLanczosVector();
239 FinalizeLanczosInterationVariable(pAlpha, pAlphaReal, pBeta, pWj, pWjm1, pWjp1, pW);
248 #ifndef DISABLE_MPI_ROUTINE
257 #ifdef DISABLE_MPI_ROUTINE
259 #else //DISABLE_MPI_ROUTINE
263 if (m_bReorthogonalization)
266 CalculateEigenVector(lpResult, m_pV[i], i);
276 #ifndef DISABLE_MPI_ROUTINE
278 #else //DISABLE_MPI_ROUTINE
280 #endif //DISABLE_MPI_ROUTINE
285 FinalLanczosVector();
286 FinalizeLanczosInterationVariable(pAlpha, pAlphaReal, pBeta, pWj, pWjm1, pWjp1, pW);
308 int nEigenvalueSolvingCount = 0;
309 int nEigenvalueSolvingFinal = m_nIterationCount / m_nEigenValueCheckInterval;
311 unsigned int nSizePHI;
317 #ifdef DISABLE_MPI_ROUTINE
330 unsigned int nSizeFromPrevRank, nSizeFromNextRank;
331 unsigned int nSizetoPrevRank, nSizetoNextRank;
332 MPI_Status stat_sr[2];
333 MPI_Request req_sr[2];
335 nSizetoPrevRank = m_pAMatrix->nComponentsFirstUnitCell;
336 nSizetoNextRank = m_pAMatrix->nComponentsLastUnitCell;
343 MPI_Waitall(2, req_sr, stat_sr);
347 MPI_Waitall(2, req_sr, stat_sr);
372 unsigned int output_real_size = nSizePHI;
373 unsigned int output_imaginary_size = nSizePHI;
377 output_real_size = 1;
378 output_imaginary_size = 1;
381 #pragma offload_transfer target(mic:phi_tid) nocopy(output_real[0:output_real_size] : ALLOC)
382 #pragma offload_transfer target(mic:phi_tid) nocopy(output_imaginary[0:output_imaginary_size] : ALLOC)
392 #pragma offload_transfer target(mic:phi_tid) nocopy(vtemp_real[0:vtemp_size] : ALLOC)
393 #pragma offload_transfer target(mic:phi_tid) nocopy(vtemp_imaginary[0:vtemp_size] : ALLOC)
394 #pragma offload_transfer target(mic:phi_tid) in(vtemp_real[0:vtemp_size] : REUSE)
395 #pragma offload_transfer target(mic:phi_tid) in(vtemp_imaginary[0:vtemp_size] : REUSE)
421 #ifdef DISABLE_MPI_ROUTINE
422 nSize = m_nMatrixSize;
427 for (j = 1; j <= nIterationCount; j++)
432 sprintf(szMsg,
"[#%8d] Lanczos interation going on\n", j);
437 CalculateEigenVector(lpResult, Vj, j);
439 #ifdef DISABLE_MPI_ROUTINE
443 #else //DISABLE_MPI_ROUTINE
479 printf(
"-Using MVMulEx_Optimal with %.1f(%%) offload\n", m_floadMIC);
480 #endif //DISABLE_MPI_ROUTINE
491 #ifdef DISABLE_MPI_ROUTINE
494 pBeta[j + 1] = W.
GetNorm(
true);
499 if (m_bReorthogonalization &&
false == bMakeEigvVector)
503 if (1 == j && fANorm < fabs(pAlphaReal[1]) + pBeta[2])
504 fANorm = fabs(pAlphaReal[1]) + pBeta[2];
505 else if (fANorm < fabs(pAlphaReal[j]) + pBeta[j + 1] + pBeta[j])
506 fANorm = fabs(pAlphaReal[j]) + pBeta[j + 1] + pBeta[j];
510 if (CheckAndDoSelectiveReorthogonalization(j-1, pAlphaReal+1, pBeta+2, pWj, pWjm1, pWjp1, fANorm))
520 if (0 == j % m_nEigenValueCheckInterval &&
false == bMakeEigvVector)
525 if (InitializeTemporaryArrayAndVector(j))
528 bFound = DoEigenValueSolving(j, pAlphaReal, pBeta, fANorm, lpResult, ++nEigenvalueSolvingCount == nEigenvalueSolvingFinal);
537 FinalizeTemporaryArrayAndVector();
551 #pragma offload_transfer target(mic:phi_tid) nocopy(output_real : FREE)
552 #pragma offload_transfer target(mic:phi_tid) nocopy(output_imaginary : FREE)
554 #pragma offload_transfer target(mic:phi_tid) nocopy(vtemp_real : FREE)
555 #pragma offload_transfer target(mic:phi_tid) nocopy(vtemp_imaginary : FREE)
580 if (
false == m_bReorthogonalization)
586 for (i = 0; i < m_nIterationCount + 2; i++)
587 #ifdef DISABLE_MPI_ROUTINE
588 m_pV[i].SetSize(m_nMatrixSize);
589 #else //DISABLE_MPI_ROUTINE
591 #endif //DISABLE_MPI_ROUTINE
598 if (
false == m_bReorthogonalization)
601 for (i = 0; i < m_nIterationCount + 2; i++)
620 *pAlpha =
new CKNComplex[m_nIterationCount + 2];
621 *pAlphaReal = (
double*)malloc(
sizeof(
double)*(m_nIterationCount + 5));
622 *pBeta = (
double*)malloc(
sizeof(
double)*(m_nIterationCount + 5));
626 *pWj = (
double*)malloc(
sizeof(
double)*(m_nIterationCount + 3));
627 *pWjm1 = (
double*)malloc(
sizeof(
double)*(m_nIterationCount + 3));
628 *pWjp1 = (
double*)malloc(
sizeof(
double)*(m_nIterationCount + 3));
630 memset(*pWj, 0,
sizeof(
double)*(m_nIterationCount + 3));
631 memset(*pWjm1, 0,
sizeof(
double)*(m_nIterationCount + 3));
632 memset(*pWjp1, 0,
sizeof(
double)*(m_nIterationCount + 3));
675 unsigned int i, k, nRepeatCount;
683 #ifdef DISABLE_MPI_ROUTINE
684 nRepeatCount = m_nMatrixSize;
685 #else //DISABLE_MPI_ROUTINE
689 for (k = 0; k < nRepeatCount; k++)
710 bool bDoSelectiveReorthogonalization =
false;
713 return bDoSelectiveReorthogonalization;
725 void thomas_alg(
double **T,
double *initguess,
double *app_evc,
int iter)
729 double temp, tbeta = T[0][0];
731 double *P = (
double *)malloc(
sizeof(
double)*iter);
734 for (i = 0; i<iter - 1; i++)
736 if (fabs(T[1][i])>fabs(tbeta))
738 initguess[i + 1] -= initguess[i] * tbeta / T[1][i];
739 T[1][i + 1] -= T[0][i] * tbeta / T[1][i];
745 temp = initguess[i + 1];
746 initguess[i + 1] = initguess[i];
750 T[1][i + 1] = T[0][i];
753 initguess[i + 1] -= initguess[i] * T[1][i] / tbeta;
755 T[1][i + 1] -= T[0][i] * T[1][i] / tbeta;
760 T[0][i + 1] = -P[i] * T[1][i] / tbeta;
771 for (i = iter - 1; i>-1; i--)
774 app_evc[i] = initguess[i] / T[1][i];
775 else if (i == iter - 2)
777 app_evc[i] = initguess[i] - app_evc[i + 1] * T[0][i];
778 app_evc[i] = app_evc[i] / T[1][i];
782 app_evc[i] = initguess[i] - app_evc[i + 1] * T[0][i] - app_evc[i + 2] * P[i];
783 app_evc[i] = app_evc[i] / T[1][i];
790 void inverse_iter(
double *alpha,
double *beta,
double *app_evc,
int iter,
double app_eva)
792 int i, j, k, iteration = 20;
793 double temp = 0, err, etol = 1e-13;
795 double *initguess = (
double *)malloc(
sizeof(
double)*iter);
796 double **T = (
double **)malloc(
sizeof(
double)* 2);
798 for (i = 0; i<2; i++)
799 T[i] = (
double *)malloc(
sizeof(
double)*iter);
801 for (i = 0; i<iter; i++)
802 initguess[i] = 1 / sqrt((
double)iter);
804 for (k = 0; k<iteration; k++)
806 for (i = 0; i<2; i++)
810 for (j = 0; j<iter; j++)
811 T[i][j] = alpha[j] - app_eva;
816 for (j = 0; j<iter; j++)
824 for (i = 0; i<iter; i++)
825 temp += app_evc[i] * app_evc[i];
827 for (i = 0; i<iter; i++)
828 app_evc[i] = app_evc[i] / sqrt(temp);
831 for (i = 0; i<iter; i++)
834 temp += app_evc[i] * (alpha[i] * app_evc[i] + beta[i] * app_evc[i + 1]);
835 else if (i == iter - 1)
836 temp += app_evc[i] * (beta[i - 1] * app_evc[i - 1] + alpha[i] * app_evc[i]);
838 temp += app_evc[i] * (beta[i - 1] * app_evc[i - 1] + alpha[i] * app_evc[i] + beta[i] * app_evc[i + 1]);
841 err = app_eva - temp;
842 if (fabs(err) < etol)
847 for (i = 0; i<iter; i++)
848 initguess[i] = app_evc[i];
852 for (i = 0; i<2; i++)
858 #define SET_RESULT_TO_PARAMETER(pResultValue, pResultVector, nResultCount) \
859 pCalcuResult_Value = pResultValue; \
860 pCalcuResult_Vector = pResultVector; \
861 nCalculatedEigenValueCount = nResultCount
874 double *pCalcuResult_Value = NULL;
875 double *pCalcuResult_Vector = NULL;
876 int nConvergedEigenvalueCount = 0;
877 int nRangecheckedEigenvalueCount = 0;
878 int nNonSpuriousRitzValueCount = 0;
879 int nNonClustersValueCount = 0;
880 unsigned int nCalculatedEigenValueCount = 0, nCalculatedEigenValueCountBeforeConvergenceCheck;
882 bool *pValidEigenValue = NULL;
885 if (NULL == lpResult)
891 nCalculatedEigenValueCount = EigenValueSolver(nIterationCount, pAlpha, pBeta, m_pEigenValues, m_pEigenVectors);
892 pCalcuResult_Value = m_pEigenValues;
893 pCalcuResult_Vector = m_pEigenVectors;
894 nCalculatedEigenValueCountBeforeConvergenceCheck = nCalculatedEigenValueCount;
897 m_pEigenVectors = (
double*)malloc(
sizeof(
double)*nCalculatedEigenValueCount*nIterationCount);
898 for (i = 0; i < nCalculatedEigenValueCount; ++i)
900 double *pEigenVector = (
double*)malloc(
sizeof(
double)*nIterationCount);
902 if (NULL == pEigenVector)
905 inverse_iter(pAlpha + 1, pBeta + 2, pEigenVector, nIterationCount, m_pEigenValues[i]);
906 memcpy(m_pEigenVectors + nIterationCount * i, pEigenVector,
sizeof(
double)*nIterationCount);
911 pValidEigenValue = (
bool*)malloc(
sizeof(
bool)*nCalculatedEigenValueCount);
912 for (i = 0; i < nCalculatedEigenValueCount; ++i)
913 pValidEigenValue[i] =
true;
917 nCalculatedEigenValueCount = ConvergenceCheckingEx(nCalculatedEigenValueCount, m_pEigenValues, m_pEigenVectors,
918 pValidEigenValue, fANorm, pBeta, nIterationCount);
922 nCalculatedEigenValueCount = DistinguishClusterOfEigenvalueEx(nCalculatedEigenValueCount, m_pEigenValues, m_pEigenVectors,
923 pValidEigenValue, nIterationCount);
926 if (nCalculatedEigenValueCount > 0)
928 IntegrateEigenvaluesEx(nIterationCount, lpResult, nCalculatedEigenValueCount, nCalculatedEigenValueCountBeforeConvergenceCheck, m_pEigenValues, m_pEigenVectors, pValidEigenValue);
958 lpResult->
pEigenValues = (
double*)malloc(
sizeof(
double)*nCalculatedEigenValueCount);
959 lpResult->
pEigenVectors = (
double**)malloc(
sizeof(
double*)*nCalculatedEigenValueCount);
961 for (i = 0; i < nCalculatedEigenValueCount; i++)
963 lpResult->
pEigenVectors[i] = (
double*)malloc(
sizeof(
double)*nIterationCount);
972 for (i = 0; i < nCalculatedEigenValueCountBeforeConvergenceCheck; i++)
974 if (
false == pbValidEigenValue[i])
986 if (0 == nCalculatedEigenValueCount - nSameCount)
993 int nAddSize = nCalculatedEigenValueCount - nSameCount;
1004 for (i = 0; i < nCalculatedEigenValueCountBeforeConvergenceCheck; i++)
1006 if (
false == pbValidEigenValue[i])
1034 memcpy(lpResult->
pEigenVectors[j], pCalcuResult_Vector + (i * nIterationCount),
sizeof(
double)* nIterationCount);
1056 for (i = 0; i < nCalculatedEigenValueCount; i++)
1085 memcpy(lpResult->
pEigenVectors + (lpResult->
nEigenValueCount*m_nIterationCount), pCalcuResult_Vector + (i * nIterationCount),
sizeof(
double)* nIterationCount);
1103 if (0 == nEigenValueCount)
1106 double fTotal = m_fConvergenceTolerance;
1108 int i, nConvergedCount = nEigenValueCount;
1110 for (i = 0; i < nEigenValueCount; i++)
1112 if (
false == pbValidEigenValue[i])
1115 fResidual = fabs(pBeta[nIterationCount + 1] * pEiegnVectors[i*nIterationCount + nIterationCount - 1]);
1117 if (fResidual >= fTotal)
1119 pbValidEigenValue[i] =
false;
1124 return nConvergedCount;
1138 int CKNLanczosMethod::ConvergenceChecking(
int nEigenValueCount,
double *pEigenValues,
double *pEiegnVectors,
double *pConvergedEigenValues,
double *pConvergedEigenVectors,
double fANorm,
double *pBeta,
int nIterationCount)
1140 if (0 == nEigenValueCount)
1143 if (NULL == pConvergedEigenValues || NULL == pConvergedEigenVectors || NULL == m_pConvergedIndex)
1146 double fTotal = m_fConvergenceTolerance;
1148 int i, nConvergedCount = 0;
1150 for (i = 0; i < nEigenValueCount; i++)
1152 fResidual = fabs(pBeta[nIterationCount + 1] * pEiegnVectors[i*nIterationCount + nIterationCount - 1]);
1154 if (fResidual < fTotal)
1155 m_pConvergedIndex[nConvergedCount++] = i;
1158 ExtractDoubleValues(pConvergedEigenValues, pEigenValues, nEigenValueCount, m_pConvergedIndex, nConvergedCount,
false);
1159 ExtractDoubleVector(nIterationCount, pConvergedEigenVectors, pEiegnVectors, nEigenValueCount, m_pConvergedIndex, nConvergedCount,
false);
1161 return nConvergedCount;
1173 int CKNLanczosMethod::RangeChecking(
int nEigenValueCount,
double *pEigenValues,
double *pEiegnVectors,
double *pRangeCheckingEigenValues,
double *pRangeCheckingVectors,
int nIterationCount)
1175 int i, nRangecheckedCount = 0;
1177 if (NULL == pRangeCheckingEigenValues || NULL == pRangeCheckingVectors || NULL == m_pRangecheckedIndex)
1180 for (i = 0; i < nEigenValueCount; i++)
1182 if (pEigenValues[i] >= m_fEigenvalueMin && pEigenValues[i] <= m_fEignevalueMax)
1183 m_pRangecheckedIndex[nRangecheckedCount++] = i;
1186 ExtractDoubleValues(pRangeCheckingEigenValues, pEigenValues, nEigenValueCount, m_pRangecheckedIndex, nRangecheckedCount,
false);
1187 ExtractDoubleVector(nIterationCount, pRangeCheckingVectors, pEiegnVectors, nEigenValueCount, m_pRangecheckedIndex, nRangecheckedCount,
false);
1189 return nRangecheckedCount;
1204 if (0 == nEigenValueCount)
1207 if (NULL == pNonSpuriousValues || NULL == pNonSpuriousVectors || NULL == m_pNonSpuriousValueIndex)
1211 double fTotal = eps;
1212 int i, nNonSpuriousValue = 0;
1214 for (i = 0; i < nEigenValueCount; i++)
1216 if (fabs(pEigenVectors[i*nIterationCount]) > fTotal)
1217 m_pNonSpuriousValueIndex[nNonSpuriousValue++] = i;
1220 ExtractDoubleValues(pNonSpuriousValues, pEigenValues, nEigenValueCount, m_pNonSpuriousValueIndex, nNonSpuriousValue,
false);
1221 ExtractDoubleVector(nIterationCount, pNonSpuriousVectors, pEigenVectors, nEigenValueCount, m_pNonSpuriousValueIndex, nNonSpuriousValue,
false);
1223 return nNonSpuriousValue;
1236 int i, j, nNonClustersValue = nEigenValueCount;
1238 if (0 == nEigenValueCount)
1241 for (i = 0; i < nEigenValueCount - 1; i++)
1243 if (!pbValidEigenValues[i])
1246 for (j = i + 1; j < nEigenValueCount; j++)
1248 if (!pbValidEigenValues[j])
1251 if (fabs(pEigenValues[i] - pEigenValues[j]) < eps)
1253 pbValidEigenValues[j] =
false;
1254 nNonClustersValue--;
1259 return nNonClustersValue;
1274 int i, j, nNonClustersValue = 0;
1276 if (0 == nEigenValueCount)
1279 if (NULL == pNonClustersValues || NULL == m_pCheckNonClusterValue || NULL == m_pNonClustersValueIndex)
1282 for (i = 0; i < nEigenValueCount; i++)
1283 m_pCheckNonClusterValue[i] =
true;
1285 for (i = 0; i < nEigenValueCount - 1; i++)
1287 if (!m_pCheckNonClusterValue[i])
1290 for (j = i + 1; j < nEigenValueCount; j++)
1292 if (fabs(pEigenValues[i] - pEigenValues[j]) < eps)
1294 m_pCheckNonClusterValue[j] =
false;
1299 for (i = 0; i < nEigenValueCount; i++)
1301 if (m_pCheckNonClusterValue[i])
1302 m_pNonClustersValueIndex[nNonClustersValue++] = i;
1305 ExtractDoubleValues(pNonClustersValues, pEigenValues, nEigenValueCount, m_pNonClustersValueIndex, nNonClustersValue,
false);
1306 if (NULL != pNonClustersVectors)
1307 ExtractDoubleVector(nIterationCount, pNonClustersVectors, pEigenVectors, nEigenValueCount, m_pNonClustersValueIndex, nNonClustersValue,
false);
1309 return nNonClustersValue;
1323 MKL_INT n, lda, ldz, il, iu, lwork;
1324 MKL_INT nFoundEigenValueCount;
1327 double abstol = 1.0e-8;
1331 MKL_INT *iwork = (MKL_INT*)malloc(
sizeof(MKL_INT)* 10 * nIterationCount);
1332 MKL_INT *ifail = (MKL_INT*)malloc(
sizeof(MKL_INT)* nIterationCount);
1335 int *iblock = (
int *)malloc(
sizeof(
int)*nIterationCount);
1336 int *isplit = (
int *)malloc(
sizeof(
int)*nIterationCount);
1339 work = (
double*)malloc(
sizeof(
double)*nIterationCount * 10);
1341 n = ldz = lda = nIterationCount;
1343 iu = nIterationCount;
1344 vl = m_fEigenvalueMin;
1345 vu = m_fEignevalueMax;
1349 dstebz(
"V",
"E", &n, &vl, &vu, &il, &iu, &abstol, pAlpha + 1, pBeta + 2, &nFoundEigenValueCount, &nsplit, pEigenValues, iblock, isplit, work, iwork, &info);
1359 return nFoundEigenValueCount;
1370 double *pTMatrix = (
double*)malloc(
sizeof(
double)*nOrder*nOrder);
1375 if (NULL == pTMatrix)
1381 memset(pTMatrix, 0,
sizeof(
double)*nOrder*nOrder);
1383 for (k = 1; k <= nOrder; k++)
1385 pTMatrix[((k - 1) * nOrder) + (k - 1)] = pAlpha[k];
1387 pTMatrix[(k - 1)*nOrder + k] = pBeta[k + 1];
1389 pTMatrix[(k - 1)*nOrder + (k - 2)] = pBeta[k];
1407 m_nIterationCount = 0;
1408 m_nEigenValueCheckInterval = 0;
1409 m_nEigenValueCount = 0;
1410 m_bReorthogonalization =
false;
1428 int nTargetIndex = 0;
1429 for (i = 0; i < nSrcCount; i++)
1431 if (i != pFilter[nSrcIndex])
1432 pTarget[nTargetIndex++] = pSource[i];
1439 for (i = 0; i < nFilterCount; i++)
1440 pTarget[i] = pSource[pFilter[i]];
1460 int nTargetIndex = 0;
1461 for (i = 0; i < nSrcCount; i++)
1463 if (i != pFilter[nSrcIndex])
1464 memcpy(pTarget + (nTargetIndex*nVectorsize), pSource + (i * nVectorsize),
sizeof(
double)* nVectorsize);
1471 for (i = 0; i < nFilterCount; i++)
1472 memcpy(pTarget + (i*nVectorsize), pSource + (pFilter[i] * nVectorsize),
sizeof(
double)* nVectorsize);
1483 if (NULL == lpResult)
1535 unsigned int i, j, nIndex = 0;
1550 double absoluteValue = complexNumber.
GetAbsolute();
1551 fTempResult += (absoluteValue*absoluteValue);
1572 std::vector<bool> vectorResidualCheck;
1573 bool bFoundNoAnswer =
false;
1574 std::vector<int> vectorResidualAnswer;
1575 int nResidualAnswerCount = 0;
1579 #ifdef DISABLE_MPI_ROUTINE
1582 #else //DISABLE_MPI_ROUTINE
1585 #endif //DISABLE_MPI_ROUTINE
1596 #ifdef DISABLE_MPI_ROUTINE
1597 fNorm = vectorResult1.
GetNorm();
1598 #else //DISABLE_MPI_ROUTINE
1599 fNorm = vectorResult1.
GetNorm(
true);
1600 #endif DISABLE_MPI_ROUTINE
1603 vectorResidualCheck.push_back(
true);
1606 vectorResidualCheck.push_back(
false);
1607 bFoundNoAnswer =
true;
1611 if (!bFoundNoAnswer)
1620 if( vectorResidualCheck[i] )
1622 vectorResidualAnswer.push_back(i);
1623 nResidualAnswerCount++;
1627 for( i = 0 ; i < nResidualAnswerCount; ++ i )
1629 if( i == vectorResidualAnswer[i] )
1656 if (NULL == lpResult || NULL == pKValue)
1660 char szFileName[1024], szBuffer[1024];
1661 std::string writeString;
1662 unsigned int i, j, k;
1663 char szFileOpt[3] =
"wt";
1665 if( 0 != nRepeatCount )
1666 strcpy(szFileOpt,
"at");
1671 mkdir(
"result", 0777);
1677 if (NULL != (out = fopen(
"result\\eigenvalues.txt", szFileOpt)))
1679 if (NULL != (out = fopen(
"result/eigenvalues.txt", szFileOpt)))
1682 int nEigenValueCount;
1683 if( NULL != pKValue )
1684 sprintf(szBuffer,
"---[ev #%d %8.6f %8.6f %8.6f]---\n\n", nRepeatCount, pKValue[0], pKValue[1], pKValue[2]);
1686 sprintf(szBuffer,
"---[ev]---\n\n");
1688 fputs(szBuffer, out);
1697 fputs(szBuffer, out);
1722 sprintf(szFileName,
"result\\eigenvector_%02d_%02d.txt", nRepeatCount, j);
1724 sprintf(szFileName,
"result/eigenvector_%02d_%02d.txt", nRepeatCount, j);
1730 if (NULL != (out = fopen(szFileName,
"at")))
1735 sprintf(szBuffer,
"%16.16f %16.16f\n",
1739 writeString += szBuffer;
1743 fputs(writeString.c_str(), out);
1744 writeString.clear();
1748 if (!writeString.empty())
1750 fputs(writeString.c_str(), out);
1751 writeString.clear();
1757 #ifndef DISABLE_MPI_ROUTINE
1759 #endif //DISABLE_MPI_ROUTINE
1772 sprintf(szFileName,
"result\\wavefunction_%02d_%02d.txt", nRepeatCount, j);
1774 sprintf(szFileName,
"result/wavefunction_%02d_%02d.txt", nRepeatCount, j);
1780 if (NULL != (out = fopen(szFileName,
"at")))
1784 sprintf(szBuffer,
"%16.16f\n",
1787 writeString += szBuffer;
1791 fputs(writeString.c_str(), out);
1792 writeString.clear();
1796 if (!writeString.empty())
1798 fputs(writeString.c_str(), out);
1799 writeString.clear();
1804 #ifndef DISABLE_MPI_ROUTINE
1806 #endif //DISABLE_MPI_ROUTINE
1837 #ifdef DOING_MEASUREMENT
1839 sprintf(szMsg,
"%d nodes used\nTotal time [ %f\tsec ]\nComputing [ %lf\tsec ]\nEvalue takes [ %lf\tsec ]\nMPI takes [ %lf\tsec ]\nMVMul takes [ %lf\tsec ]\nVVDot takes [ %lf\tsec ]\nMem Op takes [ %lf\tsec ]\nResult written [ %lf\tsec ]\n",
1852 #endif //DOING_MEASUREMENT
1854 if (NULL != lpResult)
1856 int nEigenValueCount;
1863 sprintf(szMsg,
"[ev %2d] %18.16f - Degenerated eigenvalue\n", i, lpResult->
pEigenValues[i]);
1879 unsigned int i, j, nIndex = 0;
1890 double absoluteValue = complexNumber.
GetAbsolute();
1891 fTempResult += (absoluteValue*absoluteValue);
1943 unsigned int i, nNewIndex;
1946 unsigned int nAdjust = 0;
2012 unsigned int i, j, *pTargetDeflationGroup = NULL, k;
2013 unsigned int *pTargetDeflationEV = NULL, *pEVFindIteration = NULL;
2014 int *pEigenValueCount = NULL;
2015 double *pEVTotal = NULL;
2016 int nEVTotalCount = 0;
2019 bool bKeepWait =
true;
2020 unsigned int nTargetGroup;
2031 int nDeflationStartIndex, nDeflationIndex, nTargetIndex;
2033 for( i = 0; i < nFindingDegeneratedEVCount ; ++ i)
2034 nEVTotalCount += pEigenValueCount[i];
2036 pEVTotal = (
double*)malloc(
sizeof(
double)*nEVTotalCount);
2037 pTargetDeflationGroup = (
unsigned int*)malloc(
sizeof(
unsigned int)*nEVTotalCount);
2038 pTargetDeflationEV = (
unsigned int*)malloc(
sizeof(
unsigned int)*nEVTotalCount);
2039 pEVFindIteration = (
unsigned int*)malloc(
sizeof(
unsigned int)*nEVTotalCount);
2041 nDeflationStartIndex = 0;
2042 nDeflationIndex = 0;
2044 for( i = 0; i < nFindingDegeneratedEVCount ; ++ i)
2047 for( j = nDeflationStartIndex ; j < nDeflationStartIndex + pEigenValueCount[i] ; ++j )
2049 pTargetDeflationGroup[j] = nDeflationIndex;
2050 pTargetDeflationEV[j] = nTargetIndex++;
2053 nDeflationStartIndex += pEigenValueCount[i];
2075 for( i = pEigenValueCount[0] ; i < nEVTotalCount ; ++i )
2077 bool bNewEigenValue =
true;
2080 for( j = 0 ; j < nStartIndex ; ++j )
2084 bool bDoOrthgonal =
false;
2086 std::vector<unsigned int> vectorOrthgonalTarget;
2088 vectorOrthgonalTarget.push_back(j);
2089 bNewEigenValue =
false;
2092 Command[1] = (double)pTargetDeflationGroup[i];
2093 Command[2] = (double)pTargetDeflationEV[i];
2105 bDoOrthgonal = !bDoOrthgonal;
2114 Command[1] = (double)k;
2120 bDoOrthgonal =
false;
2124 vectorOrthgonalTarget.push_back(k);
2131 Command[1] = bDoOrthgonal ? 1 : 0;
2136 int nOrthogonalTarget;
2137 int *pOrthgonalTarget = NULL;
2140 nOrthogonalTarget = vectorOrthgonalTarget.size();
2143 pOrthgonalTarget = (
int*)malloc(
sizeof(
int)*nOrthogonalTarget);
2144 for( k = 0 ; k < nOrthogonalTarget ; ++ k )
2145 pOrthgonalTarget[k] = vectorOrthgonalTarget[k];
2148 for( k = 0 ; k < nOrthogonalTarget ; ++ k )
2154 if( complexResult == 0.0 )
2164 vectorTemp2 = vectorFromDeflation;
2173 double fNorm = vectorTemp2.
GetNorm(
true);
2178 Command[1] = (double)pTargetDeflationGroup[i];
2184 AppendEigenValue(lpResult, pEVTotal[i]);
2185 AppendEigenVector(lpResult, &vectorFromDeflation);
2209 Command[1] = (double)pTargetDeflationGroup[i];
2210 Command[2] = (double)pTargetDeflationEV[i];
2218 AppendEigenValue(lpResult, pEVTotal[i], pEVFindIteration[i],
true);
2219 AppendEigenVector(lpResult, &vectorFromDeflation,
true);
2238 switch((
int)Command[0])
2248 nTargetGroup = (
unsigned int)Command[1];
2254 AppendEigenVector(lpResult, &vectorFromDeflation,
true);
2262 nTargetGroup = (
unsigned int)Command[1];
2277 if(
DO_ORTHGONAL == (
int)Command[0] && 1 == (
int)Command[1])
2280 int nOrthogonalTarget;
2281 int *pOrthgonalTarget = NULL;
2286 pOrthgonalTarget = (
int*)malloc(
sizeof(
int)*nOrthogonalTarget);
2290 for( k = 0 ; k < nOrthogonalTarget ; ++ k )
2296 if( complexResult == 0.0 )
2306 vectorTemp2 = vectorFromDeflation;
2310 double fNorm = vectorTemp2.
GetNorm(
true);
2319 AppendEigenVector(lpResult, &vectorFromDeflation);
2332 int nSentEVIndex = -1;
2344 switch((
int)Command[0])
2352 nTargetGroup = (
unsigned int)Command[1];
2356 nSentEVIndex = (int)Command[2];
2361 nTargetGroup = (
unsigned int)Command[1];
2377 switch((
int)Command[0])
2386 nSentEVIndex = (int)Command[2];
2417 if(
true == bCalcuWaveFunction )
2418 if(
false == bCalcuEigenValue )
2421 if( nDeflationGroup > 1 &&
false == bCalcuEigenValue )
2518 lpResult->
pWaveFunctions[pOrder[i*2]] = pVectorWF[pOrder[i*2+1]];
2526 pVectorWF[i].Finalize();
static void BroadcastBool(bool *boolValue, int nRootRank=0)
Broadcst boolean value.
static void GatherEVFromDeflationGroup(int nSourceCount, double *pReceiveBuffer, int *pSourceCount, double *pSendBuffer, int nSendCount)
void SetSize(unsigned int nSize)
Set Vector elements size.
CKNMatrixOperation::CKNVector * pWaveFunctions
void FinalLanczosVector()
Deallocating lanczos vectors.
double GetImaginaryNumber() const
Get imaginary part.
static void BroadcastInt(int *pValue, unsigned int nSize, int nRootRank=0, MPI_Comm comm=MPI_COMM_NULL)
Broadcst boolean value.
void ScalarDivision(CKNComplex Scalar)
Scalar division operation.
static void ShowMsg(char *pszBuffer)
Show message.
void ScalarMultiple(CKNComplex Scalar)
Scalar multiple operation.
void Normalize(bool bMPI=false)
Normalize vector with norm.
void DoSelectiveReorthogonalization(unsigned int nIterationCount)
Do selective reorthogonalization.
void ScalarMultiThanMinusVector(double fScalar, CKNVector *vector)
Do minus operation after scalar multiple to operand between vectors.
static MPI_Comm GetMPIComm()
static void MergeDegeneratedEigenvalues(CKNLanczosMethod::LPEIGENVALUE_RESULT lpResult, unsigned int nFindingDegeneratedEVCount, CKNMatrixOperation::CKNCSR *pA, CKNMatrixOperation::CKNCSR *pLocalBlock, CKNMatrixOperation::CKNCSR *pLeftBlock, CKNMatrixOperation::CKNCSR *pRightBlock)
Merging eigenvalue into mater group.
static int * GetEigenvalueCountFromDeflationGroup(int nDeflationGroupCount, int nLocalEVCount)
Checking is root rank of Deflation computation.
static void BroadcastLanczosResult(CKNLanczosMethod::LPEIGENVALUE_RESULT lpResult, int nIterationCount)
Broadcast Lanczos result.
void DoResidualCheck(CKNMatrixOperation::CKNCSR *pAMatrix, LPEIGENVALUE_RESULT lpResult)
Residual checking.
Show message and debugging variable.
static void BroadcastDouble(double *pValue, unsigned int nSize, int nRootRank=0, MPI_Comm comm=MPI_COMM_NULL)
Broadcst boolean value.
double GetRealNumber() const
Get real part.
CKNMatrixOperation::CKNVector * pEigenVectorsForAMatrix
void InitLanczosIterationVariables(CKNComplex **pAlpha, double **pAlphaReal, double **pBeta, double **pWj, double **pWjm1, double **pWjp1, CKNMatrixOperation::CKNVector **pW)
Init omega, alpha, beta array.
unsigned int GetColumnCount()
Getting row size of matrix.
static MPI_Comm GetLanczosComputComm()
Data and operation representation of CSR(Compressed Sparse Row)
static int GetCurrentLoadBalanceCount()
Get Current node's rank load balancing number.
struct CKNLanczosMethod::RESULT_SORT_DATA * LPRESULT_SORT_DATA
void InitLanczosVector()
Init lanczos vectors.
static void MeasurementEnd(MEASUREMENT_INDEX index)
Measurement end for part.
unsigned int nEigenVectorSize
static int ResultCompare(const void *pA, const void *pB)
Comparing computing result function for quick sorting.
static double GetTakeTime(MEASUREMENT_INDEX index)
Get taken time for part.
static void MVMulEx_Optimal(CKNCSR *pAMatrix, CKNVector *pVector, CKNVector *pResult, unsigned int, unsigned int, CKNVector *, int)
Matrix and vector multiple operation for 1 layer exchanging communication.
static unsigned int GetLanczosGroupIndex()
static void GatherEVIterationFromDeflationGroup(int nSourceCount, int *pReceiveBuffer, int *pSourceCount, int *pSendBuffer, int nSendCount)
Gather eigenvalue from deflation group.
bool InitializeTemporaryArrayAndVector(int nIterationCount)
Initialize temporary eigenvalue arrays and vectors.
static int GetTotalNodeCount()
void BuildWaveFunction(LPEIGENVALUE_RESULT lpResult)
Building wavefunction.
static void ReceiveVectorSync(int nSourceRank, CKNMatrixOperation::CKNVector *pVector, int nSize, MPI_Request *req, MPI_Comm commWorld=MPI_COMM_NULL)
Receiving Vector with sync.
#define SHOW_SIMPLE_MSG(message)
void FinalizeLanczosInterationVariable(CKNComplex *pAlpha, double *pAlphaReal, double *pBeta, double *pWj, double *pWjm1, double *pWjp1, CKNMatrixOperation::CKNVector *pW)
Deallocating omega, alpha, beta.
#define GENERAL_TOLERANCE
General tolerance definition.
CKNLanczosMethod()
Constructor.
void MinusVector(CKNVector *vector)
Do minus operation between vectors.
static void RecalcuWaveFunction(CKNLanczosMethod::LPEIGENVALUE_RESULT lpResult)
Recalculating wavefunction after merging degenerated eigenvalues.
bool CheckAndDoSelectiveReorthogonalization(int nIterationCount, double *pAlpha, double *pBeta, double *pWj, double *pWjm1, double *pWjp1, double fANorm)
Check current state need selective reorthogonalization and do it.
#define TOLERANE_M_10_9
10^-9
static void AppendEigenVector(LPEIGENVALUE_RESULT lpResult, CKNMatrixOperation::CKNVector *pEigenVector, bool bInsertFirst=false)
Appending eigenvector into master group if degenerated eigenvalue is finded.
void IntegrateEigenvalues(int nIterationCount, LPEIGENVALUE_RESULT lpResult, unsigned int nCalculatedEigenValueCount, double *pCalcuResult_Value, double *pCalcuResult_Vector)
Integrating computing solution during Lanczos method operation.
static bool IsSameA(double operand1, double operand2, double tol)
Compare two double variable.
Structure for eigenvalue result sorting.
double * BuildTMatrix(unsigned int nOrder, double *pAlpha, double *pBeta)
Building T matrix for solving eigenvalue.
static void StopIteration()
Stop lanczos iteration on going state.
#define DEGENERATED_INDEX
static void SaveLanczosResult(CKNLanczosMethod::LPEIGENVALUE_RESULT lpResult, bool bCalcuEigenvalue, bool bWaveFunction, double *pKValue, int nRepeatCount)
Saving Lanczos computation result into file.
#define EXIT_MPI_WAIT
Command for Deflation Lanczos.
Common definition for Solver.
static void SendVectorSync(int nTargetRank, CKNMatrixOperation::CKNVector *pVector, int nSize, MPI_Request *req, MPI_Comm commWorld=MPI_COMM_NULL)
Getting Deflation computing group MPI_Comm.
static bool CheckingCalculationCondition(bool bCalcuEigenValue, bool bCalcuWaveFunction, unsigned int nDeflationGroup)
Checking pre conditions for Lanczos method operation.
CKNComplex GetAt(unsigned int nIndex)
Get element value from specific index.
static bool IsLanczosComputeRoot()
Barrier current deflation group.
Structure for engienvalue computing.
bool DoEigenValueSolving(int nIterationCount, double *pAlpha, double *pBeta, double fANorm, LPEIGENVALUE_RESULT lpResult, bool bFinal)
Every user set iteration count calculate eigenvalues.
int ConvergenceCheckingEx(int nEigenValueCount, double *pEigenValues, double *pEiegnVectors, bool *pbValidEigenValue, double fANorm, double *pBeta, int nIterationCount)
Checking convergence criteria.
#define CHECK_EV_ORTH_SIMPLE
static void MVMul(CKNCSR *pAMatrix, CKNVector *pVector, CKNVector *pResult)
Matrix and vector multiple operation.
unsigned int nDegeneratedEigenValueCount
static void MVMulOptimal(CKNCSR *pAMatrix, CKNVector *pVector, CKNVector *pResult)
Matrix and vector multiple operation for multiple call.
unsigned int * pEigenValueFoundIteration
int SpuriousRitzValueChecking(int nEigenValueCount, double *pEigenValues, double *pEigenVectors, double *pNonSpuriousValues, double *pNonSpuriousVectors, double fANorm, int nIterationCount)
Checking spurious values.
#define SEND_EV_TO_MASTER
double_vector_t m_vectValueImaginaryBuffer
A member variable for saving none zero elements.
unsigned int nMaxEigenValueFoundIteration
void ExtractDoubleVector(unsigned int nVectorsize, double *pTarget, double *pSource, unsigned int nSrcCount, int *pFilter, unsigned int nFilterCount, bool bExclusive)
Extract vectors by condition that described in filter.
void inverse_iter(double *alpha, double *beta, double *app_evc, int iter, double app_eva)
double * pDegeneratedEigenValues
void Finalize()
Free allocated memory for vector elements.
double GetAbsolute()
Get Absolute value of complex number.
LPEIGENVALUE_RESULT DoLanczosMethod(CKNMatrixOperation::CKNCSR *pAMatrix, unsigned int nIterationCount, unsigned int nEigenValueCheckInterval, unsigned int nEigenValueCount, double fEigenvalueMin, double fEignevalueMax, double fConvergenceTolerance, bool bReorthogonalization, bool bCalcuEigVector, bool bWaveFunction, double load_in_MIC, CKNMatrixOperation::CKNCSR *pmylocalblock=NULL, CKNMatrixOperation::CKNCSR *leftlocalblock=NULL, CKNMatrixOperation::CKNCSR *rightlocalblock=NULL)
Doing lanczos method.
void InitVariables()
Deallocating member variables.
This class includes functions for matrix debugging.
static double GetTotalTakeTime()
static void SortSolution(LPEIGENVALUE_RESULT lpResult)
Sorting computing eigenvalue.
int DistinguishClusterOfEigenvalue(int nEigenValueCount, double *pEigenValues, double *pEigenVectors, double *pNonClustersValues, double *pNonClustersVectors, int nIterationCount)
Distinguish clusters values.
static void ExchangeCommand(double *pfCommand, MPI_Comm comm)
Gather eigenvalue finding iteration number from deflation group.
double GetNorm(bool bMPI=false)
Getting norm of vector.
void CalculateEigenVector(LPEIGENVALUE_RESULT lpResult, CKNMatrixOperation::CKNVector V, unsigned int nIterationIndex)
Calculate Eigen vector of A Matrix.
static int GetCurrentRank()
unsigned int nOriginalIndex
void thomas_alg(double **T, double *initguess, double *app_evc, int iter)
static MPI_Comm GetDeflationComm()
Getting Lanczos computing group MPI_Comm.
void IntegrateEigenvaluesEx(int nIterationCount, LPEIGENVALUE_RESULT lpResult, unsigned int nCalculatedEigenValueCount, unsigned int nCalculatedEigenValueCountBeforeConvergenceCheck, double *pCalcuResult_Value, double *pCalcuResult_Vector, bool *pbValidEigenValue)
Integrating computing solution during Lanczos method operation.
double_vector_t m_vectValueRealBuffer
A member variable for saving none zero elements.
#define ALLOC_WITH_NULL_INIT(pointer, data_type, data_size)
static void AppendEigenValue(LPEIGENVALUE_RESULT lpResult, double fEigenValue, unsigned int nFindIteration=DEGENERATED_INDEX, bool bInsertFirst=false)
Checking is aborting computation flag.
void SetAt(unsigned int nIndex, CKNComplex value)
Set element value in specific index, Call by value.
unsigned int nEigenValueCount
#define FREE_MEM(pointer)
Macro for memory allocation and assign null value.
unsigned int GetSize()
Return Vector elements size.
~CKNLanczosMethod()
Destructor.
unsigned int nEigenValueCountForMemeory
void FinalizeTemporaryArrayAndVector()
Finalize temporary eigenvalue arrays and vectors.
void ExtractDoubleValues(double *pTarget, double *pSource, unsigned int nSrcCount, int *pFilter, unsigned int nFilterCount, bool bExclusive)
Extract value by condition that described in filter.
const unsigned long ERROR_MALLOC
Error code that means error occur during memory allocation.
This class for complex operation and saving value.
static void BarrierAllComm()
Is Multilevel MPI Setting.
static void ReleaseResult(LPEIGENVALUE_RESULT lpResult, bool bReleaseStruct)
Release memory for lanczos method result.
static void MeasurementStart(MEASUREMENT_INDEX index)
Measurement start for part.
int DistinguishClusterOfEigenvalueEx(int nEigenValueCount, double *pEigenValues, double *pEigenVectors, bool *pbValidEigenValues, int nIterationCount)
Distinguish clusters values.
LPEIGENVALUE_RESULT LanczosIteration()
Doing lanczos basic iteration.
static bool IsRootRank()
Get Total node count.
void LanczosIterationLoop(LPEIGENVALUE_RESULT lpResult, CKNMatrixOperation::CKNVector *V1, unsigned int nIterationCount, CKNComplex *pAlpha, double *pAlphaReal, double *pBeta, double *pWj, double *pWjm1, double *pWjp1, bool bMakeEigvVector=false)
Doing lanczos basic iteration.
static bool m_bStop
Determind stop iteration before end of iteration count.
int RangeChecking(int nEigenValueCount, double *pEigenValues, double *pEiegnVectors, double *pRangeCheckingEigenValues, double *pRangeCheckingVectors, int nIterationCount)
Checking eigenvalue range.
static bool IsSame(double operand1, double operand2, double tol)
Compare two double variable.
static int Gram_schmidt(CKNVector *pVect1, CKNVector *pVect2)
Doing gam schmidt orthogonalization.
int EigenValueSolver(unsigned int nIterationCount, double *pAlpha, double *pBeta, double *pEigenValues, double *pEigenVectors)
EigenValue Solving.
static bool IsMultiLevelMPI()
Get MPI_Comm.
int nEigenValueFoundIteration
int ConvergenceChecking(int nEigenValueCount, double *pEigenValues, double *pEiegnVectors, double *pConvergedEigenValues, double *pConvergedEigenVectors, double fANorm, double *pBeta, int nIterationCount)
Checking convergence criteria.
#define DO_NOT_CONVERGENCE_CHECKING
Convergernece checking option default value.
This class for describing vector for Lanczos method.
static void ShowLanczosResult(CKNLanczosMethod::LPEIGENVALUE_RESULT lpResult, bool bCalculateEigenVectors, bool bCalculateWaveFunction, double *pKValue, int nRepeatCount)
Save calculating result into file.
static bool VVDot(CKNVector *pVector1, CKNVector *pVector2, CKNComplex *pResult)
Between vectors dot product operation.
static bool IsDeflationRoot()
Checking is root rank of Lanczos computation.