16 #define LOOP_OPTIMIZE_COUNT 10
42 if (nSize == m_nValueCount)
45 m_nValueCount = nSize;
46 m_vectValueRealBuffer.resize(nSize);
47 m_vectValueImaginaryBuffer.resize(nSize);
71 if (nIndex > GetSize())
76 m_vectValueRealBuffer[nIndex] = fReal;
77 m_vectValueImaginaryBuffer[nIndex] = fImaginary;
86 if (nIndex > GetSize())
92 m_rtnTemp.SetComplexNumber(m_vectValueRealBuffer[nIndex], m_vectValueImaginaryBuffer[nIndex]);
104 if (nIndex > GetSize())
110 rtnComplex.
SetComplexNumber(m_vectValueRealBuffer[nIndex], m_vectValueImaginaryBuffer[nIndex]);
118 for (i = 0; i < GetSize(); i++)
120 m_vectValueRealBuffer[i] = 0.;
121 m_vectValueImaginaryBuffer[i] = 0.;
134 for (i = 0; i < GetSize(); i++)
136 tempComplex.
SetComplexNumber(m_vectValueRealBuffer[i], m_vectValueImaginaryBuffer[i]);
137 tempComplex = tempComplex * Scalar;
154 for (i = 0; i < GetSize(); i++)
156 m_vectValueRealBuffer[i] *= fScalar;
157 m_vectValueImaginaryBuffer[i] *= fScalar;
169 for (i = 0; i < GetSize(); i++)
171 tempComplex.
SetComplexNumber(m_vectValueRealBuffer[i], m_vectValueImaginaryBuffer[i]);
172 tempComplex = tempComplex / Scalar;
184 double *pReal = NULL, *pImaginary = NULL;
186 unsigned int nSize = GetSize();
188 pReal = m_vectValueRealBuffer.data();
189 pImaginary = m_vectValueImaginaryBuffer.data();
191 #pragma omp parallel for
192 for (i = 0; i < nSize ; i++)
197 pImaginary[i] /= fScalar;
205 srand((
unsigned int)time(NULL));
206 for (i = 0; i < m_nValueCount; i++)
208 m_vectValueRealBuffer[i] = rand();
209 m_vectValueImaginaryBuffer[i] = rand();
218 double fVectorSize = GetNorm(bMPI);
221 for (i = 0; i < m_nValueCount; i++)
223 m_vectValueRealBuffer[i] /= fVectorSize;
224 m_vectValueImaginaryBuffer[i] /= fVectorSize;
239 double *pReal = m_vectValueRealBuffer.data();
240 double *pImaginary = m_vectValueImaginaryBuffer.data();
242 #pragma omp parallel for reduction(+:fTotal)
243 for (i = 0; i < m_nValueCount; i++)
245 const double fReal = pReal[i];
246 const double fImaginary = pImaginary[i];
251 fNorm = fReal * fReal + fImaginary * fImaginary;
266 unsigned int i, nSize = GetSize();
267 double *pReal = NULL, *pImaginary = NULL;
268 double *pOperandReal = NULL, *pOperandImagianray = NULL;
270 if (nSize != vector->
GetSize())
276 pReal = m_vectValueRealBuffer.data();
277 pImaginary = m_vectValueImaginaryBuffer.data();
281 #pragma omp parallel for
282 for (i = 0; i < nSize; i++)
284 pReal[i] = pReal[i] - fScalar * pOperandReal[i];
285 pImaginary[i] = pImaginary[i] - fScalar * pOperandImagianray[i];
295 unsigned int i, nSize = GetSize();
296 double *pReal = NULL, *pImaginary = NULL;
297 double *pOperandReal = NULL, *pOperandImagianray = NULL;
300 if (nSize != pVector->
GetSize())
306 pReal = m_vectValueRealBuffer.data();
307 pImaginary = m_vectValueImaginaryBuffer.data();
311 #pragma omp parallel for
312 for (i = 0; i < nSize; i++)
314 pReal[i] = pReal[i] - fReal* pOperandReal[i] + fImaginary * pOperandImagianray[i];
315 pImaginary[i] = pImaginary[i] - fReal * pOperandImagianray[i] - fImaginary * pOperandReal[i];
324 unsigned int i, nSize = GetSize();
327 if (nSize != vector->
GetSize())
333 for (i = 0; i < nSize; i++)
335 pOperand1 = GetAtPt(i);
336 pOperand2 = vector->
GetAtPt(i);
347 unsigned int i, nSize = GetSize();
350 if (nSize != vector->
GetSize())
356 for (i = 0; i < nSize; i++)
358 pOperand1 = GetAtPt(i);
359 pOperand2 = vector->
GetAtPt(i);
367 m_vectValueRealBuffer.clear();
368 m_vectValueImaginaryBuffer.clear();
383 if (nStartIndex > m_nValueCount || nStartIndex + pVector->
GetSize() > m_nValueCount)
386 for (i = 0; i < pVector->
GetSize(); i++)
403 double *pReal = NULL, *pImaginariy = NULL;
406 if( NULL == pBuffer )
411 pReal = m_vectValueRealBuffer.data();
412 memcpy(pReal, pBuffer, m_nValueCount *
sizeof(
double));
414 pImaginariy = m_vectValueImaginaryBuffer.data();
415 memcpy(pImaginariy, pBuffer + m_nValueCount, m_nValueCount *
sizeof(
double));
419 pReal = m_vectValueRealBuffer.data();
420 memcpy(pBuffer, pReal, m_nValueCount *
sizeof(
double));
422 pImaginariy = m_vectValueImaginaryBuffer.data();
423 memcpy(pBuffer + m_nValueCount, pImaginariy, m_nValueCount *
sizeof(
double));
446 unsigned int i, nSize = GetSize();
454 for (i = 0; i < nSize; i++)
456 Rtn += (GetAt(i) * vector.
GetAt(i));
467 return operator-((*vector));
477 unsigned int i, nSize = GetSize();
487 for (i = 0; i < nSize; i++)
489 rtnVector.
SetAt(i, GetAt(i) - vector.
GetAt(i));
500 return operator+((*vector));
510 unsigned int i, nSize = GetSize();
519 for (i = 0; i < nSize; i++)
521 rtnVector.
SetAt(i, GetAt(i) + vector.
GetAt(i));
533 operator=((*vector));
542 unsigned int i, nSize = vector.
GetSize();
543 double *pReal = NULL, *pImaginary = NULL;
544 double *pSourceReal = NULL, *pSourceImagianry = NULL;
548 pReal = m_vectValueRealBuffer.data();
549 pImaginary = m_vectValueImaginaryBuffer.data();
553 #pragma omp parallel for
554 for (i = 0; i < nSize; i++)
556 pReal[i] = pSourceReal[i];
557 pImaginary[i] = pSourceImagianry[i];
590 if (!m_vectValueBuffer.empty())
594 for (i = 0; i < nRow; i++)
596 for (j = 0; j < nColumn; j++)
599 m_vectValueBuffer.push_back(element);
604 m_nColumnCount = nColumn;
631 if (nRow > m_nRowCount || nColumn > m_nColumnCount)
634 m_vectValueBuffer[m_nColumnCount*nRow + nColumn].SetComplexNumber(fRealNumber, fImageNumber);
655 if (nRowStart > m_nRowCount || nRowStart + nSrcRowCount > m_nRowCount
656 || nColumnStart > m_nColumnCount || nColumnStart + nSrcColumnCount > m_nColumnCount)
659 for (i = 0; i < nSrcRowCount; i++)
661 for (j = 0; j < nSrcColumnCount; j++)
664 m_vectValueBuffer[m_nColumnCount*(nRowStart + i) + (nColumnStart + j)] = complexNumber;
678 unsigned int nFormerRow = m_nRowCount;
679 unsigned int nFormerColumn = m_nColumnCount;
681 std::vector<CKNComplex> vectTemp;
686 m_nRowCount += nCount;
688 case COLUMN_DIRECTION:
689 m_nColumnCount += nCount;
693 for (i = 0; i < m_nRowCount * m_nColumnCount; ++i)
696 vectTemp.push_back(element);
699 for (i = 0; i < nFormerRow; ++i)
701 for (j = 0; j < nFormerColumn; ++j)
703 CKNComplex element = m_vectValueBuffer[nFormerColumn*i + j];
718 if (vector.
GetSize() != m_nRowCount || vector.
GetSize() != m_nColumnCount)
721 for (i = 0; i < m_nRowCount; ++i)
723 SetElement(i, i, vector.
GetAt(i));
737 return m_vectValueBuffer[m_nColumnCount*nRowIndex + nColumnIndex];
750 if (vector.
GetSize() > m_nRowCount)
753 for (i = 0; i < vector.
GetSize(); ++i)
755 SetElement(i, nColumnIndex, vector.
GetAt(i));
772 if (vector.
GetSize() > m_nColumnCount)
775 for (i = 0; i < vector.
GetSize(); ++i)
777 SetElement(nRowIndex, i, vector.
GetAt(i));
791 for (i = 0; i < m_vectValueBuffer.size(); i++)
793 m_vectValueBuffer[i] = m_vectValueBuffer[i] * Scalar;
802 double fReal, fImaginary;
805 for (i = 0; i < m_vectValueBuffer.size(); i++)
807 fReal = m_vectValueBuffer[i].GetRealNumber();
808 fImaginary = m_vectValueBuffer[i].GetImaginaryNumber();
809 m_vectValueBuffer[i].SetComplexNumber(fScalar*fReal, fScalar*fImaginary);
824 if (nColumnIndex > m_nColumnCount)
828 for (i = 0; i < m_nRowCount; ++i)
829 pVector->
SetAt(i, m_vectValueBuffer[i*m_nColumnCount + nColumnIndex]);
845 if (nRowIndex > m_nRowCount)
848 pVector->
SetSize(m_nColumnCount);
849 for (i = 0; i < m_nColumnCount; ++i)
850 pVector->
SetAt(i, m_vectValueBuffer[nRowIndex*m_nColumnCount + i]);
869 if (nRowStartIndex + nRowCount > m_nRowCount)
872 if (nColumnCount + nColumnCount > m_nColumnCount)
879 for (i = nRowStartIndex; i < nRowStartIndex + nRowCount; i++)
881 for (j = nColumnStartIndex; j < nColumnStartIndex + nColumnCount; j++)
883 pMatrix->
SetElement(i - nRowStartIndex, j - nColumnStartIndex, m_vectValueBuffer[i*m_nColumnCount + j]);
896 unsigned int nCount = m_vectValueBuffer.size(), i;
898 for (i = 0; i < nCount; ++i)
899 m_vectValueBuffer[i].Division(2.0);
907 operator+=((*matrix));
920 for (i = 0; i < m_nRowCount; ++i)
922 for (j = 0; j < m_nColumnCount; ++j)
924 m_vectValueBuffer[m_nColumnCount*i + j] = m_vectValueBuffer[m_nColumnCount*i + j] + matrix.
GetElement(i, j);
934 operator=((*matrix));
946 for (i = 0; i < m_nRowCount; i++)
948 for (j = 0; j < m_nColumnCount; j++)
958 unsigned int i, j, nTemp;
962 if (m_nColumnCount == m_nRowCount)
964 for (i = 0; i < m_nRowCount; ++i)
966 for (j = 0; j < m_nColumnCount; ++j)
971 tempNumber = m_vectValueBuffer[i*m_nColumnCount + j];
972 m_vectValueBuffer[i*m_nColumnCount + j] = m_vectValueBuffer[j*m_nColumnCount + i];
973 m_vectValueBuffer[j*m_nColumnCount + i] = tempNumber;
979 std::vector<CKNComplex> tempVector;
981 for (i = 0; i < m_nColumnCount; i++)
983 for (j = 0; j < m_nRowCount; j++)
986 tempVector.push_back(element);
991 for (i = 0; i < m_nRowCount; ++i)
992 for (j = 0; j < m_nColumnCount; ++j)
993 tempVector[j*m_nRowCount + i] = m_vectValueBuffer[i*m_nColumnCount + j];
995 nTemp = m_vectValueBuffer.size();
996 for (i = 0; i < nTemp; ++i)
997 m_vectValueBuffer[i] = tempVector[i];
1000 m_nRowCount = m_nColumnCount;
1001 m_nColumnCount = nTemp;
1022 m_nValueStackCount = 0;
1026 m_fFirstRowIndex = 0;
1027 nComponentsFirstUnitCell = 0;
1028 nComponentsLastUnitCell = 0;
1043 for (
unsigned int i = 0; i < m_nRowCount + 1; i++)
1044 m_vectRow.push_back(MAX_INDEX);
1074 int nValueCount = 0, i, j, k, nValidIndex[5];
1078 for (i = 0; i < 4; ++i)
1080 if (NULL != lpData[i + 1].pMatrix)
1081 nValidIndex[nValueCount++] = i + 1;
1083 nValidIndex[nValueCount++] = 0;
1088 for (j = 0; j < nValueCount; ++j)
1126 if (m_nRowCount < nRow + nRowCount - nRowStart || m_nColumnCount < nColumn + nColumnCount - nColumnStart)
1129 for (i = nRowStart; i < nRowCount; i++)
1131 for (j = nColumnStart; j < nColumnCount; j++)
1159 for (i = nRowStart; i < nRowStart + nRowCount; ++i)
1161 for (j = nColumnStart; j < nColumnStart + nColumnCount; ++j)
1163 bRtn = ElementScalarMultiple(i, j, Scalar);
1183 bool bResult, bRtn =
false;;
1185 tempNumber = GetElement(nRow, nColumn, bResult);
1189 tempNumber = tempNumber * Scalar;
1190 bRtn = SetAt(tempNumber, nRow, nColumn);
1204 bool bResult, bRtn =
false;;
1206 tempNumber = GetElement(nRow, nColumn, bResult);
1210 tempNumber = tempNumber * fScalar;
1211 bRtn = SetAt(tempNumber, nRow, nColumn);
1229 unsigned int nMatrixRowCount = pMatrix->
GetRowCount();
1235 if (m_nRowCount < nRow + nRowCount - nRowStart || m_nColumnCount < nColumn + nColumnCount - nColumnStart)
1238 for (i = nRowStart; i < nRowCount; i++)
1240 for (j = nColumnStart; j < nColumnCount; j++)
1246 SetAt(tempNumber, nRow + i - nRowStart, nColumn + j - nColumnStart);
1260 if (nIndex > GetRowCount() + 1)
1266 return m_vectRow[nIndex];
1275 if (nIndex > GetNoneZeroCount())
1282 return m_vectColumn[nIndex];
1292 if (nIndex > GetNoneZeroCount())
1298 m_rtnTemp.SetComplexNumber(m_vectValueRealBuffer[nIndex], m_vectValueImaginaryBuffer[nIndex]);
1310 m_vectValueRealBuffer.push_back(fRealValue);
1311 m_vectValueImaginaryBuffer.push_back(fImaginaryValue);
1312 m_vectColumn.push_back(nCol);
1314 if (MAX_INDEX == GetRowIndexNo(nRow))
1315 m_vectRow[nRow] = m_nValueStackCount;
1317 m_nValueStackCount++;
1323 m_vectRow[GetRowCount()] = GetNoneZeroCount();
1338 bool CKNMatrixOperation::CKNCSR::ConvertDoubleArray(
unsigned int *pRowPtr,
unsigned int *pColIndex,
double *pNNZValueReal,
double *pNNZValueImaginary,
unsigned int nNNZSize,
unsigned int nRowSize,
unsigned int nColSize,
unsigned int nFirstIndex,
bool bZerobase)
1341 int nAdjustIndex = 0;
1343 int nStartIndex, nEndIndex;
1344 double fReal, fImaginary;
1346 SetRowCount(nRowSize);
1347 SetColumnCount(nColSize);
1353 for( i = 0; i < nRowSize ; ++ i )
1355 nStartIndex = pRowPtr[i] + nAdjustIndex;
1356 nEndIndex = pRowPtr[i+1] + nAdjustIndex;
1357 for( j = nStartIndex ; j < nEndIndex ; ++ j)
1360 fReal = pNNZValueReal[j];
1364 if( pNNZValueImaginary )
1365 fImaginary = pNNZValueImaginary[j];
1369 PushNoneZeroValue(fReal, fImaginary, i, pColIndex[j] + nAdjustIndex);
1374 SetFirstRowIndex(nFirstIndex + nAdjustIndex);
1385 if (m_vectColumn.size() > 0)
1386 m_vectColumn.clear();
1388 if (m_vectValueRealBuffer.size() > 0)
1389 m_vectValueRealBuffer.clear();
1391 if (m_vectValueImaginaryBuffer.size() > 0)
1392 m_vectValueImaginaryBuffer.clear();
1394 if (m_vectRow.size() > 0)
1405 unsigned int nOriginSize, i;
1407 if (
false == bRow &&
false == bColumn)
1412 nOriginSize = GetRowCount();
1413 SetRowCount(nMulti*nOriginSize);
1418 for (i = 0; i < GetRowCount(); i++)
1419 m_vectRow.push_back(MAX_INDEX);
1420 m_vectRow.push_back(GetNoneZeroCount());
1422 for (i = 0; i < temp.size() - 1; i++)
1423 m_vectRow[i*nMulti] = temp[i];
1428 nOriginSize = GetColumnCount();
1429 SetColumnCount(nMulti*nOriginSize);
1430 for (i = 0; i < GetNoneZeroCount(); i++)
1431 m_vectColumn[i] *= nMulti;
1443 unsigned int nRowIndex = nRowFrom;
1445 if (nRowFrom > m_nRowCount)
1448 while (MAX_INDEX == m_vectRow[nRowIndex])
1451 if (nRowIndex > m_nRowCount)
1455 nValueIndex = m_vectRow[nRowIndex];
1468 for (i = nRowFrom; i < GetRowCount(); i++)
1470 if (MAX_INDEX != m_vectRow[i])
1483 unsigned int nIndex;
1484 unsigned int nRowStart;
1485 CKNComplex bufferElement = number, tempElement;
1486 unsigned int bufferColIndex = nColumn;
1487 double_vector_t::iterator valueIndex = m_vectValueRealBuffer.begin();
1488 uint_vector_t::iterator columnIndex = m_vectColumn.begin();
1491 if (nRow >= GetRowCount() || nColumn >= GetColumnCount())
1494 if (IsNonzeroElement(nRow, nColumn, nIndex))
1501 nRowStart = GetRowIndexNo(nRow);
1502 if (MAX_INDEX != nRowStart)
1504 unsigned int nInsertPos;
1506 if (GetColIndexNo(nRowStart) > nColumn)
1508 nInsertPos = nRowStart;
1512 nInsertPos = GetNextNonzeroValueIndex(nRow, nColumn);
1515 if (MAX_INDEX == nInsertPos)
1522 m_vectValueRealBuffer.insert(valueIndex + nInsertPos, bufferElement.
GetRealNumber());
1523 m_vectValueImaginaryBuffer.insert(valueIndex + nInsertPos, bufferElement.
GetImaginaryNumber());
1524 m_vectColumn.insert(columnIndex + nInsertPos, nColumn);
1525 IncreaseRowIndex(nRowStart + 1);
1526 IncreaseNoneZeroCount();
1532 if (GetNextRowIndexValue(nRow + 1, nRowStart))
1534 m_vectValueRealBuffer.insert(valueIndex + nRowStart, bufferElement.
GetRealNumber());
1535 m_vectValueImaginaryBuffer.insert(valueIndex + nRowStart, bufferElement.
GetImaginaryNumber());
1536 m_vectColumn.insert(columnIndex + nRowStart, nColumn);
1537 m_vectRow[nRow] = nRowStart;
1538 IncreaseRowIndex(nRowStart + 1);
1539 IncreaseNoneZeroCount();
1563 unsigned int nSubStart = GetRowIndexNo(nRow), nSubEnd = GetRowIndexNo(nRow + 1), i;
1566 if (MAX_INDEX == nSubStart)
1567 return elementValue;
1569 if (MAX_INDEX == nSubEnd)
1571 unsigned int nIndex = nRow + 2;
1573 while (MAX_INDEX == nSubEnd && nIndex <= GetRowCount())
1574 nSubEnd = GetRowIndexNo(nIndex++);
1577 for (i = nSubStart; i < nSubEnd; i++)
1579 if (nColumn == m_vectColumn[i])
1582 elementValue.
SetComplexNumber(m_vectValueRealBuffer[i], m_vectValueImaginaryBuffer[i]);
1583 return elementValue;
1587 return elementValue;
1602 unsigned int nSubStart = GetRowIndexNo(nRow);
1603 unsigned int nSubEnd = GetRowIndexNo(nRow + 1);
1605 if (0 == GetNoneZeroCount())
1608 if (MAX_INDEX == nSubStart)
1611 if (MAX_INDEX == nSubEnd)
1613 if (!GetNextRowIndexValue(nRow + 2, nSubEnd))
1619 for (i = nSubStart; i < nSubEnd; i++)
1621 unsigned int nColIndex = GetColIndexNo(i);
1622 if (nColIndex == nColumn)
1643 unsigned int nSubStart = GetRowIndexNo(nRow), nSubEnd = GetRowIndexNo(nRow + 1);
1644 unsigned int nTemp = nRow;
1646 if (MAX_INDEX == nSubEnd)
1648 if (!GetNextRowIndexValue(nRow + 2, nSubEnd))
1654 for (i = nSubStart; i < nSubEnd; i++)
1656 unsigned int nColIndex = GetColIndexNo(i);
1657 if (nColIndex > nColumn)
1673 unsigned int tempRowData, bufferRowData;
1675 if (nRow >= GetRowCount())
1679 for (i = nRow; i < GetRowCount(); i++)
1681 tempRowData = GetRowIndexNo(i);
1682 m_vectRow[i] = bufferRowData;
1683 bufferRowData = tempRowData;
1685 m_vectRow.push_back(bufferRowData);
1686 SetRowCount(GetRowCount() + 1);
1697 m_vectRow[GetRowCount()] = -1;
1698 SetRowCount(GetRowCount() + 1);
1699 m_vectRow.push_back(GetNoneZeroCount());
1711 unsigned int i, nNonZeroValueCount;
1713 if (nColumn >= GetColumnCount())
1716 nNonZeroValueCount = GetNoneZeroCount();
1718 for (i = 0; i < nNonZeroValueCount; i++)
1720 if (m_vectColumn[i] >= nColumn)
1724 SetColumnCount(GetColumnCount() + 1);
1735 SetColumnCount(GetColumnCount() + 1);
1746 unsigned int i, j, nSize = GetRowCount();
1748 if (nSize != vector.
GetSize())
1754 rtnVector.
SetSize(GetRowCount());
1755 for (i = 0; i < nSize; i++)
1758 unsigned int nSubStart = GetRowIndexNo(i), nSubEnd = GetRowIndexNo(i + 1);
1760 if (MAX_INDEX == nSubStart)
1763 if (MAX_INDEX == nSubEnd)
1765 unsigned int nIndex = i + 2;
1767 while (MAX_INDEX == nSubEnd && nIndex <= GetRowCount())
1768 nSubEnd = GetRowIndexNo(nIndex++);
1771 for (j = nSubStart; j < nSubEnd; j++)
1773 unsigned int nColIndex = GetColIndexNo(j);
1774 fSubTotal += (*(GetValue(j))*vector.
GetAt(nColIndex));
1776 rtnVector.
SetAt(i, fSubTotal);
1810 #define REPEAT_COUNT 1000
1835 for (i = 0; i < readSize; i++)
1866 while (0 != (readSize = fread(Data,
sizeof(
CSR_ELEMENT_DATA), REPEAT_COUNT, fDataFile)))
1868 for (i = 0; i < readSize; i++)
1870 if (0 == Data[i].nRow && 0 == Data[i].nColumn)
1873 pCSR->
PushNoneZeroValue(Data[i].fReal, Data[i].fImaginary, (
unsigned int)Data[i].nRow - 1, (
unsigned int)Data[i].nColumn - 1);
1926 for (i = 0; i < nDataCount; i++)
1937 for (i = 0; i < nDataCount; i++)
1939 pCSR->
PushNoneZeroValue(lpData[i].fReal, lpData[i].fImaginary, (
unsigned int)lpData[i].nRow - 1, (
unsigned int)lpData[i].nColumn - 1);
1955 unsigned int row, col;
1965 unsigned int nIndex = 0;
1966 bool bPushedInRow =
false;
1967 for (row = 0; row < nRowOrder; row++)
1969 for (col = 0; col < nColumnOrder; col++)
1971 if (0 != pReal[row*nColumnOrder + col] || 0 != pImaginary[row*nColumnOrder + col])
1974 bPushedInRow =
true;
1992 unsigned int nSubStart, nSubEnd;
2001 for (i = nStart; i < (
unsigned int)nEnd; i++)
2003 nSubStart = GetRowIndexNo(i);
2004 nSubEnd = GetRowIndexNo(i + 1);
2005 for (j = nSubStart; j < nSubEnd; j++)
2010 pCSR->
PushNoneZeroValue(m_vectValueRealBuffer[i], m_vectValueImaginaryBuffer[i], nRowIndex, m_vectColumn[j]);
2028 bool bRtn =
false, bResult;
2029 unsigned int i, j, nRowIndex;
2030 unsigned int nVectorIndex = 0, nVectorEnd = pOperand->
GetSize();
2032 if( bUseSplitVector )
2034 if( pOperand->
GetSize() != m_nRowCount / 10 )
2039 if( pOperand->
GetSize() != m_nColumnCount / 10 )
2042 nVectorIndex = (
unsigned int)m_fFirstRowIndex/10;
2043 nVectorEnd = nVectorIndex + m_nRowCount / 10;
2047 for ( i = nVectorIndex; i < nVectorEnd ; ++ i )
2052 CKNComplex csrElement = GetElement(nRowIndex, nRowIndex + (
unsigned int)m_fFirstRowIndex, bResult);
2059 csrElement = csrElement + number;
2062 csrElement = csrElement - number;
2065 csrElement = csrElement * number;
2068 csrElement = csrElement / number;
2072 SetAt(csrElement, nRowIndex, nRowIndex + (
unsigned int)m_fFirstRowIndex);
2102 unsigned int i, nCount;
2104 out = fopen(pstrFileName,
"wt");
2107 fputs(
"None Zero Value\r\n", out);
2108 fputs(
"------------------------------------------\r\n", out);
2109 nCount = GetNoneZeroCount();
2110 for (i = 0; i < nCount; i++)
2111 fprintf(out,
"%f + %fi\r\n", m_vectValueRealBuffer[i], m_vectValueImaginaryBuffer[i]);
2113 fputs(
"Column\r\n", out);
2114 fputs(
"------------------------------------------\r\n", out);
2115 nCount = GetNoneZeroCount();
2116 for (i = 0; i < nCount; i++)
2117 fprintf(out,
"%u\r\n", m_vectColumn[i]);
2119 fputs(
"Row\r\n", out);
2120 fputs(
"------------------------------------------\r\n", out);
2121 nCount = GetRowCount() + 1;
2122 for (i = 0; i < nCount; i++)
2123 fprintf(out,
"%u\r\n", GetRowIndexNo(i));
2137 CKNVector *pOperandVector = NULL, VOperand;
2138 double *pMatrixReal = NULL, *pMatrixImaginary = NULL;
2139 double *pVectorReal = NULL, *pVectorImaginary = NULL;
2140 double *pResultReal = NULL, *pResultImaginary = NULL;
2141 unsigned int *pMatrixRow = NULL, *pMatrixColumn = NULL;
2143 VOperand = *pVector;
2144 pOperandVector = &VOperand;
2146 #ifndef DISABLE_MPI_ROUTINE
2150 pOperandVector = &VOperand;
2152 pOperandVector = pVector;
2155 if (nSize != pOperandVector->GetSize())
2164 pMatrixRow = pAMatrix->
m_vectRow.data();
2166 pVectorReal = pOperandVector->m_vectValueRealBuffer.data();
2167 pVectorImaginary = pOperandVector->m_vectValueImaginaryBuffer.data();
2171 unsigned int input_real_size = pOperandVector->m_vectValueRealBuffer.size();
2172 unsigned int input_imaginary_size = pOperandVector->m_vectValueImaginaryBuffer.size();
2176 #pragma omp parallel for
2177 for ( i = 0; i < nSize; i++)
2179 double real_sum = 0.0;
2180 double imaginary_sum = 0.0;
2181 const unsigned int nSubStart = pMatrixRow[i];
2182 const unsigned int nSubEnd = pMatrixRow[i + 1];
2185 for ( j = nSubStart; j < nSubEnd; j++)
2187 const unsigned int nColIndex = pMatrixColumn[j];
2188 const double m_real = pMatrixReal[j];
2189 const double m_imaginary = pMatrixImaginary[j];
2190 const double v_real = pVectorReal[nColIndex];
2191 const double v_imaginary = pVectorImaginary[nColIndex];
2193 real_sum += m_real * v_real - m_imaginary * v_imaginary;
2194 imaginary_sum += m_real * v_imaginary + m_imaginary * v_real;
2197 pResultReal[i] = real_sum;
2198 pResultImaginary[i] = imaginary_sum;
2200 #ifndef DISABLE_MPI_ROUTINE
2201 VOperand.Finalize();
2202 #endif //DISABLE_MPI_ROUTINE
2213 unsigned int nSize, B, Brt, Blt;
2214 double *pMatrixValueReal = NULL, *pOperandVectorReal = NULL, *pResultVectorReal = NULL;
2215 double *pMatrixValueImaginary = NULL, *pOperandVectorImaginary = NULL, *pResultVectorImaginary = NULL;
2219 int left_neighbor = (myrank - 1 + ncpus) % ncpus;
2220 int right_neighbor = (myrank + 1) % ncpus;
2221 MPI_Request req_sr[2];
2222 MPI_Status stat_sr[2];
2228 __assume_aligned(X, 64);
2229 __assume_aligned(X, 64);
2230 __assume_aligned(Xrt, 64);
2232 __assume_aligned(pMatrixValueReal, 64);
2233 __assume_aligned(pMatrixValueImaginary, 64);
2234 __assume_aligned(pOperandVectorReal, 64);
2235 __assume_aligned(pOperandVectorImaginary, 64);
2236 __assume_aligned(pResultVectorReal, 64);
2237 __assume_aligned(pResultVectorImaginary, 64);
2238 __assume_aligned(pRow, 64);
2239 __assume_aligned(pColumn, 64);
2252 if (nSize != pVector->
GetSize())
2262 for (
int ii = 0; ii< nSize; ii++)
2264 X[2 * ii] = pOperandVectorReal[ii];
2265 X[2 * ii + 1] = pOperandVectorImaginary[ii];
2272 MPI_Waitall(2, req_sr, stat_sr);
2279 unsigned int input_size =
X_largest * 2;
2285 long long papi_values[4];
2286 PAPI_start(papi_event_set);
2290 #pragma offload target(mic:phi_tid) \
2291 nocopy(*pMatrixValueReal : REUSE) \
2292 nocopy(*pMatrixValueImaginary : REUSE) \
2293 nocopy(*pRow : REUSE) \
2294 nocopy(*pColumn : REUSE) \
2295 in(X[0:input_size] : REUSE) \
2296 nocopy(*pResultVectorReal : REUSE) \
2297 nocopy(*pResultVectorImaginary : REUSE)
2299 #pragma omp parallel for
2300 for (
unsigned int i = 0; i < nSize; i++)
2302 double real_sum = 0.0;
2303 double imaginary_sum = 0.0;
2304 const unsigned int nSubStart = pRow[i ];
2305 const unsigned int nSubEnd = pRow[i + 1];
2306 for (
unsigned int j = nSubStart; j < nSubEnd; j++)
2308 const unsigned int nColIndex = pColumn[j];
2309 const double m_real = pMatrixValueReal[j];
2310 const double m_imaginary = pMatrixValueImaginary[j];
2311 const double v_real = X[2 * nColIndex];
2312 const double v_imaginary = X[2 * nColIndex + 1];
2314 real_sum += m_real * v_real - m_imaginary * v_imaginary;
2315 imaginary_sum += m_real * v_imaginary + m_imaginary * v_real;
2318 pResultVectorReal[i] = real_sum;
2319 pResultVectorImaginary[i] = imaginary_sum;
2323 PAPI_stop(papi_event_set, papi_values);
2324 printf(
"[LOCAL] L2 access = %lld | L2 miss = %lld | L2 miss rate = %g %\n", papi_values[0], papi_values[1], (papi_values[1] * 100.0) / papi_values[0]);
2325 printf(
"[LOCAL] L3 access = %lld | L3 miss = %lld | L3 miss rate = %g %\n", papi_values[2], papi_values[3], (papi_values[3] * 100.0) / papi_values[2]);
2330 MPI_Waitall(2, req_sr, stat_sr);
2334 MPI_Waitall(2, req_sr, stat_sr);
2344 pRow = rightlocalblock->
m_vectRow.data();
2348 PAPI_start(papi_event_set);
2352 #pragma offload target(mic:phi_tid) \
2353 nocopy(*pMatrixValueReal : REUSE) \
2354 nocopy(*pMatrixValueImaginary : REUSE) \
2355 nocopy(*pRow : REUSE) \
2356 nocopy(*pColumn : REUSE) \
2357 in(Xrt[0:input_size] : REUSE) \
2358 nocopy(*pResultVectorReal : REUSE) \
2359 nocopy(*pResultVectorImaginary : REUSE)
2361 #pragma omp parallel for
2362 for (
unsigned int i = 0; i < nSize; i++)
2364 double real_sum = 0.0;
2365 double imaginary_sum = 0.0;
2366 const unsigned int nSubStart = pRow[i ];
2367 const unsigned int nSubEnd = pRow[i + 1];
2368 for (
unsigned int j = nSubStart; j < nSubEnd; j++)
2370 const unsigned int nColIndex = pColumn[j];
2371 const double m_real = pMatrixValueReal[j];
2372 const double m_imaginary = pMatrixValueImaginary[j];
2373 const double v_real = Xrt[2 * nColIndex];
2374 const double v_imaginary = Xrt[2 * nColIndex + 1];
2376 real_sum += m_real * v_real - m_imaginary * v_imaginary;
2377 imaginary_sum += m_real * v_imaginary + m_imaginary * v_real;
2380 pResultVectorReal[i] += real_sum;
2381 pResultVectorImaginary[i] += imaginary_sum;
2385 PAPI_stop(papi_event_set, papi_values);
2386 printf(
"[RIGHT] L2 access = %lld | L2 miss = %lld | L2 miss rate = %g %\n", papi_values[0], papi_values[1], (papi_values[1] * 100.0) / papi_values[0]);
2387 printf(
"[RIGHT] L3 access = %lld | L3 miss = %lld | L3 miss rate = %g %\n", papi_values[2], papi_values[3], (papi_values[3] * 100.0) / papi_values[2]);
2391 MPI_Waitall(2, req_sr, stat_sr);
2396 pRow = leftlocalblock->
m_vectRow.data();
2400 PAPI_start(papi_event_set);
2404 #pragma offload target(mic:phi_tid) \
2405 nocopy(*pMatrixValueReal : REUSE) \
2406 nocopy(*pMatrixValueImaginary : REUSE) \
2407 nocopy(*pRow : REUSE) \
2408 nocopy(*pColumn : REUSE) \
2409 in(Xlt[0:input_size] : REUSE) \
2410 out(pResultVectorReal[0:output_real_size] : REUSE) \
2411 out(pResultVectorImaginary[0:output_imaginary_size] : REUSE)
2413 #pragma omp parallel for
2414 for (
unsigned int i = 0; i < nSize; i++)
2416 double real_sum = 0.0;
2417 double imaginary_sum = 0.0;
2418 const unsigned int nSubStart = pRow[i ];
2419 const unsigned int nSubEnd = pRow[i + 1];
2420 for (
unsigned int j = nSubStart; j < nSubEnd; j++)
2422 const unsigned int nColIndex = pColumn[j];
2423 const double m_real = pMatrixValueReal[j];
2424 const double m_imaginary = pMatrixValueImaginary[j];
2425 const double v_real = Xlt[2 * nColIndex];
2426 const double v_imaginary = Xlt[2 * nColIndex + 1];
2428 real_sum += m_real * v_real - m_imaginary * v_imaginary;
2429 imaginary_sum += m_real * v_imaginary + m_imaginary * v_real;
2432 pResultVectorReal[i] += real_sum;
2433 pResultVectorImaginary[i] += imaginary_sum;
2437 PAPI_stop(papi_event_set, papi_values);
2438 printf(
"[LEFT] L2 access = %lld | L2 miss = %lld | L2 miss rate = %g %\n", papi_values[0], papi_values[1], (papi_values[1] * 100.0) / papi_values[0]);
2439 printf(
"[LEFT] L3 access = %lld | L3 miss = %lld | L3 miss rate = %g %\n", papi_values[2], papi_values[3], (papi_values[3] * 100.0) / papi_values[2]);
2456 double *pMatrixReal = NULL, *pMatrixImaginary = NULL;
2457 double *pVectorReal = NULL, *pVectorImaginary = NULL;
2458 double *pResultReal = NULL, *pResultImaginary = NULL;
2459 unsigned int *pMatrixRow = NULL, *pMatrixColumn = NULL;
2461 unsigned int input_size1, input_size2, input_size3;
2462 unsigned int input_offset1, input_offset2, input_offset3, offsettmp[3];
2463 unsigned int output_size, output_offset;
2465 #ifndef DISABLE_MPI_ROUTINE
2466 pOperandVector = VTemp;
2476 pOperandVector = pVector;
2479 if (nSize != pOperandVector->GetSize())
2487 pMatrixRow = pAMatrix->
m_vectRow.data();
2495 input_size1 = pOperandVector->m_vectValueRealBuffer.size();
2498 #pragma offload_transfer target(mic:phi_tid) in(pVectorReal[input_offset1:input_size1] : REUSE)
2499 #pragma offload_transfer target(mic:phi_tid) in(pVectorImaginary[input_offset1:input_size1] : REUSE)
2503 input_size1 = nSizeFromPrevRank;
2504 input_size2 = nSize;
2505 input_size3 = nSizeFromNextRank;
2506 input_offset1 = offsettmp[0];
2507 input_offset2 = offsettmp[1];
2508 input_offset3 = offsettmp[2];
2510 #pragma offload_transfer target(mic:phi_tid) in(pVectorReal[input_offset1:input_size1] : REUSE)
2511 #pragma offload_transfer target(mic:phi_tid) in(pVectorImaginary[input_offset1:input_size1] : REUSE)
2512 #pragma offload_transfer target(mic:phi_tid) in(pVectorReal[input_offset2:input_size2] : REUSE)
2513 #pragma offload_transfer target(mic:phi_tid) in(pVectorImaginary[input_offset2:input_size2] : REUSE)
2514 #pragma offload_transfer target(mic:phi_tid) in(pVectorReal[input_offset3:input_size3] : REUSE)
2515 #pragma offload_transfer target(mic:phi_tid) in(pVectorImaginary[input_offset3:input_size3] : REUSE)
2518 output_size = nSizePHI;
2529 #pragma offload target(mic:phi_tid) \
2530 nocopy(*pMatrixReal : REUSE) \
2531 nocopy(*pMatrixImaginary : REUSE) \
2532 nocopy(*pMatrixRow : REUSE) \
2533 nocopy(*pMatrixColumn : REUSE) \
2534 nocopy(*pVectorReal : REUSE) \
2535 nocopy(*pVectorImaginary : REUSE) \
2536 out(pResultReal[output_offset:output_size] : REUSE) \
2537 out(pResultImaginary[output_offset:output_size] : REUSE) \
2541 #pragma omp parallel for
2542 for (
unsigned int i = 0; i < nSizePHI; i++)
2544 double real_sum = 0.0;
2545 double imaginary_sum = 0.0;
2546 const unsigned int nSubStart = pMatrixRow[i];
2547 const unsigned int nSubEnd = pMatrixRow[i + 1];
2550 for (
unsigned int j = nSubStart; j < nSubEnd; j++)
2552 const unsigned int nColIndex = pMatrixColumn[j];
2553 const double m_real = pMatrixReal[j];
2554 const double m_imaginary = pMatrixImaginary[j];
2555 const double v_real = pVectorReal[nColIndex];
2556 const double v_imaginary = pVectorImaginary[nColIndex];
2558 real_sum += m_real * v_real - m_imaginary * v_imaginary;
2559 imaginary_sum += m_real * v_imaginary + m_imaginary * v_real;
2562 pResultReal[i] = real_sum;
2563 pResultImaginary[i] = imaginary_sum;
2566 #pragma omp parallel for
2567 for (
unsigned int i = nSizePHI; i < nSize; i++)
2569 double real_sum = 0.0;
2570 double imaginary_sum = 0.0;
2571 const unsigned int nSubStart = pMatrixRow[i];
2572 const unsigned int nSubEnd = pMatrixRow[i + 1];
2575 for (
unsigned int j = nSubStart; j < nSubEnd; j++)
2577 const unsigned int nColIndex = pMatrixColumn[j];
2578 const double m_real = pMatrixReal[j];
2579 const double m_imaginary = pMatrixImaginary[j];
2580 const double v_real = pVectorReal[nColIndex];
2581 const double v_imaginary = pVectorImaginary[nColIndex];
2583 real_sum += m_real * v_real - m_imaginary * v_imaginary;
2584 imaginary_sum += m_real * v_imaginary + m_imaginary * v_real;
2587 pResultReal[i] = real_sum;
2588 pResultImaginary[i] = imaginary_sum;
2591 #pragma offload_wait target(mic:phi_tid) wait(&sigval)
2604 double *pMatrixReal = NULL, *pMatrixImaginary = NULL;
2605 double *pVectorReal = NULL, *pVectorImaginary = NULL;
2606 double *pResultReal = NULL, *pResultImaginary = NULL;
2607 unsigned int *pMatrixRow = NULL, *pMatrixColumn = NULL;
2608 #ifndef DISABLE_MPI_ROUTINE
2613 pOperandVector = &VOperand;
2615 pOperandVector = pVector;
2618 if (nSize != pOperandVector->GetSize())
2627 pMatrixRow = pAMatrix->
m_vectRow.data();
2629 pVectorReal = pOperandVector->m_vectValueRealBuffer.data();
2630 pVectorImaginary = pOperandVector->m_vectValueImaginaryBuffer.data();
2634 unsigned int input_real_size = pOperandVector->m_vectValueRealBuffer.size();
2635 unsigned int input_imaginary_size = pOperandVector->m_vectValueImaginaryBuffer.size();
2646 #pragma offload target(mic:phi_tid) \
2647 nocopy(*pMatrixReal : REUSE) \
2648 nocopy(*pMatrixImaginary : REUSE) \
2649 nocopy(*pMatrixRow : REUSE) \
2650 nocopy(*pMatrixColumn : REUSE) \
2651 in(pVectorReal[0:input_real_size] : LOCAL) \
2652 in(pVectorImaginary[0:input_imaginary_size] : LOCAL) \
2653 out(pResultReal[0:output_real_size] : REUSE) \
2654 out(pResultImaginary[0:output_imaginary_size] : REUSE)
2656 #pragma omp parallel for
2657 for ( i = 0; i < nSize; i++)
2659 double real_sum = 0.0;
2660 double imaginary_sum = 0.0;
2661 const unsigned int nSubStart = pMatrixRow[i];
2662 const unsigned int nSubEnd = pMatrixRow[i + 1];
2665 for ( j = nSubStart; j < nSubEnd; j++)
2667 const unsigned int nColIndex = pMatrixColumn[j];
2668 const double m_real = pMatrixReal[j];
2669 const double m_imaginary = pMatrixImaginary[j];
2670 const double v_real = pVectorReal[nColIndex];
2671 const double v_imaginary = pVectorImaginary[nColIndex];
2673 real_sum += m_real * v_real - m_imaginary * v_imaginary;
2674 imaginary_sum += m_real * v_imaginary + m_imaginary * v_real;
2677 pResultReal[i] = real_sum;
2678 pResultImaginary[i] = imaginary_sum;
2680 #ifndef DISABLE_MPI_ROUTINE
2682 #endif //DISABLE_MPI_ROUTINE
2693 double *pOp1Real = NULL, *pOp1Imaginary = NULL;
2694 double *pOp2Real = NULL, *pOp2Imaginary = NULL;
2695 unsigned int i, nSize = pVector1->
GetSize();
2697 if (nSize != pVector2->
GetSize())
2709 double fReal = 0., fImaginary = 0.;
2710 #pragma omp parallel for reduction(+:fReal, fImaginary)
2711 for (i = 0; i < nSize; i++)
2715 fReal += pOp1Real[i] * pOp2Real[i] - (-1*pOp1Imaginary[i]) * pOp2Imaginary[i];
2716 fImaginary += pOp1Real[i] * pOp2Imaginary[i] + (-1*pOp1Imaginary[i]) * pOp2Real[i];
2733 int i, j, nRow, nColumn;
2742 for (i = 0; i < nRow; ++i)
2745 for (j = 0; j < nColumn; ++j)
2749 pResult->
SetAt(i, result);
2761 int nRow, nColumn, nL;
2770 for (k = 0; k < nL; ++k)
2772 for (i = 0; i < nRow; ++i)
2775 for (j = 0; j < nColumn; ++j)
2791 if (fabs(operand1 - operand2) > tol)
2804 if (fabs(fabs(operand1) - fabs(operand2)) > tol)
2823 tempVector = *pVect1;
2876 unsigned int nRowCount, nColumnCount;
2879 int left_neighbor = (myrank-1+ncpus)%ncpus;
2880 int right_neighbor = (myrank+1)%ncpus;
2884 *mine = NULL; *left = NULL; *right = NULL;
2891 (*mine)->SetRowCount(nRowCount);
2892 (*mine)->SetColumnCount(nColumnCount);
2893 (*mine)->BuildDataBuffer(); temp = 0;
2894 for (
int jj=0; jj<myrank; jj++)
2896 (*mine)->SetFirstRowIndex((
double)temp);
2905 (*left)->SetRowCount(nRowCount);
2906 (*left)->SetColumnCount(nColumnCount);
2907 (*left)->BuildDataBuffer(); temp = 0;
2908 for (
int jj=0; jj<left_neighbor; jj++)
2910 (*left)->SetFirstRowIndex((
double)temp);
2917 (*right)->SetRowCount(nRowCount);
2918 (*right)->SetColumnCount(nColumnCount);
2919 (*right)->BuildDataBuffer(); temp = 0;
2920 for(
int jj=0; jj<right_neighbor; jj++)
2922 (*right)->SetFirstRowIndex((
double)temp);
2939 unsigned int my_nnz, left_nnz, right_nnz;
2940 int isthisrowfilled;
2948 for (
unsigned int ii = 0; ii < source->
GetRowCount(); ii++)
2950 isthisrowfilled = -1;
2954 for (
unsigned int jj = nSubStart; jj < nSubEnd; jj++)
2958 if(startColIndex <= nColIndex && nColIndex <= endColIndex)
2960 isthisrowfilled = 0;
2968 if(isthisrowfilled == -1)
2984 for (
unsigned int ii = 0; ii < source->
GetRowCount(); ii++)
2986 isthisrowfilled = -1;
2990 for (
unsigned int jj = nSubStart; jj < nSubEnd; jj++)
2993 if(startColIndex <= nColIndex && nColIndex <= endColIndex)
2995 isthisrowfilled = 0;
3003 if(isthisrowfilled == -1)
3019 for (
unsigned int ii = 0; ii < source->
GetRowCount(); ii++)
3024 for(
unsigned int jj = nSubStart; jj < nSubEnd; jj++)
3027 if(startColIndex <= nColIndex && nColIndex <= endColIndex)
3056 unsigned int my_nnz, left_nnz, right_nnz;
3058 int isthisrowfilled;
3066 for (
unsigned int ii = 0; ii < source->
GetRowCount(); ii++)
3068 isthisrowfilled = -1;
3072 for (
unsigned int jj = nSubStart; jj < nSubEnd; jj++)
3076 if(startColIndex <= nColIndex && nColIndex <= endColIndex)
3078 isthisrowfilled = 0;
3083 left->
SetAt(curval, ii, nColIndex-startColIndex);
3087 if(isthisrowfilled == -1)
3091 left->
SetAt(curval, ii, 0);
3102 for (
unsigned int ii = 0; ii < source->
GetRowCount(); ii++)
3104 isthisrowfilled = -1;
3108 for (
unsigned int jj = nSubStart; jj < nSubEnd; jj++)
3111 if(startColIndex <= nColIndex && nColIndex <= endColIndex)
3113 isthisrowfilled = 0;
3118 right->
SetAt(curval, ii, nColIndex-startColIndex);
3123 if(isthisrowfilled == -1)
3127 right->
SetAt(curval, ii, 0);
3138 for (
unsigned int ii = 0; ii < source->
GetRowCount(); ii++)
3143 for(
unsigned int jj = nSubStart; jj < nSubEnd; jj++)
3146 if(startColIndex <= nColIndex && nColIndex <= endColIndex)
3152 mine->
SetAt(curval, ii, nColIndex-startColIndex);
3212 #ifndef DISABLE_MPI_ROUTINE
3213 fNorm = vectorTemp.
GetNorm(
true);
3214 #else //DISABLE_MPI_ROUTINE
3216 #endif //DISABLE_MPI_ROUTINE
bool SetDiagonal(CKNVector vector)
Set diagonal elements.
~CKNMatrixOperation()
Destructor.
void SetSize(unsigned int nSize)
Set Vector elements size.
bool InsertMatrix(unsigned int nRow, unsigned int nColumn, unsigned int nRowStart, unsigned int nColumnStart, unsigned int nRowCount, unsigned int nColumnCount, CKNMatrixOperation::CKNDMatrix *pMatrix, bool bCopyZero)
Set element by reference matrix.
double GetImaginaryNumber() const
Get imaginary part.
void ScalarDivision(CKNComplex Scalar)
Scalar division operation.
#define THROW_END_EXIT(EXCEPTION_NAME)
Macro that throws an exception and then exits the program.
unsigned int m_nValueCount
The number of elements.
void AppendMatrix(APPEND_DRIECTION direction, unsigned int nCount)
Appending matrix with direction.
void ScalarMultiple(CKNComplex Scalar)
Scalar multiple operation.
void Normalize(bool bMPI=false)
Normalize vector with norm.
CKNCSR * SplitCSR(int nStart, int nEnd)
Split CSR to MPI slave.
CKNMatrixOperation::CKNVector operator/(CKNMatrixOperation::CKNVector &vector, const CKNComplex fScalar)
void IncreaseNoneZeroCount()
Increasing the saved nonzero element count.
void ReorthogonalizationVector(CKNVector *pVector, CKNComplex complex)
Do reorthogonalization.
void ScalarMultiThanMinusVector(double fScalar, CKNVector *vector)
Do minus operation between vectors after scalar multiplication of the operand.
static void AllReduceComlex(CKNComplex *pNumber, CKNTimeMeasurement::MEASUREMENT_INDEX INDEX=CKNTimeMeasurement::COMM)
Do all reduce function with CKNComplex.
bool GetSmallMatrix(unsigned int nRowStartIndex, unsigned int nColumnStartIndex, unsigned int nRowCount, unsigned int nColumnCount, CKNMatrixOperation::CKNDMatrix *pMatrix)
Get matrix from large matrix.
unsigned int nComponentsFirstUnitCell
Atom counts for interoperation with previous node.
static MPI_Comm GetMPIComm()
Data and operation representation of Matrix.
static void FreeCSR(CKNMatrixOperation::CKNCSR *pCSR)
Deallocating CSR memory.
bool InsertRowAtEnd()
Insert row after last row.
static unsigned int * pRow
For MPI Optimized operation using.
double_vector_t m_vectValueRealBuffer
A member variable for saving nonzero elements.
void ExpandMatrix(unsigned int nMulti, bool bRow, bool bColumn)
Expand matrix order.
#define LOOP_OPTIMIZE_COUNT
double m_fFirstRowIndex
First row index in this node.
double GetRealNumber() const
Get real part.
void operator+=(CKNDMatrix &matrix)
operation overload for adding with reference parameter
static CKNMatrixOperation::CKNCSR * BuildCSRFromFileTemp(FILE *fDataFile, unsigned int nRowOrder, unsigned int nColumnOrder, int nDataCount)
Building CSR from file using double, double, double, double order.
unsigned int GetColumnCount()
Getting column size of matrix.
int compare(const void *pA, const void *pB)
Data and operation representation of CSR(Compressed Sparse Row)
bool PushMatrix(unsigned int nRow, unsigned int nColumn, unsigned int nRowStart, unsigned int nColumnStart, unsigned int nRowCount, unsigned int nColumnCount, CKNMatrixOperation::CKNDMatrix *pMatrix, bool bCopyZero)
Set element by reference matrix to end of buffer.
static void MeasurementEnd(MEASUREMENT_INDEX index)
Measurement end for part.
CKNVector operator-(CKNVector &vector)
operation overload for vector minus operation with reference parameter
unsigned int GetRowCount()
Get matrix row counts.
bool IsNonzeroElement(unsigned int nRow, unsigned int nColumn, unsigned int &nIndex)
Checking given index element has nonzero value or not.
static void MVMulEx_Optimal(CKNCSR *pAMatrix, CKNVector *pVector, CKNVector *pResult, unsigned int, unsigned int, CKNVector *, int)
Matrix and vector multiple operation for 1 layer exchanging communication.
double_vector_t m_vectValueImaginaryBuffer
A member variable for saving nonzero elements.
static void MergeVector(CKNMatrixOperation::CKNVector *pVector, CKNMatrixOperation::CKNVector *pResultVector, unsigned int nMergeSize)
Merge vector to sub rank.
static int GetTotalNodeCount()
void ScalarMultiple(CKNComplex Scalar)
Scalar multiple operation.
bool ConvertDoubleArray(unsigned int *pRowPtr, unsigned int *pColIndex, double *pNNZValueReal, double *pNNZValueImaginary, unsigned int nNNZSize, unsigned int nRowSize, unsigned int nColSize, unsigned int nFirstIndex, bool bZerobase)
Converting general CSR format to CSR.
static CKNMatrixOperation::CKNCSR * BuildCSRFromOneDimArray(double *pReal, double *pImaginary, unsigned int nRowOrder, unsigned int nColumnOrder)
Building CSR from one dimension array.
static int GetLoadBalanceCount(int nRank)
#define GENERAL_TOLERANCE
General tolerance definition.
static double AllReduceDouble(double fNumber)
Do all reduce function with double.
const unsigned long ERROR_OUT_OF_RANGE
Error code meaning that an input index was out of range while accessing a vector or matrix.
void MinusVector(CKNVector *vector)
Do minus operation between vectors.
uint_vector_t m_vectColumn
A member variable for saving column information.
bool ElementScalarMultiple(unsigned int nRow, unsigned int nColumn, CKNComplex Scalar)
Scalar multiple operation.
void BuildDataBuffer()
Allocating memory for class member variable.
static void MMMul(CKNDMatrix *pMatrix, CKNDMatrix *pMatrixOperand, CKNDMatrix *pResult)
Matrix and matrix multiple operation.
bool AreaScalarMultiple(unsigned int nRowStart, unsigned int nRowCount, unsigned int nColumnStart, unsigned int nColumnCount, CKNComplex Scalar)
Scalar multiple operation to specific area.
unsigned int GetNextNonzeroValueIndex(unsigned int nRow, unsigned int nColumn)
Get next index of given row, column index.
static bool IsSameA(double operand1, double operand2, double tol)
Compare two double variable.
static unsigned int MAX_INDEX
constant variable for row that has no element
static int Compare(const void *pA, const void *pB)
Compare operation for quick sort.
static CKNMatrixOperation::CKNCSR * BuildCSRFromFileUnsortdata(FILE *fDataFile, unsigned int nRowOrder, unsigned int nColumnOrder, int nDataCount)
Building CSR from file, sorting the data before building the CSR.
const unsigned long ERROR_WRONG_ORDER_OPERATION
Error code meaning that, during an operation between vector & vector or matrix & vector, the orders don't match...
unsigned int GetRowIndexNo(unsigned int nIndex)
Getting row information data by index.
CKNMatrixOperation()
Constructor.
bool SetAt(CKNComplex number, unsigned int nRow, unsigned int nColumn)
Set element to specific index.
Common definition for Solver.
void PushNoneZeroValue(double fRealValue, double fImaginaryValue, unsigned int nRow, unsigned int nCol)
Saving nonzero value.
bool DiagonalOperation(CKNVector *pOperand, OPERATION_TYPE type, bool bUseSplitVector)
To diagonal element do operation.
static void MVMulEx_AsyncCommWithLocalBlocks(CKNMatrixOperation::CKNCSR *mylocalblock, CKNMatrixOperation::CKNCSR *leftlocalblock, CKNMatrixOperation::CKNCSR *rightlocalblock, CKNVector *pVector, CKNVector *pResult, double *X, double *Xrt, double *Xlt)
Matrix and vector multiple operation using by block csr.
CKNComplex GetAt(unsigned int nIndex)
Get element value from specific index.
Collection of vector and matrix operation.
unsigned int GetColIndexNo(unsigned int nIndex)
Getting Column information data by index.
bool GetNextRowIndexValue(unsigned int nRowFrom, unsigned int &nValueIndex)
Get row index value finding from nRowFrom to end.
static void MergeVectorEx_Optimal(CKNMatrixOperation::CKNVector *pVector, CKNMatrixOperation::CKNVector *pResultVector, unsigned int nMergeSize, double fFirstIndex, unsigned int nSizeFromPrevRank, unsigned int nSizeFromNextRank, unsigned int nSizetoPrevRank, unsigned int nSizetoNextRank, unsigned int *)
Merge vector for 1 layer exchanging.
bool TrnasPos()
Transpose matrix.
void IncreaseRowIndex(unsigned int nRowFrom)
Increase m_nRowCount array value + 1 from nRowFrom. It means at m_nRowCount element has been inserted...
bool Serialize(double *pBuffer, bool bStore)
Serialize vector.
bool PushMatrixConcurrent(unsigned int nRow, CKNMatrixOperation::LPFILL_MATRIX_DATA lpData, bool bCopyZeroOnSite)
Pushing several sub-matrices into CSR.
CKNComplex operator*(CKNVector &vector)
operation overload for dot product with reference parameter
bool InsertRowBefore(unsigned int nRow)
Insert row before specific row index.
bool SetElement(unsigned int nRow, unsigned int nColumn, CKNComplex element)
Set matrix elements value.
static void UpdateLocalCSR(CKNMatrixOperation::CKNCSR *source, CKNMatrixOperation::CKNCSR *mine, CKNMatrixOperation::CKNCSR *left, CKNMatrixOperation::CKNCSR *right)
#define ATOM_DEFAULT_INDEX
Default atom index value that indicates an empty atom instance.
unsigned int nComponentsLastUnitCell
Atom counts for interoperation with next node.
static void MVMul(CKNCSR *pAMatrix, CKNVector *pVector, CKNVector *pResult)
Matrix and vector multiple operation.
unsigned int GetColumnCount()
Get matrix column counts.
double GetFirstRowIndex()
Get first row index.
static void MVMulOptimal(CKNCSR *pAMatrix, CKNVector *pVector, CKNVector *pResult)
Matrix and vector multiple operation for multiple call.
struct CKNMatrixOperation::FILL_MATRIX_DATA * LPFILL_MATRIX_DATA
void operator=(CKNDMatrix &matrix)
operation overload for substitution with reference parameter
CKNMatrixOperation::CKNVector operator*(const CKNComplex fScalar, CKNMatrixOperation::CKNVector &vector)
double_vector_t m_vectValueImaginaryBuffer
A member variable for saving nonzero elements.
bool InsertColumnBefore(unsigned int nColumn)
Insert column before specific column index.
void SetAtEx(unsigned int nIndex, CKNComplex *pValue)
Set element value in specific index, Call by reference.
void Finalize()
Free allocated memory for vector elements.
void BuildRandomVector()
Building vector that has random value elements.
This class includes functions for matrix debugging.
void ScalarDivision(double fScalar)
Scalar division operation.
bool SetRowElement(CKNVector vector, unsigned int nRowIndex)
Set matrix element with row, column index.
double GetNorm(bool bMPI=false)
Getting norm of vector.
static int GetCurrentRank()
CKNVector operator*(CKNVector &vector)
operation overload for matrix and vector multiple operation with reference parameter ...
CKNComplex GetElement(unsigned int nRowIndex, unsigned int nColumnIndex)
Get matrix element with row, column index.
CKNComplex * GetValue(unsigned int nIndex)
Getting nonzero element value by index.
void ResetValue()
Reset every element to zero.
void DumpCSR(const char *pstrFileName)
For debugging save CSR into file.
double_vector_t m_vectValueRealBuffer
A member variable for saving nonzero elements.
void Add(CKNComplex complex)
Adding operation to this class.
static void BuildLocalCSR(CKNMatrixOperation::CKNCSR *source, CKNMatrixOperation::CKNCSR *mine, CKNMatrixOperation::CKNCSR *left, CKNMatrixOperation::CKNCSR *right)
static void MergeVectorOptimal(CKNMatrixOperation::CKNVector *pSrcVector, CKNMatrixOperation::CKNVector *pResultVector, unsigned int nMergeSize, double fFirstIndex)
Merge vector to sub rank, operated without vector class member function call.
unsigned int GetRowCount()
CKNComplex * GetAtPt(unsigned int nIndex)
Get element value from specific index.
void SetAt(unsigned int nIndex, CKNComplex value)
Set element value in specific index, Call by value.
bool SetColumnElement(CKNVector vector, unsigned int nColumnIndex)
Set matrix column.
void operator=(CKNVector &vector)
operation overload for substitution with reference parameter
unsigned int GetSize()
Return Vector elements size.
static unsigned int * pColumn
For MPI Optimized operation using.
bool InsertColumnAtEnd()
Insert column after last column.
void SetRealNumber(double fRealNumber)
Set real part.
static CKNComplex * pValueBuffer
For MPI Optimized operation using.
CKNComplex GetElement(unsigned int nRow, unsigned int nColumn, bool &bResult)
Get Element by index.
const unsigned long ERROR_MALLOC
Error code that means error occur during memory allocation.
This class for complex operation and saving value.
void PlusVector(CKNVector *vector)
Do plus operation between vectors.
void FinishPush()
Insert end index of nonzero value index.
bool GetColumnByVector(unsigned int nColumnIndex, CKNMatrixOperation::CKNVector *pVector)
Get column elements.
void SetRowCount(unsigned int nRow)
Setting row size of matrix.
static CKNComplex MulltiplyComplex(CKNComplex complex1, CKNComplex complex2)
Multiplication operation between complex numbers.
static void MeasurementStart(MEASUREMENT_INDEX index)
Measurement start for part.
std::vector< unsigned int, boost::alignment::aligned_allocator< unsigned int, 64 > > uint_vector_t
static void AllocateLocalCSR(CKNMatrixOperation::CKNCSR **mine, CKNMatrixOperation::CKNCSR **left, CKNMatrixOperation::CKNCSR **right)
void Finalize()
Deallocating memory for member variable.
bool GetRowByVector(unsigned int nRowIndex, CKNMatrixOperation::CKNVector *pVector)
Get row elements.
CKNVector operator+(CKNVector &vector)
operation overload for vector plus operation with reference parameter
void SetColumnCount(unsigned int nColumn)
Setting column size of matrix.
#define ORBITALS
In the Hamiltonian matrix, one atom is inserted as 10 * 10.
uint_vector_t m_vectRow
A member variable for saving row information.
static bool IsSame(double operand1, double operand2, double tol)
Compare two double variable.
static int Gram_schmidt(CKNVector *pVect1, CKNVector *pVect2)
Doing Gram-Schmidt orthogonalization.
bool InsertVector(unsigned int nStartIndex, CKNMatrixOperation::CKNVector *pVector)
void SetComplexNumber(double fReal, double fImaginaray)
Set Complex number using real part and imaginary part.
struct CKNMatrixOperation::CSR_ELEMENT_DATA * LPCSR_ELEMENT_DATA
bool BuildMatrixFirst(unsigned int nRow, unsigned int nColumn)
Building matrix elements.
static CKNMatrixOperation::CKNCSR * BuildCSRFromFile_(FILE *fDataFile, unsigned int nRowOrder, unsigned int nColumnOrder, int nDataCount)
Building CSR from file using int, int, double, double order.
CKNMatrixOperation::CKNDMatrix * pMatrix
void SetImaginaryNumber(double fImaginaryNumber)
Set imaginary part.
Hamiltonian building data.
This class for describing vector for Lanczos method.
static bool VVDot(CKNVector *pVector1, CKNVector *pVector2, CKNComplex *pResult)
Between vectors dot product operation.
static void FreeLocalCSR(CKNMatrixOperation::CKNCSR *mine, CKNMatrixOperation::CKNCSR *left, CKNMatrixOperation::CKNCSR *right)