IPCC  1.0
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
KNMatrixOperation.cpp
Go to the documentation of this file.
1 
7 #include "KNMatrixOperation.h"
8 #include "KNMPIManager.h"
9 #include "KNTimeMeasurement.h"
10 #include "KNIPCCUtility.h"
11 
12 #include "CKNGlobal.h"
13 
14 #include "XeonPhi_header.h"
15 
16 #define LOOP_OPTIMIZE_COUNT 10
17 
18 unsigned int CKNMatrixOperation::CKNCSR::MAX_INDEX = 0xffffffff;
20 unsigned int* CKNMatrixOperation::pRow = NULL;
21 unsigned int* CKNMatrixOperation::pColumn = NULL;
22 
29 {
30  m_nValueCount = 0;
31 }
32 
34 {
35 }
36 
41 {
42  if (nSize == m_nValueCount)
43  return;
44 
45  m_nValueCount = nSize;
46  m_vectValueRealBuffer.resize(nSize);
47  m_vectValueImaginaryBuffer.resize(nSize);
48 }
49 
54 void CKNMatrixOperation::CKNVector::SetAt(unsigned int nIndex, CKNComplex value)
55 {
56  SetAt(nIndex, value.GetRealNumber(), value.GetImaginaryNumber());
57 }
58 
59 void CKNMatrixOperation::CKNVector::SetAtEx(unsigned int nIndex, CKNComplex *pValue)
60 {
61  SetAt(nIndex, pValue->GetRealNumber(), pValue->GetImaginaryNumber());
62 }
63 
69 void CKNMatrixOperation::CKNVector::SetAt(unsigned int nIndex, double fReal, double fImaginary)
70 {
71  if (nIndex > GetSize())
72  {
74  }
75 
76  m_vectValueRealBuffer[nIndex] = fReal;
77  m_vectValueImaginaryBuffer[nIndex] = fImaginary;
78 }
79 
85 {
86  if (nIndex > GetSize())
87  {
88  throw ERROR_OUT_OF_RANGE;
89  return NULL;
90  }
91 
92  m_rtnTemp.SetComplexNumber(m_vectValueRealBuffer[nIndex], m_vectValueImaginaryBuffer[nIndex]);
93  return &m_rtnTemp;
94 }
95 
101 {
102  CKNComplex rtnComplex;
103 
104  if (nIndex > GetSize())
105  {
106  throw ERROR_OUT_OF_RANGE;
107  return rtnComplex;
108  }
109 
110  rtnComplex.SetComplexNumber(m_vectValueRealBuffer[nIndex], m_vectValueImaginaryBuffer[nIndex]);
111  return rtnComplex;
112 }
113 
115 {
116  unsigned int i;
117 
118  for (i = 0; i < GetSize(); i++)
119  {
120  m_vectValueRealBuffer[i] = 0.;
121  m_vectValueImaginaryBuffer[i] = 0.;
122  }
123 
124 }
125 
130 {
131  unsigned int i;
132  CKNComplex tempComplex;
133 
134  for (i = 0; i < GetSize(); i++)
135  {
136  tempComplex.SetComplexNumber(m_vectValueRealBuffer[i], m_vectValueImaginaryBuffer[i]);
137  tempComplex = tempComplex * Scalar;
138 
139  m_vectValueRealBuffer[i] = tempComplex.GetRealNumber();
140  m_vectValueImaginaryBuffer[i] = tempComplex.GetImaginaryNumber();
141  }
142 
143  return;
144 }
145 
150 {
151  //double fReal, fImaginary;
152  unsigned int i;
153 
154  for (i = 0; i < GetSize(); i++)
155  {
156  m_vectValueRealBuffer[i] *= fScalar;
157  m_vectValueImaginaryBuffer[i] *= fScalar;
158  }
159 }
160 
165 {
166  unsigned int i;
167  CKNComplex tempComplex;
168 
169  for (i = 0; i < GetSize(); i++)
170  {
171  tempComplex.SetComplexNumber(m_vectValueRealBuffer[i], m_vectValueImaginaryBuffer[i]);
172  tempComplex = tempComplex / Scalar;
173 
174  m_vectValueRealBuffer[i] = tempComplex.GetRealNumber();
175  m_vectValueImaginaryBuffer[i] = tempComplex.GetImaginaryNumber();
176  }
177 }
178 
183 {
184  double *pReal = NULL, *pImaginary = NULL;
185  unsigned int i;
186  unsigned int nSize = GetSize();
187 
188  pReal = m_vectValueRealBuffer.data();
189  pImaginary = m_vectValueImaginaryBuffer.data();
190 
191 #pragma omp parallel for
192  for (i = 0; i < nSize ; i++)
193  {
194  /*m_vectValueRealBuffer[i] /= fScalar;
195  m_vectValueImaginaryBuffer[i] /= fScalar;*/
196  pReal[i] /= fScalar;
197  pImaginary[i] /= fScalar;
198  }
199 }
200 
202 {
203  unsigned int i;
204 
205  srand((unsigned int)time(NULL));
206  for (i = 0; i < m_nValueCount; i++)
207  {
208  m_vectValueRealBuffer[i] = rand();
209  m_vectValueImaginaryBuffer[i] = rand();
210  }
211 }
212 
217 {
218  double fVectorSize = GetNorm(bMPI);
219  unsigned int i;
220 
221  for (i = 0; i < m_nValueCount; i++)
222  {
223  m_vectValueRealBuffer[i] /= fVectorSize;
224  m_vectValueImaginaryBuffer[i] /= fVectorSize;
225  }
226 }
227 
233 {
234  unsigned int i;
235  CKNComplex PowerTotal;
236  double fTotal = 0.0;
237  unsigned int nLeft = m_nValueCount % LOOP_OPTIMIZE_COUNT;
238  double fNorm;
239  double *pReal = m_vectValueRealBuffer.data();
240  double *pImaginary = m_vectValueImaginaryBuffer.data();
241 
242 #pragma omp parallel for reduction(+:fTotal)
243  for (i = 0; i < m_nValueCount; i++)
244  {
245  const double fReal = pReal[i];
246  const double fImaginary = pImaginary[i];
247 
248  //fNorm = sqrt(m_vectValueRealBuffer[i] * m_vectValueRealBuffer[i] + m_vectValueImaginaryBuffer[i] * m_vectValueImaginaryBuffer[i]);
249  /*fNorm = sqrt(fReal * fReal + fImaginary * fImaginary);
250  fTotal += (fNorm * fNorm);*/
251  fNorm = fReal * fReal + fImaginary * fImaginary;
252  fTotal += fNorm;
253  }
254 
255  if (bMPI)
256  fTotal = CKNMPIManager::AllReduceDouble(fTotal);
257  return sqrt(fTotal);
258 }
259 
265 {
266  unsigned int i, nSize = GetSize();
267  double *pReal = NULL, *pImaginary = NULL;
268  double *pOperandReal = NULL, *pOperandImagianray = NULL;
269 
270  if (nSize != vector->GetSize())
271  {
273  return;
274  }
275 
276  pReal = m_vectValueRealBuffer.data();
277  pImaginary = m_vectValueImaginaryBuffer.data();
278  pOperandReal = vector->m_vectValueRealBuffer.data();
279  pOperandImagianray = vector->m_vectValueImaginaryBuffer.data();
280 
281 #pragma omp parallel for
282  for (i = 0; i < nSize; i++)
283  {
284  pReal[i] = pReal[i] - fScalar * pOperandReal[i];
285  pImaginary[i] = pImaginary[i] - fScalar * pOperandImagianray[i];
286  }
287 }
288 
294 {
295  unsigned int i, nSize = GetSize();
296  double *pReal = NULL, *pImaginary = NULL;
297  double *pOperandReal = NULL, *pOperandImagianray = NULL;
298  double fReal = complex.GetRealNumber(), fImaginary = complex.GetImaginaryNumber();
299 
300  if (nSize != pVector->GetSize())
301  {
303  return;
304  }
305 
306  pReal = m_vectValueRealBuffer.data();
307  pImaginary = m_vectValueImaginaryBuffer.data();
308  pOperandReal = pVector->m_vectValueRealBuffer.data();
309  pOperandImagianray = pVector->m_vectValueImaginaryBuffer.data();
310 
311 #pragma omp parallel for
312  for (i = 0; i < nSize; i++)
313  {
314  pReal[i] = pReal[i] - fReal* pOperandReal[i] + fImaginary * pOperandImagianray[i];
315  pImaginary[i] = pImaginary[i] - fReal * pOperandImagianray[i] - fImaginary * pOperandReal[i];
316  }
317 }
318 
323 {
324  unsigned int i, nSize = GetSize();
325  CKNComplex *pOperand1, *pOperand2;
326 
327  if (nSize != vector->GetSize())
328  {
330  return;
331  }
332 
333  for (i = 0; i < nSize; i++)
334  {
335  pOperand1 = GetAtPt(i);
336  pOperand2 = vector->GetAtPt(i);
337 
338  SetAt(i, pOperand1->GetRealNumber() - pOperand2->GetRealNumber(), pOperand1->GetImaginaryNumber() - pOperand2->GetImaginaryNumber());
339  }
340 }
341 
346 {
347  unsigned int i, nSize = GetSize();
348  CKNComplex *pOperand1, *pOperand2;
349 
350  if (nSize != vector->GetSize())
351  {
353  return;
354  }
355 
356  for (i = 0; i < nSize; i++)
357  {
358  pOperand1 = GetAtPt(i);
359  pOperand2 = vector->GetAtPt(i);
360 
361  SetAt(i, pOperand1->GetRealNumber() + pOperand2->GetRealNumber(), pOperand1->GetImaginaryNumber() + pOperand2->GetImaginaryNumber());
362  }
363 }
364 
366 {
367  m_vectValueRealBuffer.clear();
368  m_vectValueImaginaryBuffer.clear();
369 
370  m_nValueCount = 0;
371 }
372 
379 {
380  bool bRtn = false;
381  unsigned int i;
382 
383  if (nStartIndex > m_nValueCount || nStartIndex + pVector->GetSize() > m_nValueCount)
384  return bRtn;
385 
386  for (i = 0; i < pVector->GetSize(); i++)
387  {
388  m_vectValueRealBuffer[nStartIndex + i] = pVector->GetAt(i).GetRealNumber();;
389  m_vectValueImaginaryBuffer[nStartIndex + i] = pVector->GetAt(i).GetImaginaryNumber();
390  }
391 
392  bRtn = true;
393  return bRtn;
394 }
395 
401 bool CKNMatrixOperation::CKNVector::Serialize(double *pBuffer, bool bStore)
402 {
403  double *pReal = NULL, *pImaginariy = NULL;
404  bool bRtn = false;
405 
406  if( NULL == pBuffer )
407  return bRtn;
408 
409  if( bStore)
410  {
411  pReal = m_vectValueRealBuffer.data();
412  memcpy(pReal, pBuffer, m_nValueCount * sizeof(double));
413 
414  pImaginariy = m_vectValueImaginaryBuffer.data();
415  memcpy(pImaginariy, pBuffer + m_nValueCount, m_nValueCount * sizeof(double));
416  }
417  else
418  {
419  pReal = m_vectValueRealBuffer.data();
420  memcpy(pBuffer, pReal, m_nValueCount * sizeof(double));
421 
422  pImaginariy = m_vectValueImaginaryBuffer.data();
423  memcpy(pBuffer + m_nValueCount, pImaginariy, m_nValueCount * sizeof(double));
424  }
425 
426  bRtn = true;
427  return bRtn;
428 }
429 
435 {
436  return operator*((*vector));
437 }
438 
444 {
445  CKNComplex Rtn;
446  unsigned int i, nSize = GetSize();
447 
448  if (nSize != vector.GetSize())
449  {
451  return Rtn;
452  }
453 
454  for (i = 0; i < nSize; i++)
455  {
456  Rtn += (GetAt(i) * vector.GetAt(i));
457  }
458  return Rtn;
459 }
460 
466 {
467  return operator-((*vector));
468 }
469 
475 {
476  CKNVector rtnVector;
477  unsigned int i, nSize = GetSize();
478 
479  if (nSize != vector.GetSize())
480  {
482  return rtnVector;
483  }
484 
485  rtnVector.SetSize(nSize);
486 
487  for (i = 0; i < nSize; i++)
488  {
489  rtnVector.SetAt(i, GetAt(i) - vector.GetAt(i));
490  }
491  return rtnVector;
492 }
493 
499 {
500  return operator+((*vector));
501 }
502 
508 {
509  CKNVector rtnVector;
510  unsigned int i, nSize = GetSize();
511 
512  if (nSize != vector.GetSize())
513  {
515  return rtnVector;
516  }
517 
518  rtnVector.SetSize(nSize);
519  for (i = 0; i < nSize; i++)
520  {
521  rtnVector.SetAt(i, GetAt(i) + vector.GetAt(i));
522  }
523 
524  return rtnVector;
525 }
526 
532 {
533  operator=((*vector));
534 }
535 
541 {
542  unsigned int i, nSize = vector.GetSize();
543  double *pReal = NULL, *pImaginary = NULL;
544  double *pSourceReal = NULL, *pSourceImagianry = NULL;
545 
546  SetSize(nSize);
547 
548  pReal = m_vectValueRealBuffer.data();
549  pImaginary = m_vectValueImaginaryBuffer.data();
550  pSourceReal = vector.m_vectValueRealBuffer.data();
551  pSourceImagianry = vector.m_vectValueImaginaryBuffer.data();
552 
553 #pragma omp parallel for
554  for (i = 0; i < nSize; i++)
555  {
556  pReal[i] = pSourceReal[i];
557  pImaginary[i] = pSourceImagianry[i];
558  }
559 }
565 
566 
573 {
574 }
575 
577 {
578 }
579 
585 bool CKNMatrixOperation::CKNDMatrix::BuildMatrixFirst(unsigned int nRow, unsigned int nColumn)
586 {
587  bool bRtn = false;
588  unsigned int i, j;
589 
590  if (!m_vectValueBuffer.empty())
591  return bRtn;
592 
593  bRtn = true;
594  for (i = 0; i < nRow; i++)
595  {
596  for (j = 0; j < nColumn; j++)
597  {
598  CKNComplex element;
599  m_vectValueBuffer.push_back(element);
600  }
601  }
602 
603  m_nRowCount = nRow;
604  m_nColumnCount = nColumn;
605 
606  return bRtn;
607 }
608 
615 bool CKNMatrixOperation::CKNDMatrix::SetElement(unsigned int nRow, unsigned int nColumn, CKNComplex element)
616 {
617  return SetElement(nRow, nColumn, element.GetRealNumber(), element.GetImaginaryNumber());
618 }
619 
627 bool CKNMatrixOperation::CKNDMatrix::SetElement(unsigned int nRow, unsigned int nColumn, double fRealNumber, double fImageNumber)
628 {
629  bool bRtn = false;
630 
631  if (nRow > m_nRowCount || nColumn > m_nColumnCount)
632  return bRtn;
633 
634  m_vectValueBuffer[m_nColumnCount*nRow + nColumn].SetComplexNumber(fRealNumber, fImageNumber);
635 
636  bRtn = true;
637  return bRtn;
638 }
639 
650 bool CKNMatrixOperation::CKNDMatrix::SetElement(unsigned int nRowStart, unsigned int nColumnStart, unsigned int nSrcRowStart, unsigned int nSrcColumnStart, unsigned int nSrcRowCount, unsigned int nSrcColumnCount, CKNDMatrix matrix)
651 {
652  bool bRtn = false;
653  unsigned int i, j;
654 
655  if (nRowStart > m_nRowCount || nRowStart + nSrcRowCount > m_nRowCount
656  || nColumnStart > m_nColumnCount || nColumnStart + nSrcColumnCount > m_nColumnCount)
657  return bRtn;
658 
659  for (i = 0; i < nSrcRowCount; i++)
660  {
661  for (j = 0; j < nSrcColumnCount; j++)
662  {
663  CKNComplex complexNumber = matrix.GetElement(nSrcRowStart + i, nSrcColumnStart + j);
664  m_vectValueBuffer[m_nColumnCount*(nRowStart + i) + (nColumnStart + j)] = complexNumber;
665  }
666  }
667 
668  bRtn = true;
669  return bRtn;
670 }
671 
677 {
678  unsigned int nFormerRow = m_nRowCount;
679  unsigned int nFormerColumn = m_nColumnCount;
680  unsigned int i, j;
681  std::vector<CKNComplex> vectTemp;
682 
683  switch (direction)
684  {
685  case ROW_DIRECTION:
686  m_nRowCount += nCount;
687  break;
688  case COLUMN_DIRECTION:
689  m_nColumnCount += nCount;
690  break;
691  }
692 
693  for (i = 0; i < m_nRowCount * m_nColumnCount; ++i)
694  {
695  CKNComplex element;
696  vectTemp.push_back(element);
697  }
698 
699  for (i = 0; i < nFormerRow; ++i)
700  {
701  for (j = 0; j < nFormerColumn; ++j)
702  {
703  CKNComplex element = m_vectValueBuffer[nFormerColumn*i + j];
704  vectTemp[m_nColumnCount*i + j].SetComplexNumber(element.GetRealNumber(), element.GetImaginaryNumber());
705  }
706  }
707 }
708 
714 {
715  unsigned int i;
716  bool bRtn = false;
717 
718  if (vector.GetSize() != m_nRowCount || vector.GetSize() != m_nColumnCount)
719  return bRtn;
720 
721  for (i = 0; i < m_nRowCount; ++i)
722  {
723  SetElement(i, i, vector.GetAt(i));
724  }
725 
726  bRtn = true;
727  return bRtn;
728 }
729 
735 CKNComplex CKNMatrixOperation::CKNDMatrix::GetElement(unsigned int nRowIndex, unsigned int nColumnIndex)
736 {
737  return m_vectValueBuffer[m_nColumnCount*nRowIndex + nColumnIndex];
738 }
739 
745 bool CKNMatrixOperation::CKNDMatrix::SetColumnElement(CKNVector vector, unsigned int nColumnIndex)
746 {
747  bool bRtn = false;
748  unsigned int i;
749 
750  if (vector.GetSize() > m_nRowCount)
751  return bRtn;
752 
753  for (i = 0; i < vector.GetSize(); ++i)
754  {
755  SetElement(i, nColumnIndex, vector.GetAt(i));
756  }
757 
758  bRtn = true;
759  return bRtn;
760 }
761 
767 bool CKNMatrixOperation::CKNDMatrix::SetRowElement(CKNVector vector, unsigned int nRowIndex)
768 {
769  bool bRtn = false;
770  unsigned int i;
771 
772  if (vector.GetSize() > m_nColumnCount)
773  return bRtn;
774 
775  for (i = 0; i < vector.GetSize(); ++i)
776  {
777  SetElement(nRowIndex, i, vector.GetAt(i));
778  }
779 
780  bRtn = true;
781  return bRtn;
782 }
783 
788 {
789  unsigned int i;
790 
791  for (i = 0; i < m_vectValueBuffer.size(); i++)
792  {
793  m_vectValueBuffer[i] = m_vectValueBuffer[i] * Scalar;
794  }
795 }
796 
801 {
802  double fReal, fImaginary;
803  unsigned int i;
804 
805  for (i = 0; i < m_vectValueBuffer.size(); i++)
806  {
807  fReal = m_vectValueBuffer[i].GetRealNumber();
808  fImaginary = m_vectValueBuffer[i].GetImaginaryNumber();
809  m_vectValueBuffer[i].SetComplexNumber(fScalar*fReal, fScalar*fImaginary);
810  }
811 }
812 
813 
820 {
821  bool bRtn = false;
822  unsigned int i;
823 
824  if (nColumnIndex > m_nColumnCount)
825  return bRtn;
826 
827  pVector->SetSize(m_nRowCount);
828  for (i = 0; i < m_nRowCount; ++i)
829  pVector->SetAt(i, m_vectValueBuffer[i*m_nColumnCount + nColumnIndex]);
830 
831  bRtn = true;
832  return bRtn;
833 }
834 
841 {
842  bool bRtn = false;
843  unsigned int i;
844 
845  if (nRowIndex > m_nRowCount)
846  return bRtn;
847 
848  pVector->SetSize(m_nColumnCount);
849  for (i = 0; i < m_nColumnCount; ++i)
850  pVector->SetAt(i, m_vectValueBuffer[nRowIndex*m_nColumnCount + i]);
851 
852  bRtn = true;
853  return bRtn;
854 }
855 
864 bool CKNMatrixOperation::CKNDMatrix::GetSmallMatrix(unsigned int nRowStartIndex, unsigned int nColumnStartIndex, unsigned int nRowCount, unsigned int nColumnCount, CKNMatrixOperation::CKNDMatrix *pMatrix)
865 {
866  bool bRtn = false;
867  unsigned int i, j;
868 
869  if (nRowStartIndex + nRowCount > m_nRowCount)
870  return bRtn;
871 
872  if (nColumnCount + nColumnCount > m_nColumnCount)
873  return bRtn;
874 
875  if (pMatrix->GetColumnCount() < nColumnCount || pMatrix->GetRowCount() < nRowCount)
876  return bRtn;
877 
878 
879  for (i = nRowStartIndex; i < nRowStartIndex + nRowCount; i++)
880  {
881  for (j = nColumnStartIndex; j < nColumnStartIndex + nColumnCount; j++)
882  {
883  pMatrix->SetElement(i - nRowStartIndex, j - nColumnStartIndex, m_vectValueBuffer[i*m_nColumnCount + j]);
884  }
885  }
886 
887  bRtn = true;
888  return bRtn;
889 }
890 
895 {
896  unsigned int nCount = m_vectValueBuffer.size(), i;
897 
898  for (i = 0; i < nCount; ++i)
899  m_vectValueBuffer[i].Division(2.0);
900 }
901 
906 {
907  operator+=((*matrix));
908 }
909 
914 {
915  if (matrix.GetColumnCount() != GetColumnCount() || matrix.GetRowCount() != GetRowCount())
916  return;
917 
918  unsigned int i, j;
919 
920  for (i = 0; i < m_nRowCount; ++i)
921  {
922  for (j = 0; j < m_nColumnCount; ++j)
923  {
924  m_vectValueBuffer[m_nColumnCount*i + j] = m_vectValueBuffer[m_nColumnCount*i + j] + matrix.GetElement(i, j);
925  }
926  }
927 }
928 
933 {
934  operator=((*matrix));
935 }
936 
941 {
942  unsigned int i, j;
943 
944  BuildMatrixFirst(matrix.GetRowCount(), matrix.GetColumnCount());
945 
946  for (i = 0; i < m_nRowCount; i++)
947  {
948  for (j = 0; j < m_nColumnCount; j++)
949  {
950  SetElement(i, j, matrix.GetElement(i, j));
951  }
952  }
953 }
954 
956 {
957  bool bRtn = false;
958  unsigned int i, j, nTemp;
959  CKNComplex tempNumber;
960 
961 
962  if (m_nColumnCount == m_nRowCount)
963  {
964  for (i = 0; i < m_nRowCount; ++i)
965  {
966  for (j = 0; j < m_nColumnCount; ++j)
967  {
968  if (j <= i)
969  continue;
970 
971  tempNumber = m_vectValueBuffer[i*m_nColumnCount + j];
972  m_vectValueBuffer[i*m_nColumnCount + j] = m_vectValueBuffer[j*m_nColumnCount + i];
973  m_vectValueBuffer[j*m_nColumnCount + i] = tempNumber;
974  }
975  }
976  }
977  else
978  {
979  std::vector<CKNComplex> tempVector;
980 
981  for (i = 0; i < m_nColumnCount; i++)
982  {
983  for (j = 0; j < m_nRowCount; j++)
984  {
985  CKNComplex element;
986  tempVector.push_back(element);
987  }
988  }
989 
990 
991  for (i = 0; i < m_nRowCount; ++i)
992  for (j = 0; j < m_nColumnCount; ++j)
993  tempVector[j*m_nRowCount + i] = m_vectValueBuffer[i*m_nColumnCount + j];
994 
995  nTemp = m_vectValueBuffer.size();
996  for (i = 0; i < nTemp; ++i)
997  m_vectValueBuffer[i] = tempVector[i];
998 
999  nTemp = m_nRowCount;
1000  m_nRowCount = m_nColumnCount;
1001  m_nColumnCount = nTemp;
1002  }
1003 
1004  bRtn = true;
1005  return bRtn;
1006 }
1007 
1013 
1020 {
1021  m_nValueCount = 0;
1022  m_nValueStackCount = 0;
1023  m_nRowCount = 0;
1024  m_nColumnCount = 0;
1025  MAX_INDEX = -1;
1026  m_fFirstRowIndex = 0;
1027  nComponentsFirstUnitCell = 0;
1028  nComponentsLastUnitCell = 0;
1029 }
1030 
1032 {
1033 
1034 }
1035 
1037 {
1038  m_nValueCount++;
1039 }
1040 
1042 {
1043  for (unsigned int i = 0; i < m_nRowCount + 1; i++)
1044  m_vectRow.push_back(MAX_INDEX);
1045 }
1046 
1047 int compare(const void *pA, const void *pB)
1048 {
1051 
1052  if (NULL == lpA->pMatrix)
1053  return 1;
1054  if (NULL == lpB->pMatrix)
1055  return -1;
1056 
1057  if (lpA->nColumnIndex > lpB->nColumnIndex)
1058  return 1;
1059  else
1060  return -1;
1061 
1062  return -1;
1063 }
1064 
1072 {
1073  bool bRtn = false;
1074  int nValueCount = 0, i, j, k, nValidIndex[5];
1075  unsigned int nHonSiteIndex = lpData[0].nColumnIndex;
1076  CKNComplex tempNumber;
1077 
1078  for (i = 0; i < 4; ++i)
1079  {
1080  if (NULL != lpData[i + 1].pMatrix)
1081  nValidIndex[nValueCount++] = i + 1;
1082  }
1083  nValidIndex[nValueCount++] = 0;
1084  qsort(lpData, 5, sizeof(CKNMatrixOperation::FILL_MATRIX_DATA), compare);
1085 
1086  for (i = 0; i < ORBITALS; ++i)
1087  {
1088  for (j = 0; j < nValueCount; ++j)
1089  {
1090  if( ATOM_DEFAULT_INDEX == lpData[j].nColumnIndex )
1091  continue;
1092 
1093  for (k = 0; k < ORBITALS; ++k)
1094  {
1095  tempNumber = lpData[j].pMatrix->GetElement(i, k);
1096  if (nHonSiteIndex == lpData[j].nColumnIndex && false == bCopyZeroOnSite && 0 == tempNumber.GetRealNumber() && 0 == tempNumber.GetImaginaryNumber())
1097  continue;
1098 
1099  PushNoneZeroValue(tempNumber.GetRealNumber(), tempNumber.GetImaginaryNumber(), nRow * ORBITALS + i, lpData[j].nColumnIndex *ORBITALS + k);
1100  }
1101  }
1102  }
1103  FinishPush();
1104 
1105  bRtn = true;
1106  return bRtn;
1107 }
1108 
1120 bool CKNMatrixOperation::CKNCSR::PushMatrix(unsigned int nRow, unsigned int nColumn, unsigned int nRowStart, unsigned int nColumnStart, unsigned int nRowCount, unsigned int nColumnCount, CKNMatrixOperation::CKNDMatrix *pMatrix, bool bCopyZero)
1121 {
1122  bool bRtn = false;
1123  CKNComplex tempNumber;
1124  unsigned int i, j;
1125 
1126  if (m_nRowCount < nRow + nRowCount - nRowStart || m_nColumnCount < nColumn + nColumnCount - nColumnStart)
1127  return bRtn;
1128 
1129  for (i = nRowStart; i < nRowCount; i++)
1130  {
1131  for (j = nColumnStart; j < nColumnCount; j++)
1132  {
1133  tempNumber = pMatrix->GetElement(i, j);
1134  if (false == bCopyZero && 0 == tempNumber.GetRealNumber() && 0 == tempNumber.GetImaginaryNumber())
1135  continue;
1136 
1137  PushNoneZeroValue(tempNumber.GetRealNumber(), tempNumber.GetImaginaryNumber(), nRow + i - nRowStart, nColumn + j - nColumnStart);
1138  }
1139  }
1140  FinishPush();
1141 
1142  bRtn = true;
1143  return bRtn;
1144 }
1145 
1154 bool CKNMatrixOperation::CKNCSR::AreaScalarMultiple(unsigned int nRowStart, unsigned int nRowCount, unsigned int nColumnStart, unsigned int nColumnCount, CKNComplex Scalar)
1155 {
1156  bool bRtn = false;
1157  unsigned int i, j;
1158 
1159  for (i = nRowStart; i < nRowStart + nRowCount; ++i)
1160  {
1161  for (j = nColumnStart; j < nColumnStart + nColumnCount; ++j)
1162  {
1163  bRtn = ElementScalarMultiple(i, j, Scalar);
1164  if (!bRtn)
1165  {
1166  return bRtn;
1167  }
1168  }
1169  }
1170 
1171  return bRtn;
1172 }
1173 
1180 bool CKNMatrixOperation::CKNCSR::ElementScalarMultiple(unsigned int nRow, unsigned int nColumn, CKNComplex Scalar)
1181 {
1182  CKNComplex tempNumber;
1183  bool bResult, bRtn = false;;
1184 
1185  tempNumber = GetElement(nRow, nColumn, bResult);
1186  if (!bResult)
1187  return bRtn;
1188 
1189  tempNumber = tempNumber * Scalar;
1190  bRtn = SetAt(tempNumber, nRow, nColumn);
1191 
1192  return bRtn;
1193 }
1194 
1201 bool CKNMatrixOperation::CKNCSR::ElementScalarMultiple(unsigned int nRow, unsigned int nColumn, double fScalar)
1202 {
1203  CKNComplex tempNumber;
1204  bool bResult, bRtn = false;;
1205 
1206  tempNumber = GetElement(nRow, nColumn, bResult);
1207  if (!bResult)
1208  return bRtn;
1209 
1210  tempNumber = tempNumber * fScalar;
1211  bRtn = SetAt(tempNumber, nRow, nColumn);
1212 
1213  return bRtn;
1214 }
1215 
1227 bool CKNMatrixOperation::CKNCSR::InsertMatrix(unsigned int nRow, unsigned int nColumn, unsigned int nRowStart, unsigned int nColumnStart, unsigned int nRowCount, unsigned int nColumnCount, CKNMatrixOperation::CKNDMatrix *pMatrix, bool bCopyZero)
1228 {
1229  unsigned int nMatrixRowCount = pMatrix->GetRowCount();
1230  unsigned int nMatrixColumnCount = pMatrix->GetColumnCount();
1231  bool bRtn = false;
1232  CKNComplex tempNumber;
1233  unsigned int i, j;
1234 
1235  if (m_nRowCount < nRow + nRowCount - nRowStart || m_nColumnCount < nColumn + nColumnCount - nColumnStart)
1236  return bRtn;
1237 
1238  for (i = nRowStart; i < nRowCount; i++)
1239  {
1240  for (j = nColumnStart; j < nColumnCount; j++)
1241  {
1242  tempNumber = pMatrix->GetElement(i, j);
1243  if (false == bCopyZero && 0 == tempNumber.GetRealNumber() && 0 == tempNumber.GetImaginaryNumber())
1244  continue;
1245 
1246  SetAt(tempNumber, nRow + i - nRowStart, nColumn + j - nColumnStart);
1247  }
1248  }
1249 
1250  bRtn = true;
1251  return bRtn;
1252 }
1253 
1258 unsigned int CKNMatrixOperation::CKNCSR::GetRowIndexNo(unsigned int nIndex)
1259 {
1260  if (nIndex > GetRowCount() + 1)
1261  {
1262  throw ERROR_OUT_OF_RANGE;
1263  return MAX_INDEX;
1264  }
1265 
1266  return m_vectRow[nIndex];
1267 }
1268 
1273 unsigned int CKNMatrixOperation::CKNCSR::GetColIndexNo(unsigned int nIndex)
1274 {
1275  if (nIndex > GetNoneZeroCount())
1276  {
1277  throw ERROR_OUT_OF_RANGE;
1278  return MAX_INDEX;
1279  }
1280 
1281  //return m_pnColum[nIndex];
1282  return m_vectColumn[nIndex];
1283 }
1284 
1290 {
1291  CKNComplex dumyValue;
1292  if (nIndex > GetNoneZeroCount())
1293  {
1294  throw ERROR_OUT_OF_RANGE;
1295  return NULL;
1296  }
1297 
1298  m_rtnTemp.SetComplexNumber(m_vectValueRealBuffer[nIndex], m_vectValueImaginaryBuffer[nIndex]);
1299  return &m_rtnTemp;
1300 }
1301 
1308 void CKNMatrixOperation::CKNCSR::PushNoneZeroValue(double fRealValue, double fImaginaryValue, unsigned int nRow, unsigned int nCol)
1309 {
1310  m_vectValueRealBuffer.push_back(fRealValue);
1311  m_vectValueImaginaryBuffer.push_back(fImaginaryValue);
1312  m_vectColumn.push_back(nCol);
1313 
1314  if (MAX_INDEX == GetRowIndexNo(nRow))
1315  m_vectRow[nRow] = m_nValueStackCount;
1316 
1317  m_nValueStackCount++;
1318  m_nValueCount++;
1319 }
1320 
1322 {
1323  m_vectRow[GetRowCount()] = GetNoneZeroCount();
1324 }
1325 
1338 bool CKNMatrixOperation::CKNCSR::ConvertDoubleArray(unsigned int *pRowPtr, unsigned int *pColIndex, double *pNNZValueReal, double *pNNZValueImaginary, unsigned int nNNZSize, unsigned int nRowSize, unsigned int nColSize, unsigned int nFirstIndex, bool bZerobase)
1339 {
1340  bool bRtn = false;
1341  int nAdjustIndex = 0;
1342  int i, j;
1343  int nStartIndex, nEndIndex;
1344  double fReal, fImaginary;
1345 
1346  SetRowCount(nRowSize);
1347  SetColumnCount(nColSize);
1348  BuildDataBuffer();
1349 
1350  if( !bZerobase )
1351  nAdjustIndex = -1;
1352 
1353  for( i = 0; i < nRowSize ; ++ i )
1354  {
1355  nStartIndex = pRowPtr[i] + nAdjustIndex;
1356  nEndIndex = pRowPtr[i+1] + nAdjustIndex;
1357  for( j = nStartIndex ; j < nEndIndex ; ++ j)
1358  {
1359  if( pNNZValueReal )
1360  fReal = pNNZValueReal[j];
1361  else
1362  fReal = 0.;
1363 
1364  if( pNNZValueImaginary )
1365  fImaginary = pNNZValueImaginary[j];
1366  else
1367  fImaginary = 0.;
1368 
1369  PushNoneZeroValue(fReal, fImaginary, i, pColIndex[j] + nAdjustIndex);
1370  }
1371  }
1372 
1373  FinishPush();
1374  SetFirstRowIndex(nFirstIndex + nAdjustIndex);
1375 
1376  bRtn = true;
1377  return bRtn;
1378 }
1379 
1381 {
1382  if (NULL == this)
1383  return;
1384 
1385  if (m_vectColumn.size() > 0)
1386  m_vectColumn.clear();
1387 
1388  if (m_vectValueRealBuffer.size() > 0)
1389  m_vectValueRealBuffer.clear();
1390 
1391  if (m_vectValueImaginaryBuffer.size() > 0)
1392  m_vectValueImaginaryBuffer.clear();
1393 
1394  if (m_vectRow.size() > 0)
1395  m_vectRow.clear();
1396 }
1397 
1403 void CKNMatrixOperation::CKNCSR::ExpandMatrix(unsigned int nMulti, bool bRow, bool bColumn)
1404 {
1405  unsigned int nOriginSize, i;
1406 
1407  if (false == bRow && false == bColumn)
1408  return;
1409 
1410  if (bRow)
1411  {
1412  nOriginSize = GetRowCount();
1413  SetRowCount(nMulti*nOriginSize);
1414 
1415  uint_vector_t temp = m_vectRow;
1416 
1417  m_vectRow.clear();
1418  for (i = 0; i < GetRowCount(); i++)
1419  m_vectRow.push_back(MAX_INDEX);
1420  m_vectRow.push_back(GetNoneZeroCount());
1421 
1422  for (i = 0; i < temp.size() - 1; i++)
1423  m_vectRow[i*nMulti] = temp[i];
1424  }
1425 
1426  if (bColumn)
1427  {
1428  nOriginSize = GetColumnCount();
1429  SetColumnCount(nMulti*nOriginSize);
1430  for (i = 0; i < GetNoneZeroCount(); i++)
1431  m_vectColumn[i] *= nMulti;
1432  }
1433 }
1434 
1440 bool CKNMatrixOperation::CKNCSR::GetNextRowIndexValue(unsigned int nRowFrom, unsigned int &nValueIndex)
1441 {
1442  bool bRtn = false;
1443  unsigned int nRowIndex = nRowFrom;
1444 
1445  if (nRowFrom > m_nRowCount)
1446  return bRtn;
1447 
1448  while (MAX_INDEX == m_vectRow[nRowIndex])
1449  {
1450  nRowIndex++;
1451  if (nRowIndex > m_nRowCount)
1452  return bRtn;
1453  }
1454 
1455  nValueIndex = m_vectRow[nRowIndex];
1456 
1457  bRtn = true;
1458  return bRtn;
1459 }
1460 
1465 {
1466  unsigned int i;
1467 
1468  for (i = nRowFrom; i < GetRowCount(); i++)
1469  {
1470  if (MAX_INDEX != m_vectRow[i])
1471  m_vectRow[i]++;
1472  }
1473 }
1474 
1480 bool CKNMatrixOperation::CKNCSR::SetAt(CKNComplex number, unsigned int nRow, unsigned int nColumn)
1481 {
1482  bool bRtn = false;
1483  unsigned int nIndex;
1484  unsigned int nRowStart;
1485  CKNComplex bufferElement = number, tempElement;
1486  unsigned int bufferColIndex = nColumn;
1487  double_vector_t::iterator valueIndex = m_vectValueRealBuffer.begin();
1488  uint_vector_t::iterator columnIndex = m_vectColumn.begin();
1489 
1490 
1491  if (nRow >= GetRowCount() || nColumn >= GetColumnCount())
1492  return bRtn;
1493 
1494  if (IsNonzeroElement(nRow, nColumn, nIndex))
1495  {
1496  m_vectValueRealBuffer[nIndex] = number.GetRealNumber();
1497  m_vectValueImaginaryBuffer[nIndex] = number.GetImaginaryNumber();
1498  }
1499  else
1500  {
1501  nRowStart = GetRowIndexNo(nRow);
1502  if (MAX_INDEX != nRowStart)
1503  {
1504  unsigned int nInsertPos;
1505 
1506  if (GetColIndexNo(nRowStart) > nColumn)
1507  {
1508  nInsertPos = nRowStart;
1509  }
1510  else
1511  {
1512  nInsertPos = GetNextNonzeroValueIndex(nRow, nColumn);
1513  }
1514 
1515  if (MAX_INDEX == nInsertPos)
1516  {
1517  PushNoneZeroValue(number.GetRealNumber(), number.GetImaginaryNumber(), nRow, nColumn);
1518  FinishPush();
1519  }
1520  else
1521  {
1522  m_vectValueRealBuffer.insert(valueIndex + nInsertPos, bufferElement.GetRealNumber());
1523  m_vectValueImaginaryBuffer.insert(valueIndex + nInsertPos, bufferElement.GetImaginaryNumber());
1524  m_vectColumn.insert(columnIndex + nInsertPos, nColumn);
1525  IncreaseRowIndex(nRowStart + 1);
1526  IncreaseNoneZeroCount();
1527  FinishPush();
1528  }
1529  }
1530  else
1531  {
1532  if (GetNextRowIndexValue(nRow + 1, nRowStart))
1533  {
1534  m_vectValueRealBuffer.insert(valueIndex + nRowStart, bufferElement.GetRealNumber());
1535  m_vectValueImaginaryBuffer.insert(valueIndex + nRowStart, bufferElement.GetImaginaryNumber());
1536  m_vectColumn.insert(columnIndex + nRowStart, nColumn);
1537  m_vectRow[nRow] = nRowStart;
1538  IncreaseRowIndex(nRowStart + 1);
1539  IncreaseNoneZeroCount();
1540  FinishPush();
1541  }
1542  else
1543  {
1544  PushNoneZeroValue(number.GetRealNumber(), number.GetImaginaryNumber(), nRow, nColumn);
1545  FinishPush();
1546  }
1547  }
1548  }
1549 
1550  bRtn = true;
1551  return bRtn;
1552 }
1553 
1560 CKNComplex CKNMatrixOperation::CKNCSR::GetElement(unsigned int nRow, unsigned int nColumn, bool &bResult)
1561 {
1562  CKNComplex elementValue;
1563  unsigned int nSubStart = GetRowIndexNo(nRow), nSubEnd = GetRowIndexNo(nRow + 1), i;
1564 
1565  bResult = false;
1566  if (MAX_INDEX == nSubStart)
1567  return elementValue;
1568 
1569  if (MAX_INDEX == nSubEnd)
1570  {
1571  unsigned int nIndex = nRow + 2;
1572 
1573  while (MAX_INDEX == nSubEnd && nIndex <= GetRowCount())
1574  nSubEnd = GetRowIndexNo(nIndex++);
1575  }
1576 
1577  for (i = nSubStart; i < nSubEnd; i++)
1578  {
1579  if (nColumn == m_vectColumn[i])
1580  {
1581  bResult = true;
1582  elementValue.SetComplexNumber(m_vectValueRealBuffer[i], m_vectValueImaginaryBuffer[i]);
1583  return elementValue;
1584  }
1585  }
1586 
1587  return elementValue;
1588 }
1589 
1590 
1597 bool CKNMatrixOperation::CKNCSR::IsNonzeroElement(unsigned int nRow, unsigned int nColumn, unsigned int &nIndex)
1598 {
1599  bool bRtn = false;
1600  unsigned int i;
1601 
1602  unsigned int nSubStart = GetRowIndexNo(nRow);
1603  unsigned int nSubEnd = GetRowIndexNo(nRow + 1);
1604 
1605  if (0 == GetNoneZeroCount())
1606  return bRtn;
1607 
1608  if (MAX_INDEX == nSubStart)
1609  return bRtn;
1610 
1611  if (MAX_INDEX == nSubEnd)
1612  {
1613  if (!GetNextRowIndexValue(nRow + 2, nSubEnd))
1614  {
1615  return bRtn;
1616  }
1617  }
1618 
1619  for (i = nSubStart; i < nSubEnd; i++)
1620  {
1621  unsigned int nColIndex = GetColIndexNo(i);
1622  if (nColIndex == nColumn)
1623  {
1624  nIndex = i;
1625  bRtn = true;
1626  return bRtn;
1627  }
1628  }
1629 
1630  return bRtn;
1631 }
1632 
1639 unsigned int CKNMatrixOperation::CKNCSR::GetNextNonzeroValueIndex(unsigned int nRow, unsigned int nColumn)
1640 {
1641  unsigned int i;
1642 
1643  unsigned int nSubStart = GetRowIndexNo(nRow), nSubEnd = GetRowIndexNo(nRow + 1);
1644  unsigned int nTemp = nRow;
1645 
1646  if (MAX_INDEX == nSubEnd)
1647  {
1648  if (!GetNextRowIndexValue(nRow + 2, nSubEnd))
1649  {
1650  return MAX_INDEX;
1651  }
1652  }
1653 
1654  for (i = nSubStart; i < nSubEnd; i++)
1655  {
1656  unsigned int nColIndex = GetColIndexNo(i);
1657  if (nColIndex > nColumn)
1658  {
1659  return i;
1660  }
1661  }
1662  return nSubEnd;
1663 }
1664 
1670 {
1671  bool bRtn = false;
1672  unsigned int i;
1673  unsigned int tempRowData, bufferRowData;
1674 
1675  if (nRow >= GetRowCount())
1676  return bRtn;
1677 
1678  bufferRowData = -1;
1679  for (i = nRow; i < GetRowCount(); i++)
1680  {
1681  tempRowData = GetRowIndexNo(i);
1682  m_vectRow[i] = bufferRowData;
1683  bufferRowData = tempRowData;
1684  }
1685  m_vectRow.push_back(bufferRowData);
1686  SetRowCount(GetRowCount() + 1);
1687 
1688  bRtn = true;
1689  return bRtn;
1690 }
1691 
1696 {
1697  m_vectRow[GetRowCount()] = -1;
1698  SetRowCount(GetRowCount() + 1);
1699  m_vectRow.push_back(GetNoneZeroCount());
1700 
1701  return true;
1702 }
1703 
1709 {
1710  bool bRtn = false;
1711  unsigned int i, nNonZeroValueCount;
1712 
1713  if (nColumn >= GetColumnCount())
1714  return bRtn;
1715 
1716  nNonZeroValueCount = GetNoneZeroCount();
1717 
1718  for (i = 0; i < nNonZeroValueCount; i++)
1719  {
1720  if (m_vectColumn[i] >= nColumn)
1721  m_vectColumn[i]++;
1722  }
1723 
1724  SetColumnCount(GetColumnCount() + 1);
1725 
1726  bRtn = true;
1727  return bRtn;
1728 }
1729 
1734 {
1735  SetColumnCount(GetColumnCount() + 1);
1736  return true;
1737 }
1738 
1744 {
1746  unsigned int i, j, nSize = GetRowCount();
1747 
1748  if (nSize != vector.GetSize())
1749  {
1751  return rtnVector;
1752  }
1753 
1754  rtnVector.SetSize(GetRowCount());
1755  for (i = 0; i < nSize; i++)
1756  {
1757  CKNComplex fSubTotal;
1758  unsigned int nSubStart = GetRowIndexNo(i), nSubEnd = GetRowIndexNo(i + 1);
1759 
1760  if (MAX_INDEX == nSubStart)
1761  continue;
1762 
1763  if (MAX_INDEX == nSubEnd)
1764  {
1765  unsigned int nIndex = i + 2;
1766 
1767  while (MAX_INDEX == nSubEnd && nIndex <= GetRowCount())
1768  nSubEnd = GetRowIndexNo(nIndex++);
1769  }
1770 
1771  for (j = nSubStart; j < nSubEnd; j++)
1772  {
1773  unsigned int nColIndex = GetColIndexNo(j);
1774  fSubTotal += (*(GetValue(j))*vector.GetAt(nColIndex));
1775  }
1776  rtnVector.SetAt(i, fSubTotal);
1777  }
1778 
1779  return rtnVector;
1780 }
1781 
1787 {
1788  return operator*((*vector));
1789 }
1795 
1796 
1803 {
1804 }
1805 
1807 {
1808 }
1809 
1810 #define REPEAT_COUNT 1000
1811 
1819 CKNMatrixOperation::CKNCSR * CKNMatrixOperation::BuildCSRFromFileTemp(FILE *fDataFile, unsigned int nRowOrder, unsigned int nColumnOrder, int nDataCount)
1820 {
1822  size_t readSize;
1823  unsigned int i;
1825 
1826  if (NULL == pCSR)
1827  return pCSR;
1828 
1829  pCSR->SetRowCount(nRowOrder);
1830  pCSR->SetColumnCount(nColumnOrder);
1831  pCSR->BuildDataBuffer();
1832 
1833  while (0 != (readSize = fread(Data, sizeof(CSR_ELEMENT_DATA), REPEAT_COUNT, fDataFile)))
1834  {
1835  for (i = 0; i < readSize; i++)
1836  pCSR->PushNoneZeroValue(Data[i].fReal, Data[i].fImaginary, (unsigned int)Data[i].nRow - 1, (unsigned int)Data[i].nColumn - 1);
1837  }
1838 
1839  pCSR->FinishPush();
1840 
1841  return pCSR;
1842 }
1843 
1851 CKNMatrixOperation::CKNCSR * CKNMatrixOperation::BuildCSRFromFile_(FILE *fDataFile, unsigned int nRowOrder, unsigned int nColumnOrder, int nDataCount)
1852 {
1854  size_t readSize;
1855  unsigned int i;
1857  memset(Data, NULL, sizeof(CKNMatrixOperation::CSR_ELEMENT_DATA)*REPEAT_COUNT);
1858 
1859  if (NULL == pCSR)
1860  return pCSR;
1861 
1862  pCSR->SetRowCount(nRowOrder);
1863  pCSR->SetColumnCount(nColumnOrder);
1864  pCSR->BuildDataBuffer();
1865 
1866  while (0 != (readSize = fread(Data, sizeof(CSR_ELEMENT_DATA), REPEAT_COUNT, fDataFile)))
1867  {
1868  for (i = 0; i < readSize; i++)
1869  {
1870  if (0 == Data[i].nRow && 0 == Data[i].nColumn)
1871  break;
1872 
1873  pCSR->PushNoneZeroValue(Data[i].fReal, Data[i].fImaginary, (unsigned int)Data[i].nRow - 1, (unsigned int)Data[i].nColumn - 1);
1874  }
1875  }
1876 
1877  pCSR->FinishPush();
1878 
1879  return pCSR;
1880 }
1881 
1887 int CKNMatrixOperation::Compare(const void *pA, const void *pB)
1888 {
1891 
1892  if (lpA->nRow > lpB->nRow)
1893  return 1;
1894  else if (lpA->nRow < lpB->nRow)
1895  return -1;
1896 
1897  if (lpA->nRow == lpB->nRow)
1898  {
1899  if (lpA->nColumn > lpB->nColumn)
1900  return 1;
1901  else if (lpA->nColumn < lpB->nColumn)
1902  return -1;
1903  else if (lpB->nColumn == lpB->nColumn)
1904  return 0;
1905  }
1906 
1907  return -1;
1908 }
1909 
1917 CKNMatrixOperation::CKNCSR * CKNMatrixOperation::BuildCSRFromFileUnsortdata(FILE *fDataFile, unsigned int nRowOrder, unsigned int nColumnOrder, int nDataCount)
1918 {
1919  LPCSR_ELEMENT_DATA lpData = (LPCSR_ELEMENT_DATA)malloc(sizeof(CSR_ELEMENT_DATA)* nDataCount);
1921  int i;
1922 
1923  if (lpData == NULL)
1924  return pCSR;
1925 
1926  for (i = 0; i < nDataCount; i++)
1927  {
1928  fread(&lpData[i], sizeof(CSR_ELEMENT_DATA), 1, fDataFile);
1929  }
1930 
1931  pCSR->SetRowCount(nRowOrder);
1932  pCSR->SetColumnCount(nColumnOrder);
1933  pCSR->BuildDataBuffer();
1934 
1935  qsort(lpData, nDataCount, sizeof(CSR_ELEMENT_DATA), CKNMatrixOperation::Compare);
1936 
1937  for (i = 0; i < nDataCount; i++)
1938  {
1939  pCSR->PushNoneZeroValue(lpData[i].fReal, lpData[i].fImaginary, (unsigned int)lpData[i].nRow - 1, (unsigned int)lpData[i].nColumn - 1);
1940  }
1941  pCSR->FinishPush();
1942 
1943  return pCSR;
1944 
1945 }
1953 CKNMatrixOperation::CKNCSR * CKNMatrixOperation::BuildCSRFromOneDimArray(double *pReal, double *pImaginary, unsigned int nRowOrder, unsigned int nColumnOrder)
1954 {
1955  unsigned int row, col;
1957 
1958  if (NULL == pCSR)
1959  return pCSR;
1960 
1961  pCSR->SetRowCount(nRowOrder);
1962  pCSR->SetColumnCount(nColumnOrder);
1963  pCSR->BuildDataBuffer();
1964 
1965  unsigned int nIndex = 0;
1966  bool bPushedInRow = false;
1967  for (row = 0; row < nRowOrder; row++)
1968  {
1969  for (col = 0; col < nColumnOrder; col++)
1970  {
1971  if (0 != pReal[row*nColumnOrder + col] || 0 != pImaginary[row*nColumnOrder + col])
1972  {
1973  pCSR->PushNoneZeroValue(pReal[nIndex], pImaginary[nIndex], row, col);
1974  bPushedInRow = true;
1975  }
1976  nIndex++;
1977  }
1978  }
1979  pCSR->FinishPush();
1980 
1981  return pCSR;
1982 }
1983 
1990 {
1991  unsigned int i, j;
1992  unsigned int nSubStart, nSubEnd;
1994 
1995  pCSR->SetColumnCount(GetColumnCount());
1996  pCSR->SetRowCount(nEnd - nStart);
1997 
1998  pCSR->BuildDataBuffer();
1999 
2000  int nRowIndex = 0;
2001  for (i = nStart; i < (unsigned int)nEnd; i++)
2002  {
2003  nSubStart = GetRowIndexNo(i);
2004  nSubEnd = GetRowIndexNo(i + 1);
2005  for (j = nSubStart; j < nSubEnd; j++)
2006  {
2007  //CKNComplex element = m_vectValueBuffer[j];
2008  //pCSR->PushNoneZeroValue(element.GetRealNumber(), element.GetImaginaryNumber(), nRowIndex, m_vectColumn[j]);
2009 
2010  pCSR->PushNoneZeroValue(m_vectValueRealBuffer[i], m_vectValueImaginaryBuffer[i], nRowIndex, m_vectColumn[j]);
2011  }
2012  nRowIndex++;
2013  }
2014  pCSR->FinishPush();
2015 
2016  return pCSR;
2017 
2018 }
2019 
2027 {
2028  bool bRtn = false, bResult;
2029  unsigned int i, j, nRowIndex;
2030  unsigned int nVectorIndex = 0, nVectorEnd = pOperand->GetSize();
2031 
2032  if( bUseSplitVector )
2033  {
2034  if( pOperand->GetSize() != m_nRowCount / 10 )
2035  return bRtn;
2036  }
2037  else
2038  {
2039  if( pOperand->GetSize() != m_nColumnCount / 10 )
2040  return bRtn;
2041 
2042  nVectorIndex = (unsigned int)m_fFirstRowIndex/10;
2043  nVectorEnd = nVectorIndex + m_nRowCount / 10;
2044  }
2045 
2046  nRowIndex = 0;
2047  for ( i = nVectorIndex; i < nVectorEnd ; ++ i )
2048  {
2049  CKNComplex number = pOperand->GetAt(i);
2050  for( j = 0 ; j < ORBITALS ; ++ j )
2051  {
2052  CKNComplex csrElement = GetElement(nRowIndex, nRowIndex + (unsigned int)m_fFirstRowIndex, bResult);
2053  if( !bResult )
2054  return bRtn;
2055 
2056  switch(type)
2057  {
2058  case PLUS:
2059  csrElement = csrElement + number;
2060  break;
2061  case MINUS:
2062  csrElement = csrElement - number;
2063  break;
2064  case MULTIPLE:
2065  csrElement = csrElement * number;
2066  break;
2067  case DIVISION:
2068  csrElement = csrElement / number;
2069  break;
2070  }
2071 
2072  SetAt(csrElement, nRowIndex, nRowIndex + (unsigned int)m_fFirstRowIndex);
2073  nRowIndex++;
2074 
2075  }
2076  }
2077 
2078  bRtn = true;
2079  return bRtn;
2080 }
2081 
2086 {
2087  if (NULL == pCSR)
2088  return;
2089 
2090  pCSR->Finalize();
2091 
2092  delete pCSR;
2093  pCSR = NULL;
2094 }
2095 
2099 void CKNMatrixOperation::CKNCSR::DumpCSR(const char *pstrFileName)
2100 {
2101  FILE *out;
2102  unsigned int i, nCount;
2103 
2104  out = fopen(pstrFileName, "wt");
2105  if (NULL != out)
2106  {
2107  fputs("None Zero Value\r\n", out);
2108  fputs("------------------------------------------\r\n", out);
2109  nCount = GetNoneZeroCount();
2110  for (i = 0; i < nCount; i++)
2111  fprintf(out, "%f + %fi\r\n", m_vectValueRealBuffer[i], m_vectValueImaginaryBuffer[i]);
2112 
2113  fputs("Column\r\n", out);
2114  fputs("------------------------------------------\r\n", out);
2115  nCount = GetNoneZeroCount();
2116  for (i = 0; i < nCount; i++)
2117  fprintf(out, "%u\r\n", m_vectColumn[i]);
2118 
2119  fputs("Row\r\n", out);
2120  fputs("------------------------------------------\r\n", out);
2121  nCount = GetRowCount() + 1;
2122  for (i = 0; i < nCount; i++)
2123  fprintf(out, "%u\r\n", GetRowIndexNo(i));
2124 
2125  fclose(out);
2126  }
2127 }
2128 
2134 void CKNMatrixOperation::MVMul(CKNCSR *pAMatrix, CKNVector *pVector, CKNVector *pResult)
2135 {
2136  unsigned int i, j, nSize = pAMatrix->GetColumnCount();
2137  CKNVector *pOperandVector = NULL, VOperand;
2138  double *pMatrixReal = NULL, *pMatrixImaginary = NULL;
2139  double *pVectorReal = NULL, *pVectorImaginary = NULL;
2140  double *pResultReal = NULL, *pResultImaginary = NULL;
2141  unsigned int *pMatrixRow = NULL, *pMatrixColumn = NULL;
2142 
2143  VOperand = *pVector;
2144  pOperandVector = &VOperand;
2145 
2146 #ifndef DISABLE_MPI_ROUTINE
2147 
2148  VOperand.SetSize(pAMatrix->GetColumnCount());
2149  CKNMPIManager::MergeVector(pVector, &VOperand, pAMatrix->GetColumnCount());
2150  pOperandVector = &VOperand;
2151 #else
2152  pOperandVector = pVector;
2153 #endif
2154 
2155  if (nSize != pOperandVector->GetSize())
2156  {
2158  return;
2159  }
2160 
2161  nSize = pAMatrix->GetRowCount();
2162  pMatrixReal = pAMatrix->m_vectValueRealBuffer.data();
2163  pMatrixImaginary = pAMatrix->m_vectValueImaginaryBuffer.data();
2164  pMatrixRow = pAMatrix->m_vectRow.data();
2165  pMatrixColumn = pAMatrix->m_vectColumn.data();
2166  pVectorReal = pOperandVector->m_vectValueRealBuffer.data();
2167  pVectorImaginary = pOperandVector->m_vectValueImaginaryBuffer.data();
2168  pResultReal = pResult->m_vectValueRealBuffer.data();
2169  pResultImaginary = pResult->m_vectValueImaginaryBuffer.data();
2170 
2171  unsigned int input_real_size = pOperandVector->m_vectValueRealBuffer.size();
2172  unsigned int input_imaginary_size = pOperandVector->m_vectValueImaginaryBuffer.size();
2173  unsigned int output_real_size = pResult->m_vectValueRealBuffer.size();
2174  unsigned int output_imaginary_size = pResult->m_vectValueImaginaryBuffer.size();
2175 
2176  #pragma omp parallel for
2177  for ( i = 0; i < nSize; i++)
2178  {
2179  double real_sum = 0.0;
2180  double imaginary_sum = 0.0;
2181  const unsigned int nSubStart = pMatrixRow[i];
2182  const unsigned int nSubEnd = pMatrixRow[i + 1];
2183 
2184 
2185  for ( j = nSubStart; j < nSubEnd; j++)
2186  {
2187  const unsigned int nColIndex = pMatrixColumn[j];
2188  const double m_real = pMatrixReal[j];
2189  const double m_imaginary = pMatrixImaginary[j];
2190  const double v_real = pVectorReal[nColIndex];
2191  const double v_imaginary = pVectorImaginary[nColIndex];
2192 
2193  real_sum += m_real * v_real - m_imaginary * v_imaginary;
2194  imaginary_sum += m_real * v_imaginary + m_imaginary * v_real;
2195  }
2196 
2197  pResultReal[i] = real_sum;
2198  pResultImaginary[i] = imaginary_sum;
2199  }
2200 #ifndef DISABLE_MPI_ROUTINE
2201  VOperand.Finalize();
2202 #endif //DISABLE_MPI_ROUTINE
2203 }
2204 
2211 void CKNMatrixOperation::MVMulEx_AsyncCommWithLocalBlocks(CKNMatrixOperation::CKNCSR *mylocalblock, CKNMatrixOperation::CKNCSR *leftlocalblock, CKNMatrixOperation::CKNCSR *rightlocalblock, CKNVector *pVector, CKNVector *pResult, double *X, double *Xrt, double *Xlt)
2212 {
2213  unsigned int nSize, B, Brt, Blt;
2214  double *pMatrixValueReal = NULL, *pOperandVectorReal = NULL, *pResultVectorReal = NULL;
2215  double *pMatrixValueImaginary = NULL, *pOperandVectorImaginary = NULL, *pResultVectorImaginary = NULL;
2216  int tag = 1002;
2217  int myrank = CKNMPIManager::GetCurrentRank();
2218  int ncpus = CKNMPIManager::GetTotalNodeCount();
2219  int left_neighbor = (myrank - 1 + ncpus) % ncpus;
2220  int right_neighbor = (myrank + 1) % ncpus;
2221  MPI_Request req_sr[2];
2222  MPI_Status stat_sr[2];
2223  // XXX jinpil: pRow, pColumn should be local in offload directives
2224  unsigned int *pRow = mylocalblock->m_vectRow.data();
2225  unsigned int *pColumn = mylocalblock->m_vectColumn.data();
2226 
2227 #ifndef _WIN32
2228  __assume_aligned(X, 64);
2229  __assume_aligned(X, 64);
2230  __assume_aligned(Xrt, 64);
2231 
2232  __assume_aligned(pMatrixValueReal, 64);
2233  __assume_aligned(pMatrixValueImaginary, 64);
2234  __assume_aligned(pOperandVectorReal, 64);
2235  __assume_aligned(pOperandVectorImaginary, 64);
2236  __assume_aligned(pResultVectorReal, 64);
2237  __assume_aligned(pResultVectorImaginary, 64);
2238  __assume_aligned(pRow, 64);
2239  __assume_aligned(pColumn, 64);
2240 #endif //_WIN32
2241 
2242  pMatrixValueReal = mylocalblock->m_vectValueRealBuffer.data();
2243  pOperandVectorReal = pVector->m_vectValueRealBuffer.data();
2244  pResultVectorReal = pResult->m_vectValueRealBuffer.data();
2245 
2246  pMatrixValueImaginary = mylocalblock->m_vectValueImaginaryBuffer.data();
2247  pOperandVectorImaginary = pVector->m_vectValueImaginaryBuffer.data();
2248  pResultVectorImaginary = pResult->m_vectValueImaginaryBuffer.data();;
2249 
2250  nSize = mylocalblock->GetRowCount();
2251 
2252  if (nSize != pVector->GetSize())
2253  {
2255  return;
2256  }
2257 
2258  B = nSize;
2259  Brt = 0;
2260  Blt = 0;
2261 
2262  for (int ii = 0; ii< nSize; ii++)
2263  {
2264  X[2 * ii] = pOperandVectorReal[ii];
2265  X[2 * ii + 1] = pOperandVectorImaginary[ii];
2266  }
2267 
2269 
2270  MPI_Irecv(&Brt, 1, MPI_INT, right_neighbor, tag, CKNMPIManager::GetMPIComm(), &req_sr[0]);
2271  MPI_Isend(&B, 1, MPI_INT, left_neighbor, tag, CKNMPIManager::GetMPIComm(), &req_sr[1]);
2272  MPI_Waitall(2, req_sr, stat_sr); // now Brt has B of right neighbor.
2273 
2274  MPI_Irecv(Xrt, 2 * Brt, MPI_DOUBLE, right_neighbor, tag, CKNMPIManager::GetMPIComm(), &req_sr[0]); // receive from bottom neighbor
2275  MPI_Isend(X, 2 * B, MPI_DOUBLE, left_neighbor, tag, CKNMPIManager::GetMPIComm(), &req_sr[1]); // send to top neighbor
2276 
2278 
2279  unsigned int input_size = X_largest * 2;
2280  unsigned int output_real_size = pResult->m_vectValueRealBuffer.size();
2281  unsigned int output_imaginary_size = pResult->m_vectValueImaginaryBuffer.size();
2282 
2283 
2284 #ifdef _ENABLE_PAPI
2285  long long papi_values[4];
2286  PAPI_start(papi_event_set);
2287 #endif
2288 
2290 #pragma offload target(mic:phi_tid) \
2291  nocopy(*pMatrixValueReal : REUSE) \
2292  nocopy(*pMatrixValueImaginary : REUSE) \
2293  nocopy(*pRow : REUSE) \
2294  nocopy(*pColumn : REUSE) \
2295  in(X[0:input_size] : REUSE) \
2296  nocopy(*pResultVectorReal : REUSE) \
2297  nocopy(*pResultVectorImaginary : REUSE)
2298  //*/
2299 #pragma omp parallel for
2300  for (unsigned int i = 0; i < nSize; i++)
2301  {
2302  double real_sum = 0.0;
2303  double imaginary_sum = 0.0;
2304  const unsigned int nSubStart = pRow[i ];
2305  const unsigned int nSubEnd = pRow[i + 1];
2306  for (unsigned int j = nSubStart; j < nSubEnd; j++)
2307  {
2308  const unsigned int nColIndex = pColumn[j];
2309  const double m_real = pMatrixValueReal[j];
2310  const double m_imaginary = pMatrixValueImaginary[j];
2311  const double v_real = X[2 * nColIndex];
2312  const double v_imaginary = X[2 * nColIndex + 1];
2313 
2314  real_sum += m_real * v_real - m_imaginary * v_imaginary;
2315  imaginary_sum += m_real * v_imaginary + m_imaginary * v_real;
2316  }
2317 
2318  pResultVectorReal[i] = real_sum;
2319  pResultVectorImaginary[i] = imaginary_sum;
2320  }
2321 
2322 #ifdef _ENABLE_PAPI
2323  PAPI_stop(papi_event_set, papi_values);
2324  printf("[LOCAL] L2 access = %lld | L2 miss = %lld | L2 miss rate = %g %\n", papi_values[0], papi_values[1], (papi_values[1] * 100.0) / papi_values[0]);
2325  printf("[LOCAL] L3 access = %lld | L3 miss = %lld | L3 miss rate = %g %\n", papi_values[2], papi_values[3], (papi_values[3] * 100.0) / papi_values[2]);
2326 #endif
2327 
2329 
2330  MPI_Waitall(2, req_sr, stat_sr); // now Xrt has X of right neighbor.
2331 
2332  MPI_Irecv(&Blt, 1, MPI_INT, left_neighbor, tag, CKNMPIManager::GetMPIComm(), &req_sr[0]);
2333  MPI_Isend(&B, 1, MPI_INT, right_neighbor, tag, CKNMPIManager::GetMPIComm(), &req_sr[1]);
2334  MPI_Waitall(2, req_sr, stat_sr); // now Blt has B of left neighbor.
2335 
2336 
2337  MPI_Irecv(Xlt, 2 * Blt, MPI_DOUBLE, left_neighbor, tag, CKNMPIManager::GetMPIComm(), &req_sr[0]);
2338  MPI_Isend(X, 2 * B, MPI_DOUBLE, right_neighbor, tag, CKNMPIManager::GetMPIComm(), &req_sr[1]);
2339 
2341 
2342  pMatrixValueReal = rightlocalblock->m_vectValueRealBuffer.data();
2343  pMatrixValueImaginary = rightlocalblock->m_vectValueImaginaryBuffer.data();
2344  pRow = rightlocalblock->m_vectRow.data();
2345  pColumn = rightlocalblock->m_vectColumn.data();
2346 
2347 #ifdef _ENABLE_PAPI
2348  PAPI_start(papi_event_set);
2349 #endif
2350 
2352 #pragma offload target(mic:phi_tid) \
2353  nocopy(*pMatrixValueReal : REUSE) \
2354  nocopy(*pMatrixValueImaginary : REUSE) \
2355  nocopy(*pRow : REUSE) \
2356  nocopy(*pColumn : REUSE) \
2357  in(Xrt[0:input_size] : REUSE) \
2358  nocopy(*pResultVectorReal : REUSE) \
2359  nocopy(*pResultVectorImaginary : REUSE)
2360  //*/
2361 #pragma omp parallel for
2362  for (unsigned int i = 0; i < nSize; i++)
2363  {
2364  double real_sum = 0.0;
2365  double imaginary_sum = 0.0;
2366  const unsigned int nSubStart = pRow[i ];
2367  const unsigned int nSubEnd = pRow[i + 1];
2368  for (unsigned int j = nSubStart; j < nSubEnd; j++)
2369  {
2370  const unsigned int nColIndex = pColumn[j];
2371  const double m_real = pMatrixValueReal[j];
2372  const double m_imaginary = pMatrixValueImaginary[j];
2373  const double v_real = Xrt[2 * nColIndex];
2374  const double v_imaginary = Xrt[2 * nColIndex + 1];
2375 
2376  real_sum += m_real * v_real - m_imaginary * v_imaginary;
2377  imaginary_sum += m_real * v_imaginary + m_imaginary * v_real;
2378  }
2379 
2380  pResultVectorReal[i] += real_sum;
2381  pResultVectorImaginary[i] += imaginary_sum;
2382  }
2383 
2384 #ifdef _ENABLE_PAPI
2385  PAPI_stop(papi_event_set, papi_values);
2386  printf("[RIGHT] L2 access = %lld | L2 miss = %lld | L2 miss rate = %g %\n", papi_values[0], papi_values[1], (papi_values[1] * 100.0) / papi_values[0]);
2387  printf("[RIGHT] L3 access = %lld | L3 miss = %lld | L3 miss rate = %g %\n", papi_values[2], papi_values[3], (papi_values[3] * 100.0) / papi_values[2]);
2388 #endif
2389 
2391  MPI_Waitall(2, req_sr, stat_sr); // now Xlt has X of left neighbor.
2393 
2394  pMatrixValueReal = leftlocalblock->m_vectValueRealBuffer.data();
2395  pMatrixValueImaginary = leftlocalblock->m_vectValueImaginaryBuffer.data();
2396  pRow = leftlocalblock->m_vectRow.data();
2397  pColumn = leftlocalblock->m_vectColumn.data();
2398 
2399 #ifdef _ENABLE_PAPI
2400  PAPI_start(papi_event_set);
2401 #endif
2402 
2404 #pragma offload target(mic:phi_tid) \
2405  nocopy(*pMatrixValueReal : REUSE) \
2406  nocopy(*pMatrixValueImaginary : REUSE) \
2407  nocopy(*pRow : REUSE) \
2408  nocopy(*pColumn : REUSE) \
2409  in(Xlt[0:input_size] : REUSE) \
2410  out(pResultVectorReal[0:output_real_size] : REUSE) \
2411  out(pResultVectorImaginary[0:output_imaginary_size] : REUSE)
2412  //*/
2413 #pragma omp parallel for
2414  for (unsigned int i = 0; i < nSize; i++)
2415  {
2416  double real_sum = 0.0;
2417  double imaginary_sum = 0.0;
2418  const unsigned int nSubStart = pRow[i ];
2419  const unsigned int nSubEnd = pRow[i + 1];
2420  for (unsigned int j = nSubStart; j < nSubEnd; j++)
2421  {
2422  const unsigned int nColIndex = pColumn[j];
2423  const double m_real = pMatrixValueReal[j];
2424  const double m_imaginary = pMatrixValueImaginary[j];
2425  const double v_real = Xlt[2 * nColIndex];
2426  const double v_imaginary = Xlt[2 * nColIndex + 1];
2427 
2428  real_sum += m_real * v_real - m_imaginary * v_imaginary;
2429  imaginary_sum += m_real * v_imaginary + m_imaginary * v_real;
2430  }
2431 
2432  pResultVectorReal[i] += real_sum;
2433  pResultVectorImaginary[i] += imaginary_sum;
2434  }
2435 
2436 #ifdef _ENABLE_PAPI
2437  PAPI_stop(papi_event_set, papi_values);
2438  printf("[LEFT] L2 access = %lld | L2 miss = %lld | L2 miss rate = %g %\n", papi_values[0], papi_values[1], (papi_values[1] * 100.0) / papi_values[0]);
2439  printf("[LEFT] L3 access = %lld | L3 miss = %lld | L3 miss rate = %g %\n", papi_values[2], papi_values[3], (papi_values[3] * 100.0) / papi_values[2]);
2440 #endif
2441 }
2442 
2452 void CKNMatrixOperation::MVMulEx_Optimal(CKNCSR *pAMatrix, CKNVector *pVector, CKNVector *pResult, unsigned int nSizeFromPrevRank, unsigned int nSizeFromNextRank, CKNVector *VTemp, int nSizePHI)
2453 {
2454  unsigned int nSize = pAMatrix->GetColumnCount();
2455  CKNVector *pOperandVector = NULL;
2456  double *pMatrixReal = NULL, *pMatrixImaginary = NULL;
2457  double *pVectorReal = NULL, *pVectorImaginary = NULL;
2458  double *pResultReal = NULL, *pResultImaginary = NULL;
2459  unsigned int *pMatrixRow = NULL, *pMatrixColumn = NULL;
2460  char sigval;
2461  unsigned int input_size1, input_size2, input_size3;
2462  unsigned int input_offset1, input_offset2, input_offset3, offsettmp[3];
2463  unsigned int output_size, output_offset;
2464 
2465 #ifndef DISABLE_MPI_ROUTINE
2466  pOperandVector = VTemp;
2467  pVectorReal = pOperandVector->m_vectValueRealBuffer.data();
2468  pVectorImaginary = pOperandVector->m_vectValueImaginaryBuffer.data();
2469 
2470  // memset(pVectorReal, 0, sizeof(double)*pOperandVector->GetSize());
2471  // memset(pVectorImaginary, 0, sizeof(double)*pOperandVector->GetSize());
2472 
2473  CKNMPIManager::MergeVectorEx_Optimal(pVector, pOperandVector, pAMatrix->GetColumnCount(), pAMatrix->m_fFirstRowIndex, nSizeFromPrevRank, nSizeFromNextRank, pAMatrix->nComponentsFirstUnitCell, pAMatrix->nComponentsLastUnitCell, offsettmp);
2474 
2475 #else
2476  pOperandVector = pVector;
2477 #endif
2478 
2479  if (nSize != pOperandVector->GetSize())
2480  {
2482  return;
2483  }
2484 
2485  pMatrixReal = pAMatrix->m_vectValueRealBuffer.data();
2486  pMatrixImaginary = pAMatrix->m_vectValueImaginaryBuffer.data();
2487  pMatrixRow = pAMatrix->m_vectRow.data();
2488  pMatrixColumn = pAMatrix->m_vectColumn.data();
2489  pResultReal = pResult->m_vectValueRealBuffer.data();
2490  pResultImaginary = pResult->m_vectValueImaginaryBuffer.data();
2491  nSize = pAMatrix->GetRowCount();
2492 
2494  {
2495  input_size1 = pOperandVector->m_vectValueRealBuffer.size();
2496  input_offset1 = 0;
2497 
2498 #pragma offload_transfer target(mic:phi_tid) in(pVectorReal[input_offset1:input_size1] : REUSE)
2499 #pragma offload_transfer target(mic:phi_tid) in(pVectorImaginary[input_offset1:input_size1] : REUSE)
2500  }
2501  else
2502  {
2503  input_size1 = nSizeFromPrevRank;
2504  input_size2 = nSize;
2505  input_size3 = nSizeFromNextRank;
2506  input_offset1 = offsettmp[0];
2507  input_offset2 = offsettmp[1];
2508  input_offset3 = offsettmp[2];
2509 
2510 #pragma offload_transfer target(mic:phi_tid) in(pVectorReal[input_offset1:input_size1] : REUSE)
2511 #pragma offload_transfer target(mic:phi_tid) in(pVectorImaginary[input_offset1:input_size1] : REUSE)
2512 #pragma offload_transfer target(mic:phi_tid) in(pVectorReal[input_offset2:input_size2] : REUSE)
2513 #pragma offload_transfer target(mic:phi_tid) in(pVectorImaginary[input_offset2:input_size2] : REUSE)
2514 #pragma offload_transfer target(mic:phi_tid) in(pVectorReal[input_offset3:input_size3] : REUSE)
2515 #pragma offload_transfer target(mic:phi_tid) in(pVectorImaginary[input_offset3:input_size3] : REUSE)
2516  }
2517 
2518  output_size = nSizePHI;
2519  output_offset = 0;
2520 
2521  // FIXME jinpil:
2522  // nocopy(pMatrixReal : REUSE)
2523  // correct directive syntax, but segmentation fault without *
2524  // Xeon Phi device cannot find the correct pointer value
2525  // compiler bug?
2526  // FIXME allocate pVectorReal, pVectorImaginary outside the Lanczos loop
2528 
2529 #pragma offload target(mic:phi_tid) \
2530  nocopy(*pMatrixReal : REUSE) \
2531  nocopy(*pMatrixImaginary : REUSE) \
2532  nocopy(*pMatrixRow : REUSE) \
2533  nocopy(*pMatrixColumn : REUSE) \
2534  nocopy(*pVectorReal : REUSE) \
2535  nocopy(*pVectorImaginary : REUSE) \
2536  out(pResultReal[output_offset:output_size] : REUSE) \
2537  out(pResultImaginary[output_offset:output_size] : REUSE) \
2538  signal(&sigval)
2539  //*/
2540 
2541 #pragma omp parallel for
2542  for (unsigned int i = 0; i < nSizePHI; i++)
2543  {
2544  double real_sum = 0.0;
2545  double imaginary_sum = 0.0;
2546  const unsigned int nSubStart = pMatrixRow[i];
2547  const unsigned int nSubEnd = pMatrixRow[i + 1];
2548 
2549 
2550  for (unsigned int j = nSubStart; j < nSubEnd; j++)
2551  {
2552  const unsigned int nColIndex = pMatrixColumn[j];
2553  const double m_real = pMatrixReal[j];
2554  const double m_imaginary = pMatrixImaginary[j];
2555  const double v_real = pVectorReal[nColIndex];
2556  const double v_imaginary = pVectorImaginary[nColIndex];
2557 
2558  real_sum += m_real * v_real - m_imaginary * v_imaginary;
2559  imaginary_sum += m_real * v_imaginary + m_imaginary * v_real;
2560  }
2561 
2562  pResultReal[i] = real_sum;
2563  pResultImaginary[i] = imaginary_sum;
2564  }
2565 
2566 #pragma omp parallel for
2567  for (unsigned int i = nSizePHI; i < nSize; i++)
2568  {
2569  double real_sum = 0.0;
2570  double imaginary_sum = 0.0;
2571  const unsigned int nSubStart = pMatrixRow[i];
2572  const unsigned int nSubEnd = pMatrixRow[i + 1];
2573 
2574 
2575  for (unsigned int j = nSubStart; j < nSubEnd; j++)
2576  {
2577  const unsigned int nColIndex = pMatrixColumn[j];
2578  const double m_real = pMatrixReal[j];
2579  const double m_imaginary = pMatrixImaginary[j];
2580  const double v_real = pVectorReal[nColIndex];
2581  const double v_imaginary = pVectorImaginary[nColIndex];
2582 
2583  real_sum += m_real * v_real - m_imaginary * v_imaginary;
2584  imaginary_sum += m_real * v_imaginary + m_imaginary * v_real;
2585  }
2586 
2587  pResultReal[i] = real_sum;
2588  pResultImaginary[i] = imaginary_sum;
2589  }
2590 
2591 #pragma offload_wait target(mic:phi_tid) wait(&sigval)
2592 
2593 }
2594 
2600 void CKNMatrixOperation::MVMulOptimal(CKNCSR *pAMatrix, CKNVector *pVector, CKNVector *pResult)
2601 {
2602  unsigned int i, j, nSize = pAMatrix->GetColumnCount();
2603  CKNVector *pOperandVector = NULL;
2604  double *pMatrixReal = NULL, *pMatrixImaginary = NULL;
2605  double *pVectorReal = NULL, *pVectorImaginary = NULL;
2606  double *pResultReal = NULL, *pResultImaginary = NULL;
2607  unsigned int *pMatrixRow = NULL, *pMatrixColumn = NULL;
2608 #ifndef DISABLE_MPI_ROUTINE
2609  CKNVector VOperand;
2610 
2611  VOperand.SetSize(pAMatrix->GetColumnCount());
2612  CKNMPIManager::MergeVectorOptimal(pVector, &VOperand, pAMatrix->GetColumnCount(), pAMatrix->m_fFirstRowIndex);
2613  pOperandVector = &VOperand;
2614 #else
2615  pOperandVector = pVector;
2616 #endif
2617 
2618  if (nSize != pOperandVector->GetSize())
2619  {
2621  return;
2622  }
2623 
2624  nSize = pAMatrix->GetRowCount();
2625  pMatrixReal = pAMatrix->m_vectValueRealBuffer.data();
2626  pMatrixImaginary = pAMatrix->m_vectValueImaginaryBuffer.data();
2627  pMatrixRow = pAMatrix->m_vectRow.data();
2628  pMatrixColumn = pAMatrix->m_vectColumn.data();
2629  pVectorReal = pOperandVector->m_vectValueRealBuffer.data();
2630  pVectorImaginary = pOperandVector->m_vectValueImaginaryBuffer.data();
2631  pResultReal = pResult->m_vectValueRealBuffer.data();
2632  pResultImaginary = pResult->m_vectValueImaginaryBuffer.data();
2633 
2634  unsigned int input_real_size = pOperandVector->m_vectValueRealBuffer.size();
2635  unsigned int input_imaginary_size = pOperandVector->m_vectValueImaginaryBuffer.size();
2636  unsigned int output_real_size = pResult->m_vectValueRealBuffer.size();
2637  unsigned int output_imaginary_size = pResult->m_vectValueImaginaryBuffer.size();
2638 
2639 // FIXME jinpil:
2640 // nocopy(pMatrixReal : REUSE)
2641 // correct directive syntax, but segmentation fault without *
2642 // Xeon Phi device cannot find the correct pointer value
2643 // compiler bug?
2644 // FIXME allocate pVectorReal, pVectorImaginary outside the Lanczos loop
2646 #pragma offload target(mic:phi_tid) \
2647  nocopy(*pMatrixReal : REUSE) \
2648  nocopy(*pMatrixImaginary : REUSE) \
2649  nocopy(*pMatrixRow : REUSE) \
2650  nocopy(*pMatrixColumn : REUSE) \
2651  in(pVectorReal[0:input_real_size] : LOCAL) \
2652  in(pVectorImaginary[0:input_imaginary_size] : LOCAL) \
2653  out(pResultReal[0:output_real_size] : REUSE) \
2654  out(pResultImaginary[0:output_imaginary_size] : REUSE)
2655 //*/
2656 #pragma omp parallel for
2657  for ( i = 0; i < nSize; i++)
2658  {
2659  double real_sum = 0.0;
2660  double imaginary_sum = 0.0;
2661  const unsigned int nSubStart = pMatrixRow[i];
2662  const unsigned int nSubEnd = pMatrixRow[i + 1];
2663 
2664 
2665  for ( j = nSubStart; j < nSubEnd; j++)
2666  {
2667  const unsigned int nColIndex = pMatrixColumn[j];
2668  const double m_real = pMatrixReal[j];
2669  const double m_imaginary = pMatrixImaginary[j];
2670  const double v_real = pVectorReal[nColIndex];
2671  const double v_imaginary = pVectorImaginary[nColIndex];
2672 
2673  real_sum += m_real * v_real - m_imaginary * v_imaginary;
2674  imaginary_sum += m_real * v_imaginary + m_imaginary * v_real;
2675  }
2676 
2677  pResultReal[i] = real_sum;
2678  pResultImaginary[i] = imaginary_sum;
2679  }
2680 #ifndef DISABLE_MPI_ROUTINE
2681  VOperand.Finalize();
2682 #endif //DISABLE_MPI_ROUTINE
2683 }
2684 
2691 bool CKNMatrixOperation::VVDot(CKNVector *pVector1, CKNVector *pVector2, CKNComplex *pResult)
2692 {
2693  double *pOp1Real = NULL, *pOp1Imaginary = NULL;
2694  double *pOp2Real = NULL, *pOp2Imaginary = NULL;
2695  unsigned int i, nSize = pVector1->GetSize();
2696 
2697  if (nSize != pVector2->GetSize())
2698  {
2700  return false;
2701  }
2702 
2703  pOp1Real = pVector1->m_vectValueRealBuffer.data();
2704  pOp1Imaginary = pVector1->m_vectValueImaginaryBuffer.data();
2705 
2706  pOp2Real = pVector2->m_vectValueRealBuffer.data();
2707  pOp2Imaginary = pVector2->m_vectValueImaginaryBuffer.data();
2708 
2709  double fReal = 0., fImaginary = 0.;
2710 #pragma omp parallel for reduction(+:fReal, fImaginary)
2711  for (i = 0; i < nSize; i++)
2712  {
2713  /*fReal += pOp1Real[i] * pOp2Real[i] - pOp1Imaginary[i] * (-1 * pOp2Imaginary[i]);
2714  fImaginary += pOp1Real[i] * (-1 * pOp2Imaginary[i]) + pOp1Imaginary[i] * pOp2Real[i];*/
2715  fReal += pOp1Real[i] * pOp2Real[i] - (-1*pOp1Imaginary[i]) * pOp2Imaginary[i];
2716  fImaginary += pOp1Real[i] * pOp2Imaginary[i] + (-1*pOp1Imaginary[i]) * pOp2Real[i];
2717  }
2718 
2719  pResult->SetComplexNumber(fReal, fImaginary);
2721 
2722  return true;
2723 
2724 }
2725 
2731 void CKNMatrixOperation::MVMul(CKNDMatrix *pMatrix, CKNVector *pVector, CKNVector *pResult)
2732 {
2733  int i, j, nRow, nColumn;
2734 
2735  if (pMatrix->GetColumnCount() != pVector->GetSize())
2736  return;
2737 
2738  pResult->SetSize(pVector->GetSize());
2739 
2740  nRow = pMatrix->GetRowCount();
2741  nColumn = pMatrix->GetColumnCount();
2742  for (i = 0; i < nRow; ++i)
2743  {
2744  CKNComplex result;
2745  for (j = 0; j < nColumn; ++j)
2746  {
2747  result.Add(CKNComplex::MulltiplyComplex(pMatrix->GetElement(i, j), pVector->GetAt(j)));
2748  }
2749  pResult->SetAt(i, result);
2750  }
2751 }
2752 
2758 void CKNMatrixOperation::MMMul(CKNDMatrix *pMatrix, CKNDMatrix *pMatrixOperand, CKNDMatrix *pResult)
2759 {
2760  int i, j, k;
2761  int nRow, nColumn, nL;
2762  if (pMatrix->GetColumnCount() != pMatrixOperand->GetRowCount())
2763  return;
2764 
2765  pResult->BuildMatrixFirst(pMatrix->GetRowCount(), pMatrixOperand->GetColumnCount());
2766 
2767  nL = pMatrixOperand->GetColumnCount();
2768  nRow = pMatrix->GetRowCount();
2769  nColumn = pMatrix->GetColumnCount();
2770  for (k = 0; k < nL; ++k)
2771  {
2772  for (i = 0; i < nRow; ++i)
2773  {
2774  CKNComplex result;
2775  for (j = 0; j < nColumn; ++j)
2776  {
2777  result.Add(CKNComplex::MulltiplyComplex(pMatrix->GetElement(i, j), pMatrixOperand->GetElement(j, k)));
2778  }
2779  pResult->SetElement(i, k, result);
2780  }
2781  }
2782 }
2783 
2789 bool CKNMatrixOperation::IsSame(double operand1, double operand2, double tol)
2790 {
2791  if (fabs(operand1 - operand2) > tol)
2792  return false;
2793  else
2794  return true;
2795 }
2796 
2802 bool CKNMatrixOperation::IsSameA(double operand1, double operand2, double tol)
2803 {
2804  if (fabs(fabs(operand1) - fabs(operand2)) > tol)
2805  return false;
2806  else
2807  return true;
2808 }
2809 
2815 {
2816  CKNComplex result;
2817  CKNVector tempVector;
2818 
2819  CKNMatrixOperation::VVDot(pVect1, pVect2, &result);
2820  if( IsSameA(result.GetRealNumber(), 0, GENERAL_TOLERANCE ))
2821  return 1;
2822 
2823  tempVector = *pVect1;
2824  tempVector.Normalize(true);
2825  pVect2->Normalize(true);
2826 
2827  CKNMatrixOperation::VVDot(&tempVector, pVect2, &result);
2828  tempVector.ScalarMultiple(result);
2829 
2830  pVect2->MinusVector(&tempVector);
2831  pVect2->Normalize(true);
2832 
2833  CKNMatrixOperation::VVDot(pVect1, pVect2, &result);
2834  if( IsSameA(result.GetRealNumber(), 0, GENERAL_TOLERANCE ))
2835  return 1;
2836  else
2837  return 0;
2838 
2839 
2840 }
2841 
2848 {
2849  vector.ScalarMultiple(fScalar);
2850  return vector;
2851 }
2852 
2859 {
2860  vector.ScalarDivision(fScalar);
2861  return vector;
2862 }
2868 
2875 {
2876  unsigned int nRowCount, nColumnCount;
2877  int myrank = CKNMPIManager::GetCurrentRank();
2878  int ncpus = CKNMPIManager::GetTotalNodeCount();
2879  int left_neighbor = (myrank-1+ncpus)%ncpus; // top neighbor
2880  int right_neighbor = (myrank+1)%ncpus; // bottom neighbor
2881  unsigned int temp;
2882 
2883  nRowCount = CKNMPIManager::GetLoadBalanceCount(myrank);
2884  *mine = NULL; *left = NULL; *right = NULL;
2885 
2886  // Allocate mine
2887  *mine = new CKNMatrixOperation::CKNCSR();
2888  if (*mine == NULL)
2889  throw ERROR_MALLOC;
2890  nColumnCount = CKNMPIManager::GetLoadBalanceCount(myrank);
2891  (*mine)->SetRowCount(nRowCount);
2892  (*mine)->SetColumnCount(nColumnCount);
2893  (*mine)->BuildDataBuffer(); temp = 0;
2894  for (int jj=0; jj<myrank; jj++)
2896  (*mine)->SetFirstRowIndex((double)temp); // FirstRowIndex will be used in a bit different way: Starting "column" index.
2897 
2898 // printf("%d %d %d %d %d %d\n", myrank, left_neighbor, right_neighbor, (int)(*mine)->GetFirstRowIndex(), (*mine)->GetColumnCount(), (*mine)->GetNoneZeroCount());
2899 
2900  // Allocate left
2901  *left = new CKNMatrixOperation::CKNCSR();
2902  if (*left == NULL)
2903  throw ERROR_MALLOC;
2904  nColumnCount = CKNMPIManager::GetLoadBalanceCount(left_neighbor);
2905  (*left)->SetRowCount(nRowCount);
2906  (*left)->SetColumnCount(nColumnCount);
2907  (*left)->BuildDataBuffer(); temp = 0;
2908  for (int jj=0; jj<left_neighbor; jj++)
2910  (*left)->SetFirstRowIndex((double)temp); // FirstRowIndex will be used in a bit different way: Starting "column" index.
2911 
2912  //Allocate right
2913  *right = new CKNMatrixOperation::CKNCSR();
2914  if (*right == NULL)
2915  throw ERROR_MALLOC;
2916  nColumnCount = CKNMPIManager::GetLoadBalanceCount(right_neighbor);
2917  (*right)->SetRowCount(nRowCount);
2918  (*right)->SetColumnCount(nColumnCount);
2919  (*right)->BuildDataBuffer(); temp = 0;
2920  for(int jj=0; jj<right_neighbor; jj++)
2922  (*right)->SetFirstRowIndex((double)temp); // FirstRowIndex will be used in a bit different way: Starting "column" index.
2923 
2924  MPI_Barrier(CKNMPIManager::GetMPIComm());
2925 }
2926 
2934 {
2935  CKNMatrixOperation::pRow = source->m_vectRow.data();
2936  CKNMatrixOperation::pColumn = source->m_vectColumn.data();
2937  //CKNMemoryManager::LPVECTOR_ELEMENTS lpMatrixValueElement = NULL;
2938  CKNComplex *pData = NULL;
2939  unsigned int my_nnz, left_nnz, right_nnz;
2940  int isthisrowfilled;
2941 
2942  //lpMatrixValueElement = source->GetValueElement();
2943 
2944  // 1. Build left local block.
2945 
2946  left_nnz = 0;
2947 
2948  for (unsigned int ii = 0; ii < source->GetRowCount(); ii++)
2949  {
2950  isthisrowfilled = -1;
2951  unsigned int nSubStart = CKNMatrixOperation::pRow[ii], nSubEnd = CKNMatrixOperation::pRow[ii+1];
2952  unsigned int startColIndex = (int)left->GetFirstRowIndex(), endColIndex = startColIndex + left->GetColumnCount() - 1;
2953 
2954  for (unsigned int jj = nSubStart; jj < nSubEnd; jj++)
2955  {
2956  unsigned int nColIndex = CKNMatrixOperation::pColumn[jj];
2957 
2958  if(startColIndex <= nColIndex && nColIndex <= endColIndex)
2959  {
2960  isthisrowfilled = 0;
2961  left_nnz++;
2962  pData = source->GetValue(jj);
2963  //left->PushNoneZeroValue(lpMatrixValueElement->pfReal[jj], lpMatrixValueElement->pfImaginary[jj], ii, nColIndex-startColIndex);
2964  left->PushNoneZeroValue(pData->GetRealNumber(), pData->GetImaginaryNumber(), ii, nColIndex-startColIndex);
2965  }
2966  }
2967 
2968  if(isthisrowfilled == -1)
2969  {
2970  left_nnz++;
2971  left->PushNoneZeroValue(0.0, 0.0, ii, 0);
2972  }
2973  }
2974 
2975  left->FinishPush();
2976 
2977 // if(CKNMPIManager::IsRootRank())
2978 // printf("Left block conversion completed: left_nnz = %d (computed), %d (CSR-allocated)\n", left_nnz, left->GetNoneZeroCount());
2979 
2980  // 2. Build right block
2981 
2982  right_nnz = 0;
2983 
2984  for (unsigned int ii = 0; ii < source->GetRowCount(); ii++)
2985  {
2986  isthisrowfilled = -1;
2987  unsigned int nSubStart = CKNMatrixOperation::pRow[ii], nSubEnd = CKNMatrixOperation::pRow[ii+1];
2988  unsigned int startColIndex = (int)right->GetFirstRowIndex(), endColIndex = startColIndex + right->GetColumnCount() - 1;
2989 
2990  for (unsigned int jj = nSubStart; jj < nSubEnd; jj++)
2991  {
2992  unsigned int nColIndex = CKNMatrixOperation::pColumn[jj];
2993  if(startColIndex <= nColIndex && nColIndex <= endColIndex)
2994  {
2995  isthisrowfilled = 0;
2996  right_nnz++;
2997  pData = source->GetValue(jj);
2998  //right->PushNoneZeroValue(lpMatrixValueElement->pfReal[jj], lpMatrixValueElement->pfImaginary[jj], ii, nColIndex-startColIndex);
2999  right->PushNoneZeroValue(pData->GetRealNumber(), pData->GetImaginaryNumber(), ii, nColIndex-startColIndex);
3000  }
3001  }
3002 
3003  if(isthisrowfilled == -1)
3004  {
3005  right_nnz++;
3006  right->PushNoneZeroValue(0.0, 0.0, ii, 0);
3007  }
3008  }
3009 
3010  right->FinishPush();
3011 
3012 // if(CKNMPIManager::IsRootRank())
3013 // printf("Right block conversion completed: right_nnz = %d (computed), %d (CSR-allocated)\n", right_nnz, right->GetNoneZeroCount());
3014 
3015  // 3. Build my block
3016 
3017  my_nnz = 0;
3018 
3019  for (unsigned int ii = 0; ii < source->GetRowCount(); ii++)
3020  {
3021  unsigned int nSubStart = CKNMatrixOperation::pRow[ii], nSubEnd = CKNMatrixOperation::pRow[ii+1];
3022  unsigned int startColIndex = (int)mine->GetFirstRowIndex(), endColIndex = startColIndex + mine->GetColumnCount() - 1;
3023 
3024  for(unsigned int jj = nSubStart; jj < nSubEnd; jj++)
3025  {
3026  unsigned int nColIndex = CKNMatrixOperation::pColumn[jj];
3027  if(startColIndex <= nColIndex && nColIndex <= endColIndex)
3028  {
3029  my_nnz++;
3030  pData = source->GetValue(jj);
3031  //mine->PushNoneZeroValue(lpMatrixValueElement->pfReal[jj], lpMatrixValueElement->pfImaginary[jj], ii, nColIndex-startColIndex);
3032  mine->PushNoneZeroValue(pData->GetRealNumber(), pData->GetImaginaryNumber(), ii, nColIndex-startColIndex);
3033  }
3034  }
3035  }
3036 
3037  mine->FinishPush();
3038 
3039 // if(CKNMPIManager::IsRootRank())
3040 // printf("My block conversion completed: my_nnz = %d (computed), %d (CSR-allocated)\n", my_nnz, mine->GetNoneZeroCount());
3041 
3042 }
3043 
3051 {
3052  CKNMatrixOperation::pRow = source->m_vectRow.data();
3053  CKNMatrixOperation::pColumn = source->m_vectColumn.data();
3054  //CKNMemoryManager::LPVECTOR_ELEMENTS lpMatrixValueElement = NULL;
3055  CKNComplex *pData = NULL;
3056  unsigned int my_nnz, left_nnz, right_nnz;
3057  CKNComplex curval;
3058  int isthisrowfilled;
3059 
3060  //lpMatrixValueElement = source->GetValueElement();
3061 
3062  // 1. Update left block
3063 
3064  left_nnz = 0;
3065 
3066  for (unsigned int ii = 0; ii < source->GetRowCount(); ii++)
3067  {
3068  isthisrowfilled = -1;
3069  unsigned int nSubStart = CKNMatrixOperation::pRow[ii], nSubEnd = CKNMatrixOperation::pRow[ii+1];
3070  unsigned int startColIndex = (int)left->GetFirstRowIndex(), endColIndex = startColIndex + left->GetColumnCount() - 1;
3071 
3072  for (unsigned int jj = nSubStart; jj < nSubEnd; jj++)
3073  {
3074  unsigned int nColIndex = CKNMatrixOperation::pColumn[jj];
3075 
3076  if(startColIndex <= nColIndex && nColIndex <= endColIndex)
3077  {
3078  isthisrowfilled = 0;
3079  left_nnz++;
3080  //curval.SetRealNumber(lpMatrixValueElement->pfReal[jj]); curval.SetImaginaryNumber(lpMatrixValueElement->pfImaginary[jj]);
3081  pData = source->GetValue(jj);
3082  curval.SetRealNumber(pData->GetRealNumber()); curval.SetImaginaryNumber(pData->GetImaginaryNumber());
3083  left->SetAt(curval, ii, nColIndex-startColIndex);
3084  }
3085  }
3086 
3087  if(isthisrowfilled == -1)
3088  {
3089  left_nnz++;
3090  curval.SetRealNumber(0.0); curval.SetImaginaryNumber(0.0);
3091  left->SetAt(curval, ii, 0);
3092  }
3093  }
3094 
3095 // if(CKNMPIManager::IsRootRank())
3096 // printf("Left block update completed: left_nnz = %d (computed), %d (CSR-allocated)\n", left_nnz, left->GetNoneZeroCount());
3097 
3098  // 2. Figure out nnz: right block
3099 
3100  right_nnz = 0;
3101 
3102  for (unsigned int ii = 0; ii < source->GetRowCount(); ii++)
3103  {
3104  isthisrowfilled = -1;
3105  unsigned int nSubStart = CKNMatrixOperation::pRow[ii], nSubEnd = CKNMatrixOperation::pRow[ii+1];
3106  unsigned int startColIndex = (int)right->GetFirstRowIndex(), endColIndex = startColIndex + right->GetColumnCount() - 1;
3107 
3108  for (unsigned int jj = nSubStart; jj < nSubEnd; jj++)
3109  {
3110  unsigned int nColIndex = CKNMatrixOperation::pColumn[jj];
3111  if(startColIndex <= nColIndex && nColIndex <= endColIndex)
3112  {
3113  isthisrowfilled = 0;
3114  right_nnz++;
3115  //curval.SetRealNumber(lpMatrixValueElement->pfReal[jj]); curval.SetImaginaryNumber(lpMatrixValueElement->pfImaginary[jj]);
3116  pData = source->GetValue(jj);
3117  curval.SetRealNumber(pData->GetRealNumber()); curval.SetImaginaryNumber(pData->GetImaginaryNumber());
3118  right->SetAt(curval, ii, nColIndex-startColIndex);
3119  }
3120 
3121  }
3122 
3123  if(isthisrowfilled == -1)
3124  {
3125  right_nnz++;
3126  curval.SetRealNumber(0.0); curval.SetImaginaryNumber(0.0);
3127  right->SetAt(curval, ii, 0);
3128  }
3129  }
3130 
3131 // if(CKNMPIManager::IsRootRank())
3132 // printf("Right block update completed: right_nnz = %d (computed), %d (CSR-allocated)\n", right_nnz, right->GetNoneZeroCount());
3133 
3134  // 3. Figure out nnz: my block
3135 
3136  my_nnz = 0;
3137 
3138  for (unsigned int ii = 0; ii < source->GetRowCount(); ii++)
3139  {
3140  unsigned int nSubStart = CKNMatrixOperation::pRow[ii], nSubEnd = CKNMatrixOperation::pRow[ii+1];
3141  unsigned int startColIndex = (int)mine->GetFirstRowIndex(), endColIndex = startColIndex + mine->GetColumnCount() - 1;
3142 
3143  for(unsigned int jj = nSubStart; jj < nSubEnd; jj++)
3144  {
3145  unsigned int nColIndex = CKNMatrixOperation::pColumn[jj];
3146  if(startColIndex <= nColIndex && nColIndex <= endColIndex)
3147  {
3148  my_nnz++;
3149  //curval.SetRealNumber(lpMatrixValueElement->pfReal[jj]); curval.SetImaginaryNumber(lpMatrixValueElement->pfImaginary[jj]);
3150  pData = source->GetValue(jj);
3151  curval.SetRealNumber(pData->GetRealNumber()); curval.SetImaginaryNumber(pData->GetImaginaryNumber());
3152  mine->SetAt(curval, ii, nColIndex-startColIndex);
3153  }
3154  }
3155  }
3156 
3157 // if(CKNMPIManager::IsRootRank())
3158 // printf("My block update completed: my_nnz = %d (computed), %d (CSR-allocated)\n", my_nnz, mine->GetNoneZeroCount());
3159 };
3160 
3167 {
3168  if (mine != NULL)
3169  {
3171  mine = NULL;
3172  }
3173  if (left != NULL)
3174  {
3176  left = NULL;
3177  }
3178  if (right != NULL)
3179  {
3181  right = NULL;
3182  }
3183 }
3184 
3190 bool CKNMatrixOperation::IsSame(CKNComplex operand1, CKNComplex operand2, double tol)
3191 {
3192  if( fabs(fabs(operand1.GetRealNumber()) - fabs(operand2.GetRealNumber())) > tol )
3193  return false;
3194 
3195  if( fabs(fabs(operand1.GetImaginaryNumber()) - fabs(operand2.GetImaginaryNumber())) > tol )
3196  return false;
3197  else
3198  return true;
3199 }
3200 
3207 {
3208  CKNVector vectorTemp = *pVector1;
3209  double fNorm = 1.0;
3210 
3211  vectorTemp.MinusVector(pVector2);
3212 #ifndef DISABLE_MPI_ROUTINE
3213  fNorm = vectorTemp.GetNorm(true);
3214 #else //DISABLE_MPI_ROUTINE
3215  fNorm = vectorTemp.GetNorm();
3216 #endif //DISABLE_MPI_ROUTINE
3217 
3218  if (IsSame(fNorm, 0.0, GENERAL_TOLERANCE))
3219  return true;
3220  else
3221  return false;
3222 }
bool SetDiagonal(CKNVector vector)
Set diagonal elements.
~CKNMatrixOperation()
Destructor.
void SetSize(unsigned int nSize)
Set Vector elements size.
bool InsertMatrix(unsigned int nRow, unsigned int nColumn, unsigned int nRowStart, unsigned int nColumnStart, unsigned int nRowCount, unsigned int nColumnCount, CKNMatrixOperation::CKNDMatrix *pMatrix, bool bCopyZero)
Set element by reference matrix.
double GetImaginaryNumber() const
Get imaginary part.
Definition: KNComplex.h:27
void ScalarDivision(CKNComplex Scalar)
Scalar division operation.
#define THROW_END_EXIT(EXCEPTION_NAME)
< Macro for exception throw than exit program
Definition: CKNGlobal.h:11
unsigned int m_nValueCount
A numbers of elements.
void AppendMatrix(APPEND_DRIECTION direction, unsigned int nCount)
Appending matrix with direction.
void ScalarMultiple(CKNComplex Scalar)
Scalar multiple operation.
void Normalize(bool bMPI=false)
Normalize vector with norm.
CKNCSR * SplitCSR(int nStart, int nEnd)
Split CSR to MPI slave.
CKNMatrixOperation::CKNVector operator/(CKNMatrixOperation::CKNVector &vector, const CKNComplex fScalar)
void IncreaseNoneZeroCount()
Increasing saved none zero elements count.
void ReorthogonalizationVector(CKNVector *pVector, CKNComplex complex)
Do reorthogonalization.
void ScalarMultiThanMinusVector(double fScalar, CKNVector *vector)
Do minus operation after scalar multiple to operand between vectors.
static void AllReduceComlex(CKNComplex *pNumber, CKNTimeMeasurement::MEASUREMENT_INDEX INDEX=CKNTimeMeasurement::COMM)
Do all reduce function with CKNComplex.
bool GetSmallMatrix(unsigned int nRowStartIndex, unsigned int nColumnStartIndex, unsigned int nRowCount, unsigned int nColumnCount, CKNMatrixOperation::CKNDMatrix *pMatrix)
Get matrix from large matrix.
unsigned int nComponentsFirstUnitCell
Atom counts for interoperaton with previous node.
static MPI_Comm GetMPIComm()
Definition: KNMPIManager.h:67
Data and operation representation of Matrix.
unsigned int X_largest
static void FreeCSR(CKNMatrixOperation::CKNCSR *pCSR)
Deallocating CSR memory.
bool InsertRowAtEnd()
Insert row after last row.
static unsigned int * pRow
For MPI Optimized operation using.
double_vector_t m_vectValueRealBuffer
A member variable for saving none zero elements.
void ExpandMatrix(unsigned int nMulti, bool bRow, bool bColumn)
Expand matrix order.
#define LOOP_OPTIMIZE_COUNT
double m_fFirstRowIndex
First row index in this node.
double GetRealNumber() const
Get real part.
Definition: KNComplex.h:26
void operator+=(CKNDMatrix &matrix)
operation overload for adding with reference parameter
static CKNMatrixOperation::CKNCSR * BuildCSRFromFileTemp(FILE *fDataFile, unsigned int nRowOrder, unsigned int nColumnOrder, int nDataCount)
Building CSR from file using double, double, double, double order.
unsigned int GetColumnCount()
Getting row size of matrix.
int compare(const void *pA, const void *pB)
Data and operation representation of CSR(Compressed Sparse Row)
bool PushMatrix(unsigned int nRow, unsigned int nColumn, unsigned int nRowStart, unsigned int nColumnStart, unsigned int nRowCount, unsigned int nColumnCount, CKNMatrixOperation::CKNDMatrix *pMatrix, bool bCopyZero)
Set element by reference matrix to end of buffer.
static void MeasurementEnd(MEASUREMENT_INDEX index)
Measurement end for part.
CKNVector operator-(CKNVector &vector)
operation overload for vector minus operation with reference parameter
unsigned int GetRowCount()
Get matrix row counts.
bool IsNonzeroElement(unsigned int nRow, unsigned int nColumn, unsigned int &nIndex)
Checking given index element has nonzero value or not.
static void MVMulEx_Optimal(CKNCSR *pAMatrix, CKNVector *pVector, CKNVector *pResult, unsigned int, unsigned int, CKNVector *, int)
Matrix and vector multiple operation for 1 layer exchanging communication.
double_vector_t m_vectValueImaginaryBuffer
A member variable for saving none zero elements.
static void MergeVector(CKNMatrixOperation::CKNVector *pVector, CKNMatrixOperation::CKNVector *pResultVector, unsigned int nMergeSize)
Merge vector to sub rank.
static int GetTotalNodeCount()
Definition: KNMPIManager.h:44
void ScalarMultiple(CKNComplex Scalar)
Scalar multiple operation.
bool ConvertDoubleArray(unsigned int *pRowPtr, unsigned int *pColIndex, double *pNNZValueReal, double *pNNZValueImaginary, unsigned int nNNZSize, unsigned int nRowSize, unsigned int nColSize, unsigned int nFirstIndex, bool bZerobase)
Convering general CSR format to CSR.
static CKNMatrixOperation::CKNCSR * BuildCSRFromOneDimArray(double *pReal, double *pImaginary, unsigned int nRowOrder, unsigned int nColumnOrder)
Building CSR from one dimension array.
static int GetLoadBalanceCount(int nRank)
#define GENERAL_TOLERANCE
General tolerance definition.
Definition: CKNGlobal.h:48
static double AllReduceDouble(double fNumber)
Do all reduce function with CKNComplex.
const unsigned long ERROR_OUT_OF_RANGE
Error code that means during access vector or matrix input index out of range.
Definition: CKNGlobal.h:63
void MinusVector(CKNVector *vector)
Do minus operation between vectors.
uint_vector_t m_vectColumn
A member variable for saving column information.
bool ElementScalarMultiple(unsigned int nRow, unsigned int nColumn, CKNComplex Scalar)
Scalar multiple operation.
void BuildDataBuffer()
Allocating memory for class member variable.
static void MMMul(CKNDMatrix *pMatrix, CKNDMatrix *pMatrixOperand, CKNDMatrix *pResult)
Matrix and matrix multiple operation.
bool AreaScalarMultiple(unsigned int nRowStart, unsigned int nRowCount, unsigned int nColumnStart, unsigned int nColumnCount, CKNComplex Scalar)
Scalar multiple operation to specific area.
unsigned int GetNextNonzeroValueIndex(unsigned int nRow, unsigned int nColumn)
Get next index of given row, column index.
static bool IsSameA(double operand1, double operand2, double tol)
Compare two double variable.
static unsigned int MAX_INDEX
constant variable for row that has no element
static int Compare(const void *pA, const void *pB)
For qick sort compare operation.
static CKNMatrixOperation::CKNCSR * BuildCSRFromFileUnsortdata(FILE *fDataFile, unsigned int nRowOrder, unsigned int nColumnOrder, int nDataCount)
uilding CSR form file and before building CSR sorting data
const unsigned long ERROR_WRONG_ORDER_OPERATION
Error code that means during operation between vector & vector, matrix & vector order don't match...
Definition: CKNGlobal.h:64
unsigned int GetRowIndexNo(unsigned int nIndex)
Getting column size of matrix.
CKNMatrixOperation()
Constructor.
bool SetAt(CKNComplex number, unsigned int nRow, unsigned int nColumn)
Set element to specific index.
Common definition for Solver.
void PushNoneZeroValue(double fRealValue, double fImaginaryValue, unsigned int nRow, unsigned int nCol)
Saving none zero value.
bool DiagonalOperation(CKNVector *pOperand, OPERATION_TYPE type, bool bUseSplitVector)
To diagonal element do operation.
static void MVMulEx_AsyncCommWithLocalBlocks(CKNMatrixOperation::CKNCSR *mylocalblock, CKNMatrixOperation::CKNCSR *leftlocalblock, CKNMatrixOperation::CKNCSR *rightlocalblock, CKNVector *pVector, CKNVector *pResult, double *X, double *Xrt, double *Xlt)
Matrix and vector multiple operation using by block csr.
CKNComplex GetAt(unsigned int nIndex)
Get element value from specific index.
Collection of vector and matrix operation.
unsigned int GetColIndexNo(unsigned int nIndex)
Getting Column information data by index.
bool GetNextRowIndexValue(unsigned int nRowFrom, unsigned int &nValueIndex)
Get row index value finding from nRowFrom to end.
static void MergeVectorEx_Optimal(CKNMatrixOperation::CKNVector *pVector, CKNMatrixOperation::CKNVector *pResultVector, unsigned int nMergeSize, double fFirstIndex, unsigned int nSizeFromPrevRank, unsigned int nSizeFromNextRank, unsigned int nSizetoPrevRank, unsigned int nSizetoNextRank, unsigned int *)
Merge vector for 1 layer exchanging.
bool TrnasPos()
Transpos matrix.
void IncreaseRowIndex(unsigned int nRowFrom)
Increase m_nRowCount array value + 1 from nRowFrom. It means at m_nRowCount element has been inserted...
bool Serialize(double *pBuffer, bool bStore)
Serialize vector.
bool PushMatrixConcurrent(unsigned int nRow, CKNMatrixOperation::LPFILL_MATRIX_DATA lpData, bool bCopyZeroOnSite)
Pushing matrix into CSR several sub matrixs.
CKNComplex operator*(CKNVector &vector)
operation overload for dot product with reference parameter
bool InsertRowBefore(unsigned int nRow)
Insert row before specific row index.
bool SetElement(unsigned int nRow, unsigned int nColumn, CKNComplex element)
Set matrix elements value.
static void UpdateLocalCSR(CKNMatrixOperation::CKNCSR *source, CKNMatrixOperation::CKNCSR *mine, CKNMatrixOperation::CKNCSR *left, CKNMatrixOperation::CKNCSR *right)
#define ATOM_DEFAULT_INDEX
Atom index default value that empty atom instance.
Definition: CKNGlobal.h:53
unsigned int nComponentsLastUnitCell
Atom counts for interoperaton with next node.
static void MVMul(CKNCSR *pAMatrix, CKNVector *pVector, CKNVector *pResult)
Matrix and vector multiple operation.
unsigned int GetColumnCount()
Get matrix column counts.
double GetFirstRowIndex()
Set first row index.
static void MVMulOptimal(CKNCSR *pAMatrix, CKNVector *pVector, CKNVector *pResult)
Matrix and vector multiple operation for multiple call.
struct CKNMatrixOperation::FILL_MATRIX_DATA * LPFILL_MATRIX_DATA
Time measurement class.
void operator=(CKNDMatrix &matrix)
operation overload for subsitution with reference parameter
CKNMatrixOperation::CKNVector operator*(const CKNComplex fScalar, CKNMatrixOperation::CKNVector &vector)
double_vector_t m_vectValueImaginaryBuffer
A member variable for saving none zero elements.
bool InsertColumnBefore(unsigned int nColumn)
Insert column before specific column index.
void SetAtEx(unsigned int nIndex, CKNComplex *pValue)
Set element value in specific index, Call by reference.
void Finalize()
Free allocated memory for vector elements.
void BuildRandomVector()
Building vector that has random value elements.
This class includes functions for matrix debugging.
void ScalarDivision(double fScalar)
Scalar division operation.
bool SetRowElement(CKNVector vector, unsigned int nRowIndex)
Set matrix element with row, column index.
double GetNorm(bool bMPI=false)
Getting norm of vector.
static int GetCurrentRank()
Definition: KNMPIManager.h:42
CKNVector operator*(CKNVector &vector)
operation overload for matrix and vector multiple operation with reference parameter ...
CKNComplex GetElement(unsigned int nRowIndex, unsigned int nColumnIndex)
Get matrix element with row, column index.
CKNComplex * GetValue(unsigned int nIndex)
Getting none zero element value by index.
void ResetValue()
Reset every element to zero.
void DumpCSR(const char *pstrFileName)
For debugging save CSR into file.
double_vector_t m_vectValueRealBuffer
A member variable for saving none zero elements.
void Add(CKNComplex complex)
Adding operation to this class.
Definition: KNComplex.cpp:68
static void BuildLocalCSR(CKNMatrixOperation::CKNCSR *source, CKNMatrixOperation::CKNCSR *mine, CKNMatrixOperation::CKNCSR *left, CKNMatrixOperation::CKNCSR *right)
static void MergeVectorOptimal(CKNMatrixOperation::CKNVector *pSrcVector, CKNMatrixOperation::CKNVector *pResultVector, unsigned int nMergeSize, double fFirstIndex)
Merge vector to sub rank, operated without vector class member function call.
CKNComplex * GetAtPt(unsigned int nIndex)
Get element value from specific index.
void SetAt(unsigned int nIndex, CKNComplex value)
Set element value in specific index, Call by value.
bool SetColumnElement(CKNVector vector, unsigned int nColumnIndex)
Set matrix column.
void operator=(CKNVector &vector)
operation overload for subsitution with reference parameter
unsigned int GetSize()
Return Vector elements size.
static unsigned int * pColumn
For MPI Optimized operation using.
bool InsertColumnAtEnd()
Insert column after last column.
void SetRealNumber(double fRealNumber)
Set real part.
Definition: KNComplex.h:31
static CKNComplex * pValueBuffer
For MPI Optimized operation using.
CKNComplex GetElement(unsigned int nRow, unsigned int nColumn, bool &bResult)
Get Element by index.
const unsigned long ERROR_MALLOC
Error code that means error occur during memory allocation.
Definition: CKNGlobal.h:62
This class for complex operation and saving value.
Definition: KNComplex.h:18
void PlusVector(CKNVector *vector)
Do plus operation between vectors.
void FinishPush()
Insert end index of none zero value index.
bool GetColumnByVector(unsigned int nColumnIndex, CKNMatrixOperation::CKNVector *pVector)
Get column elements.
MPI Mangement class.
void SetRowCount(unsigned int nRow)
Settting row size of matrix.
static CKNComplex MulltiplyComplex(CKNComplex complex1, CKNComplex complex2)
Multiple operation between complex numbers.
Definition: KNComplex.cpp:127
static void MeasurementStart(MEASUREMENT_INDEX index)
Measurement start for part.
std::vector< unsigned int, boost::alignment::aligned_allocator< unsigned int, 64 > > uint_vector_t
static void AllocateLocalCSR(CKNMatrixOperation::CKNCSR **mine, CKNMatrixOperation::CKNCSR **left, CKNMatrixOperation::CKNCSR **right)
void Finalize()
Deallocating memory for member variable.
bool GetRowByVector(unsigned int nRowIndex, CKNMatrixOperation::CKNVector *pVector)
Get row elements.
CKNVector operator+(CKNVector &vector)
operation overload for vector plus operation with reference parameter
void SetColumnCount(unsigned int nColumn)
Settting column size of matrix.
#define ORBITALS
At Hamiltonian matrix one atom inserted 10 * 10.
Definition: CKNGlobal.h:54
uint_vector_t m_vectRow
A member variable for saving row information.
static bool IsSame(double operand1, double operand2, double tol)
Compare two double variable.
static int Gram_schmidt(CKNVector *pVect1, CKNVector *pVect2)
Doing gam schmidt orthogonalization.
bool InsertVector(unsigned int nStartIndex, CKNMatrixOperation::CKNVector *pVector)
void SetComplexNumber(double fReal, double fImaginaray)
Set Complex number using real part and imaginary part.
Definition: KNComplex.cpp:59
struct CKNMatrixOperation::CSR_ELEMENT_DATA * LPCSR_ELEMENT_DATA
bool BuildMatrixFirst(unsigned int nRow, unsigned int nColumn)
Building matrix elements.
static CKNMatrixOperation::CKNCSR * BuildCSRFromFile_(FILE *fDataFile, unsigned int nRowOrder, unsigned int nColumnOrder, int nDataCount)
Building CSR from file using int, int, double, double order.
CKNMatrixOperation::CKNDMatrix * pMatrix
void SetImaginaryNumber(double fImaginaryNumber)
Set imagenary part.
Definition: KNComplex.h:32
Hamiltonian building data.
#define REPEAT_COUNT
This class for describing vector for Lanczos method.
static bool VVDot(CKNVector *pVector1, CKNVector *pVector2, CKNComplex *pResult)
Between vectors dot product operation.
static void FreeLocalCSR(CKNMatrixOperation::CKNCSR *mine, CKNMatrixOperation::CKNCSR *left, CKNMatrixOperation::CKNCSR *right)