IPCC  1.0
KNMPIManager.cpp
1 
7 #include "stdafx.h"
8 #include "CKNGlobal.h"
9 #include "KNMPIManager.h"
10 #include "KNIPCCUtility.h"
11 #include "KNTimeMeasurement.h"
12 #include <math.h>
13 #include <string.h>
14 #include <algorithm>
15 
19 bool CKNMPIManager::m_bStartMPI = false;
20 int *CKNMPIManager::m_pLoadBalance = NULL;
23 int* CKNMPIManager::m_pRecvCount = NULL;
24 int* CKNMPIManager::m_pSendCount = NULL;
25 int* CKNMPIManager::m_pBankInfo = NULL;
26 int* CKNMPIManager::m_pDispls = NULL;
27 MPI_Request CKNMPIManager::m_SendDoubleAsyncRequest = MPI_REQUEST_NULL;
28 MPI_Request CKNMPIManager::m_ReceiveDoubleAsyncRequest = MPI_REQUEST_NULL;
29 unsigned int CKNMPIManager::m_nMPILevel = 1;
30 bool CKNMPIManager::m_bNeedPostOperation[10] = { false, false, false, false, false, false, false, false, false, false };
31 MPI_Comm CKNMPIManager::m_mpiCommIndex = MPI_COMM_WORLD;
32 MPI_Comm CKNMPIManager::m_deflationComm = MPI_COMM_NULL;
33 MPI_Group CKNMPIManager::m_lanczosGroup = MPI_GROUP_EMPTY;
34 MPI_Group CKNMPIManager::m_deflationGroup = MPI_GROUP_EMPTY;
35 bool CKNMPIManager::m_bMultiLevel = false;
37 
38 CKNMPIManager::CKNMPIManager()
39 {
40 }
41 
42 CKNMPIManager::~CKNMPIManager()
43 {
44 }
45 
51 bool CKNMPIManager::InitLevel(int nMPILevel, int nFindingDegeneratedEVCount)
52 {
53  bool bRtn = true;
54  int nNeedNodeCount = 1;
55  int world_size, rank;
56  int nPerGroupNode;
57  int nLanczosGroupIndex;
58  int *pNewGroupRank = NULL;
59  unsigned int i;
60  MPI_Group commWorldGroup;
61 
62 
63  if( 1 == nMPILevel )
64  {
65  m_mpiCommIndex = MPI_COMM_WORLD;
66  return bRtn;
67  }
68 
69  if( nFindingDegeneratedEVCount > 1 )
70  {
71  m_bNeedPostOperation[1] = true;
72  nNeedNodeCount *= nFindingDegeneratedEVCount;
73  nMPILevel--;
74  }
75 
77  if( nMPILevel == 1 )
78  {
79  bRtn = CheckDeflationNodeCount(nNeedNodeCount);
80  if( !bRtn )
81  return bRtn;
82  }
83 
85  nPerGroupNode = GetTotalNodeCount() / nNeedNodeCount;
86  nLanczosGroupIndex = GetCurrentRank() / nPerGroupNode;
87  pNewGroupRank = (int*)malloc(sizeof(int)*nPerGroupNode);
88 
90  for( i = 0; i < (unsigned int)nPerGroupNode; ++i)
91  pNewGroupRank[i] = nLanczosGroupIndex * nPerGroupNode + i;
92 
93  MPI_Comm_group(MPI_COMM_WORLD,&commWorldGroup);
94  MPI_Group_incl(commWorldGroup,nPerGroupNode,pNewGroupRank,&m_lanczosGroup);
95  MPI_Comm_create(MPI_COMM_WORLD,m_lanczosGroup,&m_mpiCommIndex);
96  MPI_Comm_size(m_mpiCommIndex, &world_size);
97  MPI_Comm_rank(m_mpiCommIndex, &rank);
98  SetMPIEnviroment(rank, world_size);
99  m_nLanczosGroupIndex = nLanczosGroupIndex;
100 
102  pNewGroupRank = (int*)realloc(pNewGroupRank, sizeof(int)*nFindingDegeneratedEVCount);
103  for( i = 0; i < (unsigned int)nFindingDegeneratedEVCount; ++i)
104  pNewGroupRank[i] = i * nPerGroupNode + GetCurrentRank();
105 
106  MPI_Comm_group(MPI_COMM_WORLD,&commWorldGroup);
107  MPI_Group_incl(commWorldGroup,nFindingDegeneratedEVCount,pNewGroupRank,&m_deflationGroup);
108  MPI_Comm_create(MPI_COMM_WORLD,m_deflationGroup,&m_deflationComm);
109  MPI_Comm_rank(m_deflationComm, &rank);
110  MPI_Comm_size(m_mpiCommIndex, &world_size);
111 
112  m_bMultiLevel = true;
113 
114  FREE_MEM(pNewGroupRank);
115 
118 
119  bRtn = true;
120  return bRtn;
121 }
122 
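Note: the following standalone sketch (not part of KNMPIManager.cpp) illustrates the group-based communicator split that InitLevel() performs, using the same MPI_Comm_group / MPI_Group_incl / MPI_Comm_create sequence. It assumes the world size is divisible by the number of groups; names such as nGroups, subGroup and subComm are illustrative only. MPI_Comm_split would build the same partition in one call; the explicit group route is shown because it mirrors the code above.

// sketch_comm_split.cpp : minimal sketch of the group-based communicator split (assumptions noted above).
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int nGroups = 2;                      // e.g. the number of degenerate eigenvalues to find
    int nPerGroup = size / nGroups;       // ranks per group (assumes exact divisibility)
    int nGroupIndex = rank / nPerGroup;   // which group this rank belongs to

    // Build the list of consecutive world ranks that form this rank's group.
    int *pRanks = (int*)malloc(sizeof(int) * nPerGroup);
    for (int i = 0; i < nPerGroup; ++i)
        pRanks[i] = nGroupIndex * nPerGroup + i;

    MPI_Group worldGroup, subGroup;
    MPI_Comm subComm;
    MPI_Comm_group(MPI_COMM_WORLD, &worldGroup);
    MPI_Group_incl(worldGroup, nPerGroup, pRanks, &subGroup);
    MPI_Comm_create(MPI_COMM_WORLD, subGroup, &subComm);

    int subRank, subSize;
    MPI_Comm_rank(subComm, &subRank);
    MPI_Comm_size(subComm, &subSize);
    printf("world %d/%d -> group %d, sub %d/%d\n", rank, size, nGroupIndex, subRank, subSize);

    free(pRanks);
    MPI_Group_free(&subGroup);
    MPI_Comm_free(&subComm);
    MPI_Group_free(&worldGroup);
    MPI_Finalize();
    return 0;
}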
127 bool CKNMPIManager::CheckDeflationNodeCount(int nNeedNodeCount)
128 {
129  if( 0 == m_nTotalNode % nNeedNodeCount )
130  return true;
131  else
132  return false;
133 }
134 
139 void CKNMPIManager::SetMPIEnviroment(int nRank, int nTotalNode)
140 {
141  m_nCurrentRank = nRank;
142  m_nTotalNode = nTotalNode;
143  m_bStartMPI = true;
144 }
145 
150 void CKNMPIManager::LoadBlancingForLanczos(int nRowCount)
151 {
152  int i, j, temp;
153  m_pLoadBalance = (int *)malloc(sizeof(int)*(m_nTotalNode)); // For communication size
154  m_pLoadBalance[0] = nRowCount;
155 
156 #ifdef DISABLE_MPI_ROUTINE
157  return;
158 #endif
159 
160  if (nRowCount < 10 * m_nTotalNode){
161  CKNIPCCUtility::ShowMsg("ERROR :: matrix dimension < 10 * number of CPU for Lanczos "); // Error out
162  }
163 
164  if (0 == (nRowCount / 10) % m_nTotalNode){
165  for (i = 0; i<m_nTotalNode; i++){
166  m_pLoadBalance[i] = nRowCount / m_nTotalNode;
167  }
168  }
169  else{
170  temp = nRowCount;
171  for (i = m_nTotalNode - 1; i>-1; i--){
172  m_pLoadBalance[i] = ((int)(nRowCount / m_nTotalNode / 10 + 1)) * 10;
173  temp -= m_pLoadBalance[i];
174  if ((temp / 10) % i == 0){
175  for (j = 0; j < i; j++){
176  m_pLoadBalance[j] = temp / i;
177  }
178  temp = 0;
179  break;
180  }
181  }
182  }
183 }
184 
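Note: a standalone trace (assumed inputs, not the project's code) of the block-of-10 partitioning rule implemented above. With nRowCount = 100 and nTotalNode = 4 it yields 20, 20, 30, 30: trailing nodes receive a share rounded up to a multiple of 10 and the remainder is spread evenly over the leading nodes.

// sketch_load_balance.cpp : standalone sketch of the LoadBlancingForLanczos() partitioning rule.
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    int nRowCount = 100, nTotalNode = 4;   // assumed example values
    int *pLoadBalance = (int*)malloc(sizeof(int) * nTotalNode);
    int temp = nRowCount;

    if (0 == (nRowCount / 10) % nTotalNode)
    {
        // The row count splits evenly in blocks of 10: give every node the same share.
        for (int i = 0; i < nTotalNode; i++)
            pLoadBalance[i] = nRowCount / nTotalNode;
    }
    else
    {
        // Walk from the last node down, rounding each share up to a multiple of 10,
        // until the remaining rows divide evenly over the first i nodes.
        for (int i = nTotalNode - 1; i > -1; i--)
        {
            pLoadBalance[i] = ((nRowCount / nTotalNode / 10) + 1) * 10;
            temp -= pLoadBalance[i];
            if ((temp / 10) % i == 0)
            {
                for (int j = 0; j < i; j++)
                    pLoadBalance[j] = temp / i;
                break;
            }
        }
    }

    for (int i = 0; i < nTotalNode; i++)
        printf("node %d: %d rows\n", i, pLoadBalance[i]);   // prints 20, 20, 30, 30

    free(pLoadBalance);
    return 0;
}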
189 void CKNMPIManager::LoadBlancing(int nElementCount)
190 {
191  m_pLoadBalance = (int *)malloc(sizeof(int)*(m_nTotalNode)); // For communication size
192 
193 #ifdef DISABLE_MPI_ROUTINE
194  return;
195 #endif
196 
197  MPI_Allgather(&nElementCount, 1, MPI_INT, m_pLoadBalance, 1, MPI_INT, m_mpiCommIndex);
198 }
199 
203 int CKNMPIManager::GetLoadBalanceCount(int nRank)
204 {
205 #ifdef DISABLE_MPI_ROUTINE
206  return 0;
207 #endif
208  if (nRank >= m_nTotalNode)
209  return 0;
210 
211  return m_pLoadBalance[nRank];
212 }
213 
217 bool CKNMPIManager::IsRootRank()
218 {
219  bool bRtn = true;
220 
221 #ifdef DISABLE_MPI_ROUTINE
222  return bRtn;
223 #endif
224 
225  if (0 == GetCurrentRank())
226  return bRtn;
227 
228  bRtn = false;
229  return bRtn;
230 }
231 
236 bool CKNMPIManager::IsRootRank(MPI_Comm comm)
237 {
238  if (MPI_COMM_NULL == comm)
239  return true;
240 
241  if( 0 == GetCurrentRank(comm) )
242  return true;
243  else
244  return false;
245 }
246 
251 int CKNMPIManager::GetCurrentRank(MPI_Comm comm)
252 {
253  int rank;
254 
255  MPI_Comm_rank(comm, &rank);
256  return rank;
257 }
258 
263 void CKNMPIManager::BroadcastVector(CKNMatrixOperation::CKNVector *pVector)
264 {
265 #ifdef DISABLE_MPI_ROUTINE
266  if (!IsInMPIRoutine())
267  return;
268 #endif
269 }
270 
276 void CKNMPIManager::MergeVector(CKNMatrixOperation::CKNVector *pVector, CKNMatrixOperation::CKNVector *pResultVector, unsigned int nMergeSize)
277 {
278  LPCOMPLEX_NUMBER lpSendBuffer = NULL;
279  unsigned int i;
280 
281 #ifdef DISABLE_MPI_ROUTINE
282  return;
283 #endif
284 
285 
286  double *pBuffer = (double*)malloc(sizeof(double)*nMergeSize);
288  MPI_Allgatherv(pVector->m_vectValueRealBuffer.data(), GetCurrentLoadBalanceCount(), MPI_DOUBLE,
289  pBuffer, m_pRecvCount, m_pDispls, MPI_DOUBLE, m_mpiCommIndex);
290 
291  for (i = 0; i < nMergeSize; i++)
292  pResultVector->m_vectValueRealBuffer[i] = pBuffer[i];
293 
294  MPI_Allgatherv(pVector->m_vectValueImaginaryBuffer.data(), GetCurrentLoadBalanceCount(), MPI_DOUBLE,
295  pBuffer, m_pRecvCount, m_pDispls, MPI_DOUBLE, m_mpiCommIndex);
296 
297  for (i = 0; i < nMergeSize; i++)
298  pResultVector->m_vectValueImaginaryBuffer[i] = pBuffer[i];
299 
300  FREE_MEM(pBuffer);
301 
303 
304 }
305 
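Note: MergeVector() gathers the distributed real and imaginary parts with MPI_Allgatherv. The standalone sketch below (not the project's code) shows that pattern with uneven per-rank counts and the same count/displacement bookkeeping that InitCommunicationBufferMetric() prepares; the per-rank sizes are assumptions chosen for illustration.

// sketch_allgatherv.cpp : minimal sketch of the MPI_Allgatherv merge pattern.
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int nLocal = rank + 1;                      // uneven load per rank (assumption)
    double *pLocal = (double*)malloc(sizeof(double) * nLocal);
    for (int i = 0; i < nLocal; ++i)
        pLocal[i] = rank + 0.1 * i;

    // Build receive counts and displacements for every rank.
    int *pCounts = (int*)malloc(sizeof(int) * size);
    int *pDispls = (int*)malloc(sizeof(int) * size);
    MPI_Allgather(&nLocal, 1, MPI_INT, pCounts, 1, MPI_INT, MPI_COMM_WORLD);
    pDispls[0] = 0;
    for (int i = 1; i < size; ++i)
        pDispls[i] = pDispls[i - 1] + pCounts[i - 1];

    int nTotal = pDispls[size - 1] + pCounts[size - 1];
    double *pMerged = (double*)malloc(sizeof(double) * nTotal);
    MPI_Allgatherv(pLocal, nLocal, MPI_DOUBLE, pMerged, pCounts, pDispls, MPI_DOUBLE, MPI_COMM_WORLD);

    if (rank == 0)
        printf("merged %d values on every rank\n", nTotal);

    free(pLocal); free(pCounts); free(pDispls); free(pMerged);
    MPI_Finalize();
    return 0;
}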
317 void CKNMPIManager::MergeVectorEx_Optimal(CKNMatrixOperation::CKNVector *pSrcVector, CKNMatrixOperation::CKNVector *pResultVector, unsigned int nMergeSize, double fFirstIndex, unsigned int nSizeFromPrevRank, unsigned int nSizeFromNextRank, unsigned int nSizetoPrevRank, unsigned int nSizetoNextRank, unsigned int *mPos)
318 {
319  if (m_nTotalNode <= 3)
320  {
322  MPI_Allgatherv(pSrcVector->m_vectValueRealBuffer.data(), GetCurrentLoadBalanceCount(), MPI_DOUBLE, pResultVector->m_vectValueRealBuffer.data(), m_pRecvCount, m_pDispls, MPI_DOUBLE, m_mpiCommIndex);
323  MPI_Allgatherv(pSrcVector->m_vectValueImaginaryBuffer.data(), GetCurrentLoadBalanceCount(), MPI_DOUBLE, pResultVector->m_vectValueImaginaryBuffer.data(), m_pRecvCount, m_pDispls, MPI_DOUBLE, m_mpiCommIndex);
325  mPos[0] = -1; mPos[1] = -1; mPos[2] = -1;
326  }
327  else
328  {
329  long long fCurrentRankPos = fFirstIndex;
330  int nPrevRank = (m_nCurrentRank - 1 + m_nTotalNode) % m_nTotalNode;
331  int nNextRank = (m_nCurrentRank + 1) % m_nTotalNode;
332  long long fPrevRankPos = -1, fNextRankPos = -1;
333  double *pSendBuffer = NULL, *pRecvBuffer = NULL;
334  MPI_Request req[2];
335  MPI_Status status[2];
336 
337  if (0 == m_nCurrentRank)
338  fPrevRankPos = nMergeSize - nSizeFromPrevRank;
339  else
340  fPrevRankPos = fFirstIndex - nSizeFromPrevRank;
341 
342  if (m_nCurrentRank == m_nTotalNode - 1)
343  fNextRankPos = 0;
344  else
345  fNextRankPos = fFirstIndex + GetLoadBalanceCount(m_nCurrentRank);
346 
347  mPos[0] = (unsigned int)fPrevRankPos; mPos[1] = (unsigned int)fCurrentRankPos; mPos[2] = (unsigned int)fNextRankPos;
348 
349  //printf("Rank %d: myload=%d, nSizeFromPrevRank=%d, nSizeFromNextRank=%d, nSizetoPrevRank=%d, nSizetoNextRank=%d, fPrevRankPos=%d, fNextRankPos=%d\n", m_nCurrentRank, GetLoadBalanceCount(m_nCurrentRank), nSizeFromPrevRank, nSizeFromNextRank, nSizetoPrevRank, nSizetoNextRank, fPrevRankPos, fNextRankPos);
350 
351  pSendBuffer = pSrcVector->m_vectValueRealBuffer.data();
352  pRecvBuffer = pResultVector->m_vectValueRealBuffer.data();
353 
355  MPI_Irecv(pRecvBuffer + fPrevRankPos, nSizeFromPrevRank, MPI_DOUBLE, nPrevRank, m_nCurrentRank, m_mpiCommIndex, &req[0]);
356  MPI_Isend(pSendBuffer + GetLoadBalanceCount(m_nCurrentRank) - nSizetoNextRank, nSizetoNextRank, MPI_DOUBLE, nNextRank, nNextRank, m_mpiCommIndex, &req[1]);
357  MPI_Waitall(2, req, status);
358 
359  MPI_Irecv(pRecvBuffer + fNextRankPos, nSizeFromNextRank, MPI_DOUBLE, nNextRank, m_nCurrentRank, m_mpiCommIndex, &req[0]);
360  MPI_Isend(pSendBuffer, nSizetoPrevRank, MPI_DOUBLE, nPrevRank, nPrevRank, m_mpiCommIndex, &req[1]);
361  MPI_Waitall(2, req, status);
363 
364  pSendBuffer = pSrcVector->m_vectValueImaginaryBuffer.data();
365  pRecvBuffer = pResultVector->m_vectValueImaginaryBuffer.data();
366 
368  MPI_Irecv(pRecvBuffer + fPrevRankPos, nSizeFromPrevRank, MPI_DOUBLE, nPrevRank, m_nCurrentRank, m_mpiCommIndex, &req[0]);
369  MPI_Isend(pSendBuffer + GetLoadBalanceCount(m_nCurrentRank) - nSizetoNextRank, nSizetoNextRank, MPI_DOUBLE, nNextRank, nNextRank, m_mpiCommIndex, &req[1]);
370  MPI_Waitall(2, req, status);
371 
372  MPI_Irecv(pRecvBuffer + fNextRankPos, nSizeFromNextRank, MPI_DOUBLE, nNextRank, m_nCurrentRank, m_mpiCommIndex, &req[0]);
373  MPI_Isend(pSendBuffer, nSizetoPrevRank, MPI_DOUBLE, nPrevRank, nPrevRank, m_mpiCommIndex, &req[1]);
374  MPI_Waitall(2, req, status);
376 
377  memcpy(pResultVector->m_vectValueRealBuffer.data() + (long long)fFirstIndex, pSrcVector->m_vectValueRealBuffer.data(), GetLoadBalanceCount(m_nCurrentRank) * sizeof(double));
378  memcpy(pResultVector->m_vectValueImaginaryBuffer.data() + (long long)fFirstIndex, pSrcVector->m_vectValueImaginaryBuffer.data(), GetLoadBalanceCount(m_nCurrentRank) * sizeof(double));
379 
380  }
381 }
382 
389 void CKNMPIManager::MergeVectorOptimal(CKNMatrixOperation::CKNVector *pSrcVector, CKNMatrixOperation::CKNVector *pResultVector, unsigned int nMergeSize, double fFirstIndex)
390 {
391  if( m_nTotalNode <= 3 )
392  {
394  MPI_Allgatherv(pSrcVector->m_vectValueRealBuffer.data(), GetCurrentLoadBalanceCount(), MPI_DOUBLE,
395  pResultVector->m_vectValueRealBuffer.data(), m_pRecvCount, m_pDispls, MPI_DOUBLE, m_mpiCommIndex);
396  MPI_Allgatherv(pSrcVector->m_vectValueImaginaryBuffer.data(), GetCurrentLoadBalanceCount(), MPI_DOUBLE,
397  pResultVector->m_vectValueImaginaryBuffer.data(), m_pRecvCount, m_pDispls, MPI_DOUBLE, m_mpiCommIndex);
399  }
400  else
401  {
402  double fCurrentRankPos = fFirstIndex;
403  long long fPrevRankPos = -1, fNextRankPos = -1;
404  int nPrevRank = (m_nCurrentRank-1+m_nTotalNode)%m_nTotalNode;
405  int nNextRank = (m_nCurrentRank+1)%m_nTotalNode;
406  long long nMax;
407  double *pSendBuffer = NULL, *pRecvBuffer = NULL;
408  MPI_Request req[2];
409  MPI_Status status[2];
410 
411  if( 0 == m_nCurrentRank )
412  fPrevRankPos = nMergeSize - GetLoadBalanceCount(nPrevRank);
413  else
414  fPrevRankPos = fFirstIndex - GetLoadBalanceCount(nPrevRank);
415 
416  if (m_nCurrentRank == m_nTotalNode - 1)
417  fNextRankPos = 0;
418  else
419  fNextRankPos = fCurrentRankPos + GetCurrentLoadBalanceCount();
420 
421 #ifdef _WIN32
422  nMax = max(GetLoadBalanceCount(nPrevRank), GetLoadBalanceCount(nNextRank));
423 #else //_WIN32
424  nMax = std::max(GetLoadBalanceCount(nPrevRank), GetLoadBalanceCount(nNextRank));
425 #endif//
426 
428  pRecvBuffer = (double*)malloc(sizeof(double)*nMax*2);
429  pSendBuffer = (double*)malloc(sizeof(double)*GetLoadBalanceCount(m_nCurrentRank)*2);
431 
432 
433  memcpy(pSendBuffer, pSrcVector->m_vectValueRealBuffer.data(), sizeof(double)*GetLoadBalanceCount(m_nCurrentRank));
434  memcpy(pSendBuffer+ GetLoadBalanceCount(m_nCurrentRank), pSrcVector->m_vectValueImaginaryBuffer.data() , sizeof(double)*GetLoadBalanceCount(m_nCurrentRank));
435 
437  MPI_Irecv(pRecvBuffer, 2 * GetLoadBalanceCount(nPrevRank), MPI_DOUBLE, nPrevRank, m_nCurrentRank, m_mpiCommIndex, &req[0]);
438  MPI_Isend(pSendBuffer, 2 * GetLoadBalanceCount(m_nCurrentRank), MPI_DOUBLE, nNextRank, nNextRank, m_mpiCommIndex, &req[1]);
439  MPI_Waitall(2, req, status);
441 
442  memcpy(pResultVector->m_vectValueRealBuffer.data() + fPrevRankPos, pRecvBuffer, GetLoadBalanceCount(nPrevRank) * sizeof(double));
443  memcpy(pResultVector->m_vectValueImaginaryBuffer.data() + fPrevRankPos, pRecvBuffer + GetLoadBalanceCount(nPrevRank), GetLoadBalanceCount(nPrevRank) * sizeof(double));
444 
446  MPI_Irecv(pRecvBuffer, 2 * GetLoadBalanceCount(nNextRank), MPI_DOUBLE, nNextRank, m_nCurrentRank, m_mpiCommIndex, &req[0]);
447  MPI_Isend(pSendBuffer, 2 * GetLoadBalanceCount(m_nCurrentRank), MPI_DOUBLE, nPrevRank, nPrevRank, m_mpiCommIndex, &req[1]);
448  MPI_Waitall(2, req, status);
450  memcpy(pResultVector->m_vectValueRealBuffer.data() + fNextRankPos, pRecvBuffer, GetLoadBalanceCount(nNextRank) * sizeof(double));
451  memcpy(pResultVector->m_vectValueImaginaryBuffer.data() + fNextRankPos, pRecvBuffer + GetLoadBalanceCount(nNextRank), GetLoadBalanceCount(nNextRank) * sizeof(double));
452 
454  FREE_MEM(pRecvBuffer);
455  FREE_MEM(pSendBuffer);
457 
458  memcpy(pResultVector->m_vectValueRealBuffer.data() + (long long)fFirstIndex, pSrcVector->m_vectValueRealBuffer.data(), GetLoadBalanceCount(m_nCurrentRank) * sizeof(double));
459  memcpy(pResultVector->m_vectValueImaginaryBuffer.data() + (long long)fFirstIndex, pSrcVector->m_vectValueImaginaryBuffer.data(), GetLoadBalanceCount(m_nCurrentRank) * sizeof(double));
460  }
461 }
462 
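Note: both optimal merge routines above exchange data only with the previous and next rank through MPI_Irecv / MPI_Isend / MPI_Waitall, tagging sends with the destination rank and receives with the caller's own rank. The standalone sketch below (illustrative only, not the project's code) isolates that exchange with a single double per rank.

// sketch_neighbor_exchange.cpp : minimal sketch of the non-blocking prev/next ring exchange.
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int nPrev = (rank - 1 + size) % size;
    int nNext = (rank + 1) % size;

    double fSend = (double)rank;   // payload: this rank's id
    double fFromPrev = -1.0;
    MPI_Request req[2];
    MPI_Status  status[2];

    // Post the receive first, then the send, and wait on both, as the merge routines do.
    // Tag convention: send tag = destination rank, receive tag = own rank.
    MPI_Irecv(&fFromPrev, 1, MPI_DOUBLE, nPrev, rank,  MPI_COMM_WORLD, &req[0]);
    MPI_Isend(&fSend,     1, MPI_DOUBLE, nNext, nNext, MPI_COMM_WORLD, &req[1]);
    MPI_Waitall(2, req, status);

    printf("rank %d received %.0f from rank %d\n", rank, fFromPrev, nPrev);
    MPI_Finalize();
    return 0;
}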
467 void CKNMPIManager::SplitVector(CKNMatrixOperation::CKNVector *pVector, int nRootRank)
468 {
469  LPCOMPLEX_NUMBER lpSendBuffer = NULL;
470  LPCOMPLEX_NUMBER lpRecvBuffer = NULL;
471  unsigned int i;
472  int *pSendCounts = NULL, *pDispls = NULL;
473 
474 #ifdef DISABLE_MPI_ROUTINE
475  return;
476 #endif
477 
479  pSendCounts = (int*)malloc(sizeof(int)*GetTotalNodeCount());
480  pDispls = (int*)malloc(sizeof(int)*GetTotalNodeCount());
482 
483  pDispls[0] = 0;
484  pSendCounts[0] = 2 * GetLoadBalanceCount(0);
485  for (i = 1; i < (unsigned int)GetTotalNodeCount(); i++)
486  {
487  pSendCounts[i] = 2 * GetLoadBalanceCount(i);
488  pDispls[i] = pDispls[i - 1] + 2 * GetLoadBalanceCount(i - 1);
489  }
490 
491  if (nRootRank == GetCurrentRank())
492  {
493  lpSendBuffer = ConvertVectorToMPIComplexBuffer(pVector);
494  pVector->Finalize();
495  }
496 
498  lpRecvBuffer = (LPCOMPLEX_NUMBER)malloc(sizeof(COMPLEX_NUMBER)*GetCurrentLoadBalanceCount() * 2);
500 
501  if (NULL == lpRecvBuffer)
502  throw ERROR_MALLOC;
503 
505  MPI_Scatterv(lpSendBuffer, pSendCounts, pDispls, MPI_DOUBLE,
506  lpRecvBuffer, 2 * GetCurrentLoadBalanceCount(), MPI_DOUBLE, nRootRank, m_mpiCommIndex);
508 
510  for (i = 0; i < (unsigned int)GetCurrentLoadBalanceCount(); i++)
511  pVector->SetAt(i, lpRecvBuffer[i].fReal, lpRecvBuffer[i].fImginary);
512 
514  FREE_MEM(pSendCounts);
515  FREE_MEM(pDispls);
516  FREE_MEM(lpRecvBuffer);
518 }
519 
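Note: SplitVector() scatters variable-sized pieces with MPI_Scatterv, and every count is doubled because each complex element travels as two doubles (real, imaginary). The standalone sketch below shows that layout with assumed per-rank element counts; it is not the project's code.

// sketch_scatterv.cpp : minimal sketch of the doubled-count MPI_Scatterv pattern.
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int nMyCount = rank + 1;              // complex elements owned by this rank (assumption)
    int *pCounts = NULL, *pDispls = NULL;
    double *pSendBuffer = NULL;

    if (rank == 0)                        // only the root needs the full send layout
    {
        pCounts = (int*)malloc(sizeof(int) * size);
        pDispls = (int*)malloc(sizeof(int) * size);
        int nTotal = 0;
        for (int i = 0; i < size; ++i)
        {
            pCounts[i] = 2 * (i + 1);     // two doubles per complex element
            pDispls[i] = nTotal;
            nTotal += pCounts[i];
        }
        pSendBuffer = (double*)calloc(nTotal, sizeof(double));
    }

    double *pRecvBuffer = (double*)malloc(sizeof(double) * 2 * nMyCount);
    MPI_Scatterv(pSendBuffer, pCounts, pDispls, MPI_DOUBLE,
                 pRecvBuffer, 2 * nMyCount, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    printf("rank %d received %d doubles (%d complex values)\n", rank, 2 * nMyCount, nMyCount);

    free(pRecvBuffer);
    if (rank == 0) { free(pCounts); free(pDispls); free(pSendBuffer); }
    MPI_Finalize();
    return 0;
}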
524 void CKNMPIManager::BroadcastBool(bool *boolValue, int nRootRank)
525 {
526 #ifdef DISABLE_MPI_ROUTINE
527  return;
528 #endif
529 
531  MPI_Bcast(boolValue, 1, MPI_C_BOOL, nRootRank, m_mpiCommIndex);
533 }
534 
540 void CKNMPIManager::BroadcastDouble(double *pValue, unsigned int nSize, int nRootRank, MPI_Comm comm)
541 {
542 #ifdef DISABLE_MPI_ROUTINE
543  return;
544 #endif
545 
547  if( MPI_COMM_NULL == comm )
548  MPI_Bcast(pValue, nSize, MPI_DOUBLE, nRootRank, m_mpiCommIndex);
549  else
550  MPI_Bcast(pValue, nSize, MPI_DOUBLE, nRootRank, comm);
552 }
553 
560 void CKNMPIManager::BroadcastInt(int *pValue, unsigned int nSize, int nRootRank, MPI_Comm comm)
561 {
562 #ifdef DISABLE_MPI_ROUTINE
563  return;
564 #endif
565 
567  if( MPI_COMM_NULL == comm )
568  MPI_Bcast(pValue, nSize, MPI_INT, nRootRank, m_mpiCommIndex);
569  else
570  MPI_Bcast(pValue, nSize, MPI_INT, nRootRank, comm);
572 }
573 
578 void CKNMPIManager::BroadcastLanczosResult(CKNLanczosMethod::LPEIGENVALUE_RESULT lpResult, int nIterationCount)
579 {
580  unsigned int nCastData[4];
581  unsigned int i;
582 
583 #ifdef DISABLE_MPI_ROUTINE
584  return;
585 #endif
586 
587  if( IsRootRank() )
588  {
589  nCastData[0] = lpResult->nEigenValueCount;
590  nCastData[1] = lpResult->nEigenValueCountForMemeory;
591  nCastData[2] = lpResult->nMaxEigenValueFoundIteration;
592  nCastData[3] = lpResult->nEigenVectorSize;
593  }
594 
596  MPI_Bcast(nCastData, 4, MPI_INT, GetRootRank(), m_mpiCommIndex);
598 
599  if( !IsRootRank() )
600  {
601  lpResult->nEigenValueCount = nCastData[0];
602  lpResult->nEigenValueCountForMemeory = nCastData[1];
603  lpResult->nMaxEigenValueFoundIteration = nCastData[2];
604  lpResult->nEigenVectorSize = nCastData[3];
605 
607  lpResult->pEigenValueFoundIteration = (unsigned int*)malloc(sizeof(unsigned int)*lpResult->nEigenValueCount);
608  lpResult->pEigenVectors = (double**)malloc(sizeof(double*)*lpResult->nEigenValueCount);
609  for (i = 0; i < lpResult->nEigenValueCount; ++i)
610  lpResult->pEigenVectors[i] = (double*)malloc(sizeof(double)*lpResult->nEigenVectorSize);
612  }
613 
615  MPI_Bcast(lpResult->pEigenValueFoundIteration, lpResult->nEigenValueCount, MPI_INT, GetRootRank(), m_mpiCommIndex);
616  for (i = 0; i < lpResult->nEigenValueCount; ++i)
617  MPI_Bcast(lpResult->pEigenVectors[i], lpResult->nEigenVectorSize, MPI_DOUBLE, GetRootRank(), m_mpiCommIndex);
619 }
620 
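Note: BroadcastLanczosResult() uses a two-phase broadcast: the sizes go out first so non-root ranks can allocate, then each array is broadcast. The standalone sketch below shows the same pattern on a stand-in data set; the field names and sizes are assumptions, not the project's types.

// sketch_bcast_sized.cpp : minimal sketch of the size-then-payload broadcast pattern.
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int nCount = 0;          // number of vectors (known only on the root at first)
    int nVectorSize = 0;     // length of each vector
    double **ppVectors = NULL;

    if (rank == 0)           // the root owns the data
    {
        nCount = 3;
        nVectorSize = 4;
    }

    // Phase 1: broadcast the sizes.
    int nSizes[2] = { nCount, nVectorSize };
    MPI_Bcast(nSizes, 2, MPI_INT, 0, MPI_COMM_WORLD);
    nCount = nSizes[0];
    nVectorSize = nSizes[1];

    // Phase 2: allocate on every rank, then broadcast each array.
    ppVectors = (double**)malloc(sizeof(double*) * nCount);
    for (int i = 0; i < nCount; ++i)
    {
        ppVectors[i] = (double*)calloc(nVectorSize, sizeof(double));
        if (rank == 0)
            ppVectors[i][0] = (double)i;     // dummy payload on the root
        MPI_Bcast(ppVectors[i], nVectorSize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    }

    printf("rank %d: vector[2][0] = %.0f\n", rank, ppVectors[2][0]);

    for (int i = 0; i < nCount; ++i) free(ppVectors[i]);
    free(ppVectors);
    MPI_Finalize();
    return 0;
}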
625 void CKNMPIManager::AllReduceComlex(CKNComplex *pNumber, CKNTimeMeasurement::MEASUREMENT_INDEX INDEX)
626 {
627 #ifdef DISABLE_MPI_ROUTINE
628  return;
629 #endif
630 
631  double fSend[2], fRecv[2];
632 
633  fSend[0] = pNumber->GetRealNumber();
634  fSend[1] = pNumber->GetImaginaryNumber();
635 
637  MPI_Allreduce(fSend, fRecv, 2, MPI_DOUBLE, MPI_SUM, m_mpiCommIndex);
639 
640  pNumber->SetComplexNumber(fRecv[0], fRecv[1]);
641 }
642 
647 double CKNMPIManager::AllReduceDouble(double fNumber)
648 {
649 #ifdef DISABLE_MPI_ROUTINE
650  return fNumber;
651 #endif
652 
653  double fRecv;
654 
656  MPI_Allreduce(&fNumber, &fRecv, 1, MPI_DOUBLE, MPI_SUM, m_mpiCommIndex);
658 
659  return fRecv;
660 }
661 
667 LPCOMPLEX_NUMBER CKNMPIManager::ConvertVectorToMPIComplexBuffer(CKNMatrixOperation::CKNVector *pVector)
668 {
669  LPCOMPLEX_NUMBER lpResult = NULL;
670  unsigned int i, nCount;
671 
672  nCount = pVector->GetSize();
673  lpResult = m_pConvertingBuffer;
674 
675  for (i = 0; i < nCount; i++)
676  {
677  lpResult[i].fReal = pVector->GetAt(i).GetRealNumber();
678  lpResult[i].fImginary = pVector->GetAt(i).GetImaginaryNumber();
679  }
680 
681  return lpResult;
682 }
683 
685 {
686 #ifdef DISABLE_MPI_ROUTINE
687  return 0;
688 #endif
689 
691 }
692 
693 void CKNMPIManager::FinalizeManager()
694 {
695  m_bStartMPI = false;
696  m_nCurrentRank = 0;
697  m_nTotalNode = 1;
705  if( MPI_GROUP_EMPTY != m_lanczosGroup )
706  MPI_Group_free(&m_lanczosGroup);
707  if( MPI_GROUP_EMPTY != m_deflationGroup )
708  MPI_Group_free(&m_deflationGroup);
709  if( MPI_COMM_NULL != m_mpiCommIndex && MPI_COMM_WORLD != m_mpiCommIndex)
710  MPI_Comm_free(&m_mpiCommIndex);
711  if( MPI_COMM_NULL != m_deflationComm )
712  MPI_Comm_free(&m_deflationComm);
713 }
714 
718 void CKNMPIManager::InitCommunicationBufferMetric()
719 {
720  unsigned int i;
721 
723  m_pRecvCount = (int*)malloc(sizeof(int)*GetTotalNodeCount());
724  m_pSendCount = (int*)malloc(sizeof(int)*GetTotalNodeCount());
725  m_pDispls = (int*)malloc(sizeof(int)*GetTotalNodeCount());
726  m_pDispls[0] = 0;
728  for (i = 1; i < (unsigned int)GetTotalNodeCount(); i++)
729  {
731  m_pDispls[i] = m_pDispls[i - 1] + GetLoadBalanceCount(i - 1);
732  }
734 }
735 
742 void CKNMPIManager::SendDoubleBufferSync(int nTargetRank, double *pBuffer, int nSize, MPI_Request *req, MPI_Comm commWorld)
743 {
744  int nRank = CKNMPIManager::GetCurrentRank();
745  if( MPI_COMM_NULL == commWorld)
746  //MPI_Send(pBuffer, nSize, MPI_DOUBLE, nTargetRank, 0, m_mpiCommIndex);
747  MPI_Send(pBuffer, nSize, MPI_DOUBLE, nTargetRank, nTargetRank, m_mpiCommIndex);
748  else
749  if( MPI_SUCCESS != MPI_Send(pBuffer, nSize, MPI_DOUBLE, nTargetRank, nTargetRank, commWorld) )
750  //if( MPI_SUCCESS != MPI_Send(pBuffer, nSize, MPI_DOUBLE, nTargetRank, 0, commWorld) )
751  printf("ERROR :: MPI_Send failed in SendDoubleBufferSync\n");
752 }
753 
757 void CKNMPIManager::WaitSendDoubleBufferSync(MPI_Request *req)
758 {
759  MPI_Status status;
760 
761  MPI_Wait(&m_SendDoubleAsyncRequest, &status);
762 }
763 
770 void CKNMPIManager::ReceiveDoubleBufferSync(int nSourceRank, double *pBuffer, int nSize, MPI_Request *req, MPI_Comm commWorld)
771 {
772  MPI_Status status;
773  if( MPI_COMM_NULL == commWorld)
774  //MPI_Recv(pBuffer, nSize, MPI_DOUBLE, nSourceRank, 0, m_mpiCommIndex, &status);
775  MPI_Recv(pBuffer, nSize, MPI_DOUBLE, nSourceRank, CKNMPIManager::GetCurrentRank(), m_mpiCommIndex, &status);
776  else
777  //MPI_Recv(pBuffer, nSize, MPI_DOUBLE, nSourceRank, 0, commWorld, &status);
778  MPI_Recv(pBuffer, nSize, MPI_DOUBLE, nSourceRank, CKNMPIManager::GetCurrentRank(commWorld), commWorld, &status);
779 }
780 
784 void CKNMPIManager::WaitReceiveDoubleBufferAsync(MPI_Request *req)
785 {
786  MPI_Status status;
787 
788  MPI_Wait(&m_ReceiveDoubleAsyncRequest, &status);
789 }
790 
791 void CKNMPIManager::BarrierAllComm()
792 {
794  MPI_Barrier(MPI_COMM_WORLD);
795 }
796 
802 int* CKNMPIManager::GetEigenvalueCountFromDeflationGroup(int nDeflationGroupCount, int nLocalEVCount)
803 {
804  int *pEVCount = NULL;
805 
807  pEVCount = (int*)malloc(sizeof(int)*nDeflationGroupCount);
808 
809  MPI_Gather(&nLocalEVCount, 1, MPI_INT, pEVCount, 1, MPI_INT, 0, m_deflationComm);
810 
811  return pEVCount;
812 }
813 
822 void CKNMPIManager::GatherVDouble(int nSourceCount, double *pReceiveBuffer, int *pSourceCount, double *pSendBuffer, int nSendCount, MPI_Comm comm)
823 {
824  int *pReceiveCount = NULL;
825  int *pDisp = NULL;
826  unsigned int i;
827 
828  if( IsDeflationRoot() )
829  {
830  pDisp = (int*)malloc(sizeof(int)*nSourceCount);
831  pDisp[0] = 0;
832  for( i = 1; i < (unsigned int)nSourceCount; ++i)
833  pDisp[i] = pDisp[i-1] + pSourceCount[i-1];
834  }
835 
836  if( MPI_COMM_NULL == comm )
837  MPI_Gatherv(pSendBuffer, nSendCount, MPI_DOUBLE, pReceiveBuffer, pSourceCount, pDisp, MPI_DOUBLE, 0, m_mpiCommIndex);
838  else
839  MPI_Gatherv(pSendBuffer, nSendCount, MPI_DOUBLE, pReceiveBuffer, pSourceCount, pDisp, MPI_DOUBLE, 0, comm);
840 
841  FREE_MEM(pDisp);
842 }
843 
852 void CKNMPIManager::GatherVInt(int nSourceCount, int *pReceiveBuffer, int *pSourceCount, int *pSendBuffer, int nSendCount, MPI_Comm comm)
853 {
854  int *pReceiveCount = NULL;
855  int *pDisp = NULL;
856  unsigned int i;
857 
858  if( IsDeflationRoot() )
859  {
860  pDisp = (int*)malloc(sizeof(int)*nSourceCount);
861  pDisp[0] = 0;
862  for( i = 1; i < (unsigned int)nSourceCount; ++i)
863  pDisp[i] = pDisp[i-1] + pSourceCount[i-1];
864  }
865 
866  if( MPI_COMM_NULL == comm )
867  MPI_Gatherv(pSendBuffer, nSendCount, MPI_INT, pReceiveBuffer, pSourceCount, pDisp, MPI_INT, 0, m_mpiCommIndex);
868  else
869  MPI_Gatherv(pSendBuffer, nSendCount, MPI_INT, pReceiveBuffer, pSourceCount, pDisp, MPI_INT, 0, comm);
870 
871  FREE_MEM(pDisp);
872 }
873 
878 void CKNMPIManager::ExchangeCommand(double *pfCommand, MPI_Comm comm)
879 {
880  BroadcastDouble(pfCommand, COMMAND_SIZE, 0, comm);
881 }
882 
890 void CKNMPIManager::SendVectorSync(int nTargetRank, CKNMatrixOperation::CKNVector *pVector, int nSize, MPI_Request *req, MPI_Comm commWorld)
891 {
892  double *pBuffer = NULL;
893 
894  pBuffer = (double*)malloc(sizeof(double)*nSize*2);
895  pVector->Serialize(pBuffer, false);
896 
897  SendDoubleBufferSync(nTargetRank, pBuffer, nSize * 2, req, commWorld);
898 
899 
900  FREE_MEM(pBuffer);
901 }
902 
910 void CKNMPIManager::ReceiveVectorSync(int nSourceRank, CKNMatrixOperation::CKNVector *pVector, int nSize, MPI_Request *req, MPI_Comm commWorld)
911 {
912  double *pBuffer = NULL;
913 
914  pBuffer = (double*)malloc(sizeof(double)*nSize*2);
915 
916  ReceiveDoubleBufferSync(nSourceRank, pBuffer, nSize * 2, req, commWorld);
917 
918  pVector->Serialize(pBuffer, true);
919 
920  FREE_MEM(pBuffer);
921 }
static void BroadcastBool(bool *boolValue, int nRootRank=0)
Broadcast a boolean value.
static int * m_pSendCount
Send-count variable for MPI communication.
Definition: KNMPIManager.h:94
void SetSize(unsigned int nSize)
Set Vector elements size.
static void InitCommunicationBufferMetric()
Initialize the MPI communication buffer metrics for MVMul.
double GetImaginaryNumber() const
Get imaginary part.
Definition: KNComplex.h:27
static void BroadcastInt(int *pValue, unsigned int nSize, int nRootRank=0, MPI_Comm comm=MPI_COMM_NULL)
Broadcast integer values.
static void FinalizeManager()
Finalize the MPI manager.
static void ShowMsg(char *pszBuffer)
Show message.
static void SetMPIEnviroment(int nRank, int nTotalNode)
Set the MPI environment.
static void WaitSendDoubleBufferSync(MPI_Request *req)
Wait for the synchronous double-buffer send to complete.
static void AllReduceComlex(CKNComplex *pNumber, CKNTimeMeasurement::MEASUREMENT_INDEX INDEX=CKNTimeMeasurement::COMM)
Perform an all-reduce on a CKNComplex value.
static int * GetEigenvalueCountFromDeflationGroup(int nDeflationGroupCount, int nLocalEVCount)
Gather the eigenvalue counts from the deflation group.
static void BroadcastLanczosResult(CKNLanczosMethod::LPEIGENVALUE_RESULT lpResult, int nIterationCount)
Broadcast Lanczos result.
static bool InitLevel(int nMPILevel, int nFindingDegeneratedEVCount)
Initialize the MPI level; the lowest level is used for multi-node Lanczos calculation.
static unsigned int m_nMPILevel
MPI Level.
Definition: KNMPIManager.h:99
static LPCOMPLEX_NUMBER m_pCommBuffer
Data buffer for MPI Communication.
Definition: KNMPIManager.h:91
static void BroadcastDouble(double *pValue, unsigned int nSize, int nRootRank=0, MPI_Comm comm=MPI_COMM_NULL)
Broadcast double values.
double GetRealNumber() const
Get real part.
Definition: KNComplex.h:26
static void SplitVector(CKNMatrixOperation::CKNVector *pVector, int nRootRank)
Split a vector across sub-ranks.
static int GetCurrentLoadBalanceCount()
Get the load-balancing count of the current rank.
static void MeasurementEnd(MEASUREMENT_INDEX index)
Measurement end for part.
static void MergeVector(CKNMatrixOperation::CKNVector *pVector, CKNMatrixOperation::CKNVector *pResultVector, unsigned int nMergeSize)
Merge the distributed vector pieces from all ranks.
static int GetTotalNodeCount()
Definition: KNMPIManager.h:44
static bool CheckDeflationNodeCount(int nNeedNodeCount)
Check whether the node count fits the deflation groups.
static void ReceiveVectorSync(int nSourceRank, CKNMatrixOperation::CKNVector *pVector, int nSize, MPI_Request *req, MPI_Comm commWorld=MPI_COMM_NULL)
Receive a vector synchronously.
static int GetLoadBalanceCount(int nRank)
static LPCOMPLEX_NUMBER ConvertVectorToMPIComplexBuffer(CKNMatrixOperation::CKNVector *pVector)
Convert vector class to MPI_COMPLEX array.
static double AllReduceDouble(double fNumber)
Perform an all-reduce on a double value.
static int GetRootRank()
Definition: KNMPIManager.h:60
static MPI_Comm m_deflationComm
Deflation computing MPI_Comm.
Definition: KNMPIManager.h:102
static int * m_pRecvCount
Receive-count variable for MPI communication.
Definition: KNMPIManager.h:93
static int m_nCommWorldRank
MPI Rank before split.
Definition: KNMPIManager.h:87
static MPI_Request m_ReceiveDoubleAsyncRequest
Request for receiving double.
Definition: KNMPIManager.h:98
static bool IsInMPIRoutine()
Definition: KNMPIManager.h:47
static int m_nTotalNode
Total node count.
Definition: KNMPIManager.h:88
static void LoadBlancing(int nElementCount)
Load balancing for MPI; this function is for Lanczos solving with geometric construction.
static void SetShow(bool bShow)
Definition: KNIPCCUtility.h:30
static void GatherVInt(int nSourceCount, int *pReceiveBuffer, int *pSourceCount, int *pSendBuffer, int nSendCount, MPI_Comm comm=MPI_COMM_NULL)
GatherV wrapper for int.
static MPI_Request m_SendDoubleAsyncRequest
Request for sending double.
Definition: KNMPIManager.h:97
CKNMPIManager()
Constructor.
Common definition for Solver.
static void SendVectorSync(int nTargetRank, CKNMatrixOperation::CKNVector *pVector, int nSize, MPI_Request *req, MPI_Comm commWorld=MPI_COMM_NULL)
Send a vector synchronously.
static int * m_pBankInfo
Bank information after the MPI split.
Definition: KNMPIManager.h:95
CKNComplex GetAt(unsigned int nIndex)
Get element value from specific index.
static void MergeVectorEx_Optimal(CKNMatrixOperation::CKNVector *pVector, CKNMatrixOperation::CKNVector *pResultVector, unsigned int nMergeSize, double fFirstIndex, unsigned int nSizeFromPrevRank, unsigned int nSizeFromNextRank, unsigned int nSizetoPrevRank, unsigned int nSizetoNextRank, unsigned int *)
Merge vectors using a single-layer neighbor exchange.
bool Serialize(double *pBuffer, bool bStore)
Serialize vector.
static bool IsLanczosComputeRoot()
Check whether this rank is the root of the Lanczos computation.
Definition: KNMPIManager.h:71
Structure for eigenvalue computing.
Time measurement class.
double_vector_t m_vectValueImaginaryBuffer
A member variable for saving non-zero elements.
static bool m_bMultiLevel
Flag for Multilevel MPI group.
Definition: KNMPIManager.h:106
~CKNMPIManager()
Destructor.
void Finalize()
Free allocated memory for vector elements.
This class includes functions for matrix debugging.
static LPCOMPLEX_NUMBER m_pConvertingBuffer
Data buffer for Vector converting.
Definition: KNMPIManager.h:92
static int * m_pDispls
Displacements for MPI communication.
Definition: KNMPIManager.h:96
static void ExchangeCommand(double *pfCommand, MPI_Comm comm)
Exchange a command buffer among ranks.
static int GetCurrentRank()
Definition: KNMPIManager.h:42
double_vector_t m_vectValueRealBuffer
A member variable for saving non-zero elements.
static void GatherVDouble(int nSourceCount, double *pReceiveBuffer, int *pSourceCount, double *pSendBuffer, int nSendCount, MPI_Comm comm=MPI_COMM_NULL)
GatherV wrapper for double.
static void MergeVectorOptimal(CKNMatrixOperation::CKNVector *pSrcVector, CKNMatrixOperation::CKNVector *pResultVector, unsigned int nMergeSize, double fFirstIndex)
Merge the distributed vector pieces without calling vector class member functions.
void SetAt(unsigned int nIndex, CKNComplex value)
Set the element value at a specific index (call by value).
static MPI_Group m_deflationGroup
MPI Group for Deflation computation.
Definition: KNMPIManager.h:104
#define FREE_MEM(pointer)
Macro for freeing memory and assigning a null value.
Definition: CKNGlobal.h:20
unsigned int GetSize()
Return Vector elements size.
static int * m_pLoadBalance
Load balancing for MPI communication.
Definition: KNMPIManager.h:90
const unsigned long ERROR_MALLOC
Error code indicating that an error occurred during memory allocation.
Definition: CKNGlobal.h:62
Class for complex-number operations and value storage.
Definition: KNComplex.h:18
MPI management class.
static void BarrierAllComm()
Barrier across all communicators.
static MPI_Comm m_mpiCommIndex
Lanczos Method MPI_Comm.
Definition: KNMPIManager.h:101
static void MeasurementStart(MEASUREMENT_INDEX index)
Measurement start for part.
static void WaitReceiveDoubleBufferAsync(MPI_Request *req)
Wait for the asynchronous double-buffer receive to complete.
static void LoadBlancingForLanczos(int nRowCount)
Load balancing for MPI; this function is only for Lanczos solving without geometric construction.
static bool m_bNeedPostOperation[10]
Flags indicating required post operations.
Definition: KNMPIManager.h:100
static MPI_Group m_lanczosGroup
MPI Group for Lanczos computation.
Definition: KNMPIManager.h:103
static int m_nCurrentRank
Current MPI rank.
Definition: KNMPIManager.h:83
static bool IsRootRank()
Check whether this rank is the root rank.
#define COMMAND_SIZE
Definition: CKNGlobal.h:102
static bool m_bStartMPI
Whether MPI_Init has been called.
Definition: KNMPIManager.h:89
static void SendDoubleBufferSync(int nTargetRank, double *pBuffer, int nSize, MPI_Request *req, MPI_Comm commWorld=MPI_COMM_NULL)
Send a double buffer synchronously.
void SetComplexNumber(double fReal, double fImaginaray)
Set Complex number using real part and imaginary part.
Definition: KNComplex.cpp:59
struct CKNMPIManager::COMPLEX_NUMBER * LPCOMPLEX_NUMBER
static unsigned int m_nLanczosGroupIndex
MPI Group index for Lanczos group.
Definition: KNMPIManager.h:105
Class describing a vector for the Lanczos method.
static void BroadcastVector(CKNMatrixOperation::CKNVector *pVector)
Broadcast a vector to all ranks.
static bool IsDeflationRoot()
Check whether this rank is the root of the deflation computation.
Definition: KNMPIManager.h:72
static void ReceiveDoubleBufferSync(int nSourceRank, double *pBuffer, int nSize, MPI_Request *req, MPI_Comm commWorld=MPI_COMM_NULL)
Receive a double buffer synchronously.