Revision | 59d1e5e4eee8487da6fdbe5f274800ca75711421 (tree) |
---|---|
Time | 2013-08-08 14:29:13 |
Author | Mikiya Fujii <mikiya.fujii@gmai...> |
Committer | Mikiya Fujii |
Construction of the Fock matrix is MPI-parallelized. #31851
git-svn-id: https://svn.sourceforge.jp/svnroot/molds/trunk@1461 1136aad2-a195-0410-b898-f5ea1d11b9d8
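This commit applies one recurring recipe to several matrix builders in Cndo2 and Mndo: the outer index (an AO row mu or an atom index A) is distributed cyclically over MPI ranks, the inner loop keeps its existing OpenMP parallelism, and the partial results are then combined, either by gathering the owned rows on the head rank and broadcasting, or by an element-wise all-reduce. Below is a minimal sketch of the cyclic ownership test, written against plain MPI rather than the project's MolDS_mpi::MpiProcess wrapper (an assumption made so the sketch is self-contained):

```cpp
#include <mpi.h>

int main(int argc, char* argv[]) {
   MPI_Init(&argc, &argv);
   int rank, size;
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Comm_size(MPI_COMM_WORLD, &size);

   const int n = 16;                  // stand-in for totalNumberAOs or totalNumberAtoms
   for (int i = 0; i < n; i++) {
      if (i % size != rank) continue; // rank r owns every index i with i % size == r
      // ... compute row/block i here; OpenMP parallelism stays in the inner loop ...
   }

   MPI_Finalize();
   return 0;
}
```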
@@ -1374,22 +1374,27 @@ void Cndo2::CalcFockMatrix(double** fockMatrix, | ||
1374 | 1374 | double const* atomicElectronPopulation, |
1375 | 1375 | double const* const* const* const* const* const* twoElecTwoCore, |
1376 | 1376 | bool isGuess) const{ |
1377 | + int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); | |
1378 | + int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); | |
1379 | + int totalNumberAOs = molecule.GetTotalNumberAOs(); | |
1380 | + int totalNumberAtoms = molecule.GetNumberAtoms(); | |
1377 | 1381 | MallocerFreer::GetInstance()->Initialize<double>(fockMatrix, |
1378 | - molecule.GetTotalNumberAOs(), | |
1379 | - molecule.GetTotalNumberAOs()); | |
1380 | - int totalNumberAtoms=molecule.GetNumberAtoms(); | |
1381 | - stringstream ompErrors; | |
1382 | -#pragma omp parallel for schedule(auto) | |
1382 | + totalNumberAOs, | |
1383 | + totalNumberAOs); | |
1383 | 1384 | for(int A=0; A<totalNumberAtoms; A++){ |
1384 | - try{ | |
1385 | - const Atom& atomA = *molecule.GetAtom(A); | |
1386 | - int firstAOIndexA = atomA.GetFirstAOIndex(); | |
1387 | - int lastAOIndexA = atomA.GetLastAOIndex(); | |
1385 | + const Atom& atomA = *molecule.GetAtom(A); | |
1386 | + int firstAOIndexA = atomA.GetFirstAOIndex(); | |
1387 | + int lastAOIndexA = atomA.GetLastAOIndex(); | |
1388 | + for(int mu=firstAOIndexA; mu<=lastAOIndexA; mu++){ | |
1389 | + if(mu%mpiSize != mpiRank){continue;} | |
1390 | + | |
1391 | + stringstream ompErrors; | |
1392 | +#pragma omp parallel for schedule(auto) | |
1388 | 1393 | for(int B=A; B<totalNumberAtoms; B++){ |
1389 | - const Atom& atomB = *molecule.GetAtom(B); | |
1390 | - int firstAOIndexB = atomB.GetFirstAOIndex(); | |
1391 | - int lastAOIndexB = atomB.GetLastAOIndex(); | |
1392 | - for(int mu=firstAOIndexA; mu<=lastAOIndexA; mu++){ | |
1394 | + try{ | |
1395 | + const Atom& atomB = *molecule.GetAtom(B); | |
1396 | + int firstAOIndexB = atomB.GetFirstAOIndex(); | |
1397 | + int lastAOIndexB = atomB.GetLastAOIndex(); | |
1393 | 1398 | for(int nu=firstAOIndexB; nu<=lastAOIndexB; nu++){ |
1394 | 1399 | if(mu == nu){ |
1395 | 1400 | // diagonal part |
@@ -1421,20 +1426,44 @@ void Cndo2::CalcFockMatrix(double** fockMatrix, | ||
1421 | 1426 | else{ |
1422 | 1427 | // lower left part (not calculated) |
1423 | 1428 | } |
1424 | - | |
1425 | - } | |
1429 | + } // end of loop nu | |
1430 | + } // end of try | |
1431 | + catch(MolDSException ex){ | |
1432 | +#pragma omp critical | |
1433 | + ex.Serialize(ompErrors); | |
1426 | 1434 | } |
1435 | + } // end of loop B parallelized with OpenMP | |
1436 | + // Exception throwing for omp-region | |
1437 | + if(!ompErrors.str().empty()){ | |
1438 | + throw MolDSException::Deserialize(ompErrors); | |
1427 | 1439 | } |
1428 | - } | |
1429 | - catch(MolDSException ex){ | |
1430 | -#pragma omp critical | |
1431 | - ex.Serialize(ompErrors); | |
1440 | + } // end of loop mu parallelized with MPI | |
1441 | + } // end of loop A | |
1442 | + | |
1443 | + // communication to collect all matrix data on head-rank | |
1444 | + int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); | |
1445 | + if(mpiRank == mpiHeadRank){ | |
1446 | + // receive the matrix data from other ranks | |
1447 | + for(int mu=0; mu<totalNumberAOs; mu++){ | |
1448 | + if(mu%mpiSize == mpiHeadRank){continue;} | |
1449 | + int source = mu%mpiSize; | |
1450 | + int tag = mu; | |
1451 | + MolDS_mpi::MpiProcess::GetInstance()->Recv(source, tag, fockMatrix[mu], totalNumberAOs); | |
1432 | 1452 | } |
1433 | 1453 | } |
1434 | - // Exception throwing for omp-region | |
1435 | - if(!ompErrors.str().empty()){ | |
1436 | - throw MolDSException::Deserialize(ompErrors); | |
1454 | + else{ | |
1455 | + // send the matrix data to head-rank | |
1456 | + for(int mu=0; mu<totalNumberAOs; mu++){ | |
1457 | + if(mu%mpiSize != mpiRank){continue;} | |
1458 | + int dest = mpiHeadRank; | |
1459 | + int tag = mu; | |
1460 | + MolDS_mpi::MpiProcess::GetInstance()->Send(dest, tag, fockMatrix[mu], totalNumberAOs); | |
1461 | + } | |
1437 | 1462 | } |
1463 | + // broadcast all matrix data to all ranks | |
1464 | + int root=mpiHeadRank; | |
1465 | + MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&fockMatrix[0][0], totalNumberAOs*totalNumberAOs, root); | |
1466 | + | |
1438 | 1467 | /* |
1439 | 1468 | this->OutputLog("fock matrix\n"); |
1440 | 1469 | for(int o=0; o<this->molecule.GetTotalNumberAOs(); o++){ |
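The hunk above ends with a collect-then-broadcast exchange: each non-head rank sends the fockMatrix rows it owns to the head rank (tagged with the row index mu), the head receives them, and a final broadcast leaves the complete matrix on every rank. The sketch below reproduces that three-phase pattern with plain MPI, assuming the n×n matrix sits in one contiguous buffer; the fock pointer and the function name are illustrative, not the project's API:

```cpp
#include <mpi.h>

// Three-phase exchange: the head rank collects the rows other ranks own,
// then a broadcast replicates the completed matrix on every rank.
void GatherRowsThenBroadcast(double* fock, int n, int rank, int size, int head) {
   if (rank == head) {
      for (int mu = 0; mu < n; mu++) {
         if (mu % size == head) continue;   // head already computed these rows
         MPI_Recv(&fock[mu*n], n, MPI_DOUBLE, mu % size, /*tag=*/mu,
                  MPI_COMM_WORLD, MPI_STATUS_IGNORE);
      }
   } else {
      for (int mu = 0; mu < n; mu++) {
         if (mu % size != rank) continue;   // send only the rows this rank owns
         MPI_Send(&fock[mu*n], n, MPI_DOUBLE, head, /*tag=*/mu, MPI_COMM_WORLD);
      }
   }
   MPI_Bcast(fock, n*n, MPI_DOUBLE, head, MPI_COMM_WORLD);
}
```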
@@ -1561,16 +1590,21 @@ void Cndo2::CalcAtomicElectronPopulation(double* atomicElectronPopulation, | ||
1561 | 1590 | |
1562 | 1591 | // calculate gammaAB matrix. (B.56) and (B.62) in J. A. Pople book. |
1563 | 1592 | void Cndo2::CalcGammaAB(double** gammaAB, const Molecule& molecule) const{ |
1593 | + int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); | |
1594 | + int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); | |
1564 | 1595 | int totalAtomNumber = molecule.GetNumberAtoms(); |
1565 | - stringstream ompErrors; | |
1566 | -#pragma omp parallel for schedule(auto) | |
1596 | + | |
1597 | + // This loop (A) is parallelized by MPI | |
1567 | 1598 | for(int A=0; A<totalAtomNumber; A++){ |
1568 | - try{ | |
1569 | - const Atom& atomA = *molecule.GetAtom(A); | |
1570 | - int na = atomA.GetValenceShellType() + 1; | |
1571 | - double orbitalExponentA = atomA.GetOrbitalExponent( | |
1572 | - atomA.GetValenceShellType(), s, this->theory); | |
1573 | - for(int B=A; B<totalAtomNumber; B++){ | |
1599 | + if(A%mpiSize != mpiRank){continue;} | |
1600 | + const Atom& atomA = *molecule.GetAtom(A); | |
1601 | + int na = atomA.GetValenceShellType() + 1; | |
1602 | + double orbitalExponentA = atomA.GetOrbitalExponent( | |
1603 | + atomA.GetValenceShellType(), s, this->theory); | |
1604 | + stringstream ompErrors; | |
1605 | +#pragma omp parallel for schedule(auto) | |
1606 | + for(int B=A; B<totalAtomNumber; B++){ | |
1607 | + try{ | |
1574 | 1608 | const Atom& atomB = *molecule.GetAtom(B); |
1575 | 1609 | int nb = atomB.GetValenceShellType() + 1; |
1576 | 1610 | double orbitalExponentB = atomB.GetOrbitalExponent( |
@@ -1619,16 +1653,41 @@ void Cndo2::CalcGammaAB(double** gammaAB, const Molecule& molecule) const{ | ||
1619 | 1653 | } |
1620 | 1654 | gammaAB[A][B] = value; |
1621 | 1655 | } |
1656 | + catch(MolDSException ex){ | |
1657 | + #pragma omp critical | |
1658 | + ex.Serialize(ompErrors); | |
1659 | + } | |
1660 | + } // end of loop B parallelized by OpenMP | |
1661 | + // Exception throwing for omp-region | |
1662 | + if(!ompErrors.str().empty()){ | |
1663 | + throw MolDSException::Deserialize(ompErrors); | |
1622 | 1664 | } |
1623 | - catch(MolDSException ex){ | |
1624 | -#pragma omp critical | |
1625 | - ex.Serialize(ompErrors); | |
1665 | + } // end of loop A parallelized by MPI | |
1666 | + | |
1667 | + // communication to collect all matrix data on head-rank | |
1668 | + int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); | |
1669 | + if(mpiRank == mpiHeadRank){ | |
1670 | + // receive the matrix data from other ranks | |
1671 | + for(int A=0; A<totalAtomNumber; A++){ | |
1672 | + if(A%mpiSize == mpiHeadRank){continue;} | |
1673 | + int source = A%mpiSize; | |
1674 | + int tag = A; | |
1675 | + MolDS_mpi::MpiProcess::GetInstance()->Recv(source, tag, &gammaAB[A][A], totalAtomNumber-A); | |
1626 | 1676 | } |
1627 | 1677 | } |
1628 | - // Exception throwing for omp-region | |
1629 | - if(!ompErrors.str().empty()){ | |
1630 | - throw MolDSException::Deserialize(ompErrors); | |
1678 | + else{ | |
1679 | + // send the matrix data to head-rank | |
1680 | + for(int A=0; A<totalAtomNumber; A++){ | |
1681 | + if(A%mpiSize != mpiRank){continue;} | |
1682 | + int dest = mpiHeadRank; | |
1683 | + int tag = A; | |
1684 | + MolDS_mpi::MpiProcess::GetInstance()->Send(dest, tag, &gammaAB[A][A], totalAtomNumber-A); | |
1685 | + } | |
1631 | 1686 | } |
1687 | + // broadcast all matrix data to all ranks | |
1688 | + int root=mpiHeadRank; | |
1689 | + MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&gammaAB[0][0], totalAtomNumber*totalAtomNumber, root); | |
1690 | + | |
1632 | 1691 | |
1633 | 1692 | #pragma omp parallel for schedule(auto) |
1634 | 1693 | for(int A=0; A<totalAtomNumber; A++){ |
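For gammaAB the same gather-then-broadcast is used, but only the upper-triangular tail of each owned row is moved (&gammaAB[A][A] with length totalAtomNumber-A), because the inner loop over B starts at A and the lower-left part is filled in by the symmetrization pass visible in the trailing context above. A sketch of one such row-segment transfer, again assuming plain MPI and a flat n×n buffer:

```cpp
#include <mpi.h>

// Move only the diagonal-and-above segment of row A, i.e. gamma[A][A..n-1];
// the receiver mirrors this with MPI_Recv into the same offset.
void SendUpperRowSegment(double* gamma, int n, int A, int head) {
   MPI_Send(&gamma[A*n + A], n - A, MPI_DOUBLE, head, /*tag=*/A, MPI_COMM_WORLD);
}
```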
@@ -1727,21 +1786,25 @@ void Cndo2::CalcElectronicTransitionDipoleMoment(double* transitionDipoleMoment, | ||
1727 | 1786 | void Cndo2::CalcCartesianMatrixByGTOExpansion(double*** cartesianMatrix, |
1728 | 1787 | const Molecule& molecule, |
1729 | 1788 | STOnGType stonG) const{ |
1730 | - int totalAONumber = molecule.GetTotalNumberAOs(); | |
1789 | + int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); | |
1790 | + int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); | |
1791 | + int totalAONumber = molecule.GetTotalNumberAOs(); | |
1731 | 1792 | int totalAtomNumber = molecule.GetNumberAtoms(); |
1732 | 1793 | |
1733 | - stringstream ompErrors; | |
1734 | -#pragma omp parallel for schedule(auto) | |
1794 | + // This loop (A and mu) is parallelized by MPI | |
1735 | 1795 | for(int A=0; A<totalAtomNumber; A++){ |
1736 | - try{ | |
1737 | - const Atom& atomA = *molecule.GetAtom(A); | |
1738 | - int firstAOIndexAtomA = atomA.GetFirstAOIndex(); | |
1796 | + const Atom& atomA = *molecule.GetAtom(A); | |
1797 | + int firstAOIndexAtomA = atomA.GetFirstAOIndex(); | |
1798 | + for(int a=0; a<atomA.GetValenceSize(); a++){ | |
1799 | + int mu = firstAOIndexAtomA + a; | |
1800 | + if(mu%mpiSize != mpiRank){continue;} | |
1801 | + stringstream ompErrors; | |
1802 | + #pragma omp parallel for schedule(auto) | |
1739 | 1803 | for(int B=0; B<totalAtomNumber; B++){ |
1740 | - const Atom& atomB = *molecule.GetAtom(B); | |
1741 | - int firstAOIndexAtomB = atomB.GetFirstAOIndex(); | |
1742 | - for(int a=0; a<atomA.GetValenceSize(); a++){ | |
1804 | + try{ | |
1805 | + const Atom& atomB = *molecule.GetAtom(B); | |
1806 | + int firstAOIndexAtomB = atomB.GetFirstAOIndex(); | |
1743 | 1807 | for(int b=0; b<atomB.GetValenceSize(); b++){ |
1744 | - int mu = firstAOIndexAtomA + a; | |
1745 | 1808 | int nu = firstAOIndexAtomB + b; |
1746 | 1809 | this->CalcCartesianMatrixElementsByGTOExpansion(cartesianMatrix[XAxis][mu][nu], |
1747 | 1810 | cartesianMatrix[YAxis][mu][nu], |
@@ -1749,18 +1812,51 @@ void Cndo2::CalcCartesianMatrixByGTOExpansion(double*** cartesianMatrix, | ||
1749 | 1812 | atomA, a, atomB, b, stonG); |
1750 | 1813 | } |
1751 | 1814 | } |
1752 | - | |
1815 | + catch(MolDSException ex){ | |
1816 | + #pragma omp critical | |
1817 | + ex.Serialize(ompErrors); | |
1818 | + } | |
1819 | + } // end of loop B parallelized with OpenMP | |
1820 | + // Exception throwing for omp-region | |
1821 | + if(!ompErrors.str().empty()){ | |
1822 | + throw MolDSException::Deserialize(ompErrors); | |
1753 | 1823 | } |
1754 | - } | |
1755 | - catch(MolDSException ex){ | |
1756 | -#pragma omp critical | |
1757 | - ex.Serialize(ompErrors); | |
1824 | + } | |
1825 | + } // end of loop A parallelized with MPI | |
1826 | + | |
1827 | + // communication to collect all matrix data on head-rank | |
1828 | + int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); | |
1829 | + if(mpiRank == mpiHeadRank){ | |
1830 | + // receive the matrix data from other ranks | |
1831 | + for(int mu=0; mu<totalAONumber; mu++){ | |
1832 | + if(mu%mpiSize == mpiHeadRank){continue;} | |
1833 | + int source = mu%mpiSize; | |
1834 | + int tagBase = 3*mu; | |
1835 | + int tagX = tagBase + XAxis; | |
1836 | + int tagY = tagBase + YAxis; | |
1837 | + int tagZ = tagBase + ZAxis; | |
1838 | + MolDS_mpi::MpiProcess::GetInstance()->Recv(source, tagX, cartesianMatrix[XAxis][mu], totalAONumber); | |
1839 | + MolDS_mpi::MpiProcess::GetInstance()->Recv(source, tagY, cartesianMatrix[YAxis][mu], totalAONumber); | |
1840 | + MolDS_mpi::MpiProcess::GetInstance()->Recv(source, tagZ, cartesianMatrix[ZAxis][mu], totalAONumber); | |
1758 | 1841 | } |
1759 | 1842 | } |
1760 | - // Exception throwing for omp-region | |
1761 | - if(!ompErrors.str().empty()){ | |
1762 | - throw MolDSException::Deserialize(ompErrors); | |
1763 | - } | |
1843 | + else{ | |
1844 | + // send the matrix data to head-rank | |
1845 | + for(int mu=0; mu<totalAONumber; mu++){ | |
1846 | + if(mu%mpiSize != mpiRank){continue;} | |
1847 | + int dest = mpiHeadRank; | |
1848 | + int tagBase = 3*mu; | |
1849 | + int tagX = tagBase + XAxis; | |
1850 | + int tagY = tagBase + YAxis; | |
1851 | + int tagZ = tagBase + ZAxis; | |
1852 | + MolDS_mpi::MpiProcess::GetInstance()->Send(dest, tagX, cartesianMatrix[XAxis][mu], totalAONumber); | |
1853 | + MolDS_mpi::MpiProcess::GetInstance()->Send(dest, tagY, cartesianMatrix[YAxis][mu], totalAONumber); | |
1854 | + MolDS_mpi::MpiProcess::GetInstance()->Send(dest, tagZ, cartesianMatrix[ZAxis][mu], totalAONumber); | |
1855 | + } | |
1856 | + } | |
1857 | + // broadcast all matrix data to all ranks | |
1858 | + int root=mpiHeadRank; | |
1859 | + MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&cartesianMatrix[0][0][0], CartesianType_end*totalAONumber*totalAONumber, root); | |
1764 | 1860 | } |
1765 | 1861 | |
1766 | 1862 | // Calculate elements of Cartesian matrix between atomic orbitals. |
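In the Cartesian-matrix hunk each owned row mu produces three messages, one per axis, so the tags are spread out as tag = 3*mu + axis to keep the x, y, and z rows from colliding. A sketch of the sender side under the same flat-buffer assumption (cart[axis] standing in for the project's cartesianMatrix component pointers):

```cpp
#include <mpi.h>

// One owned row mu of a three-component matrix: three sends, one per axis,
// tagged 3*mu+0, 3*mu+1, 3*mu+2 so concurrent messages stay distinguishable.
void SendCartesianRow(double* cart[3], int n, int mu, int head) {
   for (int axis = 0; axis < 3; axis++) {
      MPI_Send(&cart[axis][mu*n], n, MPI_DOUBLE, head, /*tag=*/3*mu + axis,
               MPI_COMM_WORLD);
   }
}
```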
@@ -3721,49 +3817,64 @@ void Cndo2::CalcOverlapESsWithAnotherElectronicStructure(double** overlapESs, | ||
3721 | 3817 | |
3722 | 3818 | // calculate OverlapAOs matrix. E.g. S_{\mu\nu} in (3.74) in J. A. Pople book. |
3723 | 3819 | void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{ |
3820 | + int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); | |
3821 | + int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); | |
3724 | 3822 | int totalAONumber = molecule.GetTotalNumberAOs(); |
3725 | 3823 | int totalAtomNumber = molecule.GetNumberAtoms(); |
3824 | + MallocerFreer::GetInstance()->Initialize<double>(overlapAOs, | |
3825 | + totalAONumber, | |
3826 | + totalAONumber); | |
3726 | 3827 | |
3727 | - stringstream ompErrors; | |
3828 | + // This loop A is parallelized with MPI | |
3829 | + for(int A=0; A<totalAtomNumber; A++){ | |
3830 | + const Atom& atomA = *molecule.GetAtom(A); | |
3831 | + if(A%mpiSize != mpiRank){continue;} | |
3832 | + | |
3833 | + stringstream ompErrors; | |
3728 | 3834 | #pragma omp parallel |
3729 | - { | |
3730 | - double** diatomicOverlapAOs = NULL; | |
3731 | - double** rotatingMatrix = NULL; | |
3732 | - try{ | |
3733 | - // malloc | |
3734 | - MallocerFreer::GetInstance()->Malloc<double>(&diatomicOverlapAOs, | |
3735 | - OrbitalType_end, | |
3736 | - OrbitalType_end); | |
3737 | - MallocerFreer::GetInstance()->Malloc<double>(&rotatingMatrix, | |
3738 | - OrbitalType_end, | |
3739 | - OrbitalType_end); | |
3740 | - // calculation overlapAOs matrix | |
3741 | - for(int mu=0; mu<totalAONumber; mu++){ | |
3742 | - overlapAOs[mu][mu] = 1.0; | |
3743 | - } | |
3835 | + { | |
3836 | + double** diatomicOverlapAOs = NULL; | |
3837 | + double** rotatingMatrix = NULL; | |
3838 | + try{ | |
3839 | + // malloc | |
3840 | + MallocerFreer::GetInstance()->Malloc<double>(&diatomicOverlapAOs, | |
3841 | + OrbitalType_end, | |
3842 | + OrbitalType_end); | |
3843 | + MallocerFreer::GetInstance()->Malloc<double>(&rotatingMatrix, | |
3844 | + OrbitalType_end, | |
3845 | + OrbitalType_end); | |
3744 | 3846 | |
3745 | 3847 | #pragma omp for schedule(auto) |
3746 | - for(int A=0; A<totalAtomNumber; A++){ | |
3747 | - const Atom& atomA = *molecule.GetAtom(A); | |
3748 | 3848 | for(int B=A+1; B<totalAtomNumber; B++){ |
3749 | 3849 | const Atom& atomB = *molecule.GetAtom(B); |
3750 | 3850 | this->CalcDiatomicOverlapAOsInDiatomicFrame(diatomicOverlapAOs, atomA, atomB); |
3751 | 3851 | this->CalcRotatingMatrix(rotatingMatrix, atomA, atomB); |
3752 | 3852 | this->RotateDiatmicOverlapAOsToSpaceFrame(diatomicOverlapAOs, rotatingMatrix); |
3753 | 3853 | this->SetOverlapAOsElement(overlapAOs, diatomicOverlapAOs, atomA, atomB); |
3754 | - } | |
3755 | - } | |
3756 | - } | |
3757 | - catch(MolDSException ex){ | |
3854 | + } // end of loop B parallelized with openMP | |
3855 | + | |
3856 | + } // end of try | |
3857 | + catch(MolDSException ex){ | |
3758 | 3858 | #pragma omp critical |
3759 | - ex.Serialize(ompErrors); | |
3859 | + ex.Serialize(ompErrors); | |
3860 | + } | |
3861 | + this->FreeDiatomicOverlapAOsAndRotatingMatrix(&diatomicOverlapAOs, &rotatingMatrix); | |
3862 | + } // end of omp-parallelized region | |
3863 | + // Exception throwing for omp-region | |
3864 | + if(!ompErrors.str().empty()){ | |
3865 | + throw MolDSException::Deserialize(ompErrors); | |
3760 | 3866 | } |
3761 | - this->FreeDiatomicOverlapAOsAndRotatingMatrix(&diatomicOverlapAOs, &rotatingMatrix); | |
3762 | - } | |
3763 | - // Exception throwing for omp-region | |
3764 | - if(!ompErrors.str().empty()){ | |
3765 | - throw MolDSException::Deserialize(ompErrors); | |
3867 | + } // end of loop A parallelized with MPI | |
3868 | + | |
3869 | + // communication to reduce overlapAOs on all nodes (namely, all_reduce) | |
3870 | + int numTransported = totalAONumber*totalAONumber; | |
3871 | + MolDS_mpi::MpiProcess::GetInstance()->AllReduce(&overlapAOs[0][0], numTransported, std::plus<double>()); | |
3872 | + | |
3873 | + #pragma omp parallel for schedule(auto) | |
3874 | + for(int mu=0; mu<totalAONumber; mu++){ | |
3875 | + overlapAOs[mu][mu] = 1.0; | |
3766 | 3876 | } |
3877 | + | |
3767 | 3878 | /* |
3768 | 3879 | this->OutputLog("overlapAOs matrix\n"); |
3769 | 3880 | for(int o=0; o<molecule.GetTotalNumberAOs(); o++){ |
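CalcOverlapAOs switches strategy: the matrix is zero-initialized up front, each rank writes only the off-diagonal blocks of the atom pairs it owns, and an element-wise sum across ranks (the wrapper's AllReduce with std::plus<double>) assembles the full matrix on every node, after which the unit diagonal is set. A plain-MPI sketch of the equivalent reduction over a flat n×n buffer:

```cpp
#include <mpi.h>

// Disjoint writes into a zero-initialized matrix combine correctly under an
// element-wise all-reduce, so no head-rank gather or broadcast is needed.
void AllReduceOverlap(double* S, int n) {
   MPI_Allreduce(MPI_IN_PLACE, S, n*n, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
   for (int mu = 0; mu < n; mu++) {
      S[mu*n + mu] = 1.0;   // overlap of a normalized AO with itself
   }
}
```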
@@ -3416,29 +3416,37 @@ double Mndo::GetAuxiliaryKNRKRElement(int moI, int moJ, int moK, int moL) const{ | ||
3416 | 3416 | |
3417 | 3417 | void Mndo::CalcTwoElecTwoCore(double****** twoElecTwoCore, |
3418 | 3418 | const Molecule& molecule) const{ |
3419 | + int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); | |
3420 | + int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); | |
3421 | + int totalAtomNumber = molecule.GetNumberAtoms(); | |
3422 | + | |
3419 | 3423 | #ifdef MOLDS_DBG |
3420 | 3424 | if(twoElecTwoCore == NULL){ |
3421 | 3425 | throw MolDSException(this->errorMessageCalcTwoElecTwoCoreNullMatrix); |
3422 | 3426 | } |
3423 | 3427 | #endif |
3424 | 3428 | MallocerFreer::GetInstance()->Initialize<double>(twoElecTwoCore, |
3425 | - molecule.GetNumberAtoms(), | |
3426 | - molecule.GetNumberAtoms(), | |
3429 | + totalAtomNumber, | |
3430 | + totalAtomNumber, | |
3427 | 3431 | dxy, dxy, dxy, dxy); |
3428 | 3432 | |
3429 | - stringstream ompErrors; | |
3433 | + | |
3434 | + // This loop (a) is MPI-parallelized | |
3435 | + for(int a=0; a<totalAtomNumber; a++){ | |
3436 | + if(a%mpiSize != mpiRank){continue;} | |
3437 | + stringstream ompErrors; | |
3430 | 3438 | #pragma omp parallel |
3431 | - { | |
3432 | - double**** diatomicTwoElecTwoCore = NULL; | |
3433 | - double** tmpRotMat = NULL; | |
3434 | - try{ | |
3435 | - MallocerFreer::GetInstance()->Malloc<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy); | |
3436 | - MallocerFreer::GetInstance()->Malloc<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end); | |
3437 | - // note that terms with condition a==b are not needed to calculate. | |
3439 | + { | |
3440 | + double**** diatomicTwoElecTwoCore = NULL; | |
3441 | + double** tmpRotMat = NULL; | |
3442 | + try{ | |
3443 | + MallocerFreer::GetInstance()->Malloc<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy); | |
3444 | + MallocerFreer::GetInstance()->Malloc<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end); | |
3445 | + // note that terms with condition a==b are not needed to calculate. | |
3438 | 3446 | #pragma omp for schedule(auto) |
3439 | - for(int a=0; a<molecule.GetNumberAtoms(); a++){ | |
3440 | - for(int b=a+1; b<molecule.GetNumberAtoms(); b++){ | |
3447 | + for(int b=a+1; b<totalAtomNumber; b++){ | |
3441 | 3448 | this->CalcDiatomicTwoElecTwoCore(diatomicTwoElecTwoCore, tmpRotMat, a, b); |
3449 | + | |
3442 | 3450 | for(int mu=0; mu<dxy; mu++){ |
3443 | 3451 | for(int nu=mu; nu<dxy; nu++){ |
3444 | 3452 | for(int lambda=0; lambda<dxy; lambda++){ |
@@ -3456,20 +3464,26 @@ void Mndo::CalcTwoElecTwoCore(double****** twoElecTwoCore, | ||
3456 | 3464 | } |
3457 | 3465 | } |
3458 | 3466 | } |
3459 | - } | |
3460 | - } | |
3461 | - } | |
3462 | - catch(MolDSException ex){ | |
3467 | + | |
3468 | + } // end of loop b parallelized with OpenMP | |
3469 | + | |
3470 | + } // end of try | |
3471 | + catch(MolDSException ex){ | |
3463 | 3472 | #pragma omp critical |
3464 | - ex.Serialize(ompErrors); | |
3473 | + ex.Serialize(ompErrors); | |
3474 | + } | |
3475 | + MallocerFreer::GetInstance()->Free<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy); | |
3476 | + MallocerFreer::GetInstance()->Free<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end); | |
3477 | + } // end of omp-parallelized region | |
3478 | + // Exception throwing for omp-region | |
3479 | + if(!ompErrors.str().empty()){ | |
3480 | + throw MolDSException::Deserialize(ompErrors); | |
3465 | 3481 | } |
3466 | - MallocerFreer::GetInstance()->Free<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy); | |
3467 | - MallocerFreer::GetInstance()->Free<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end); | |
3468 | - } | |
3469 | - // Exception throwing for omp-region | |
3470 | - if(!ompErrors.str().empty()){ | |
3471 | - throw MolDSException::Deserialize(ompErrors); | |
3472 | - } | |
3482 | + } // end of loop a parallelized with MPI | |
3483 | + | |
3484 | + // communication to reduce twoElecTwoCore on all nodes (namely, all_reduce) | |
3485 | + int numTransported = totalAtomNumber*totalAtomNumber*dxy*dxy*dxy*dxy; | |
3486 | + MolDS_mpi::MpiProcess::GetInstance()->AllReduce(&twoElecTwoCore[0][0][0][0][0][0], numTransported, std::plus<double>()); | |
3473 | 3487 | } |
3474 | 3488 | |
3475 | 3489 | // Calculation of two electrons two cores integral (mu, nu | lambda, sigma) in space fixed frame, |
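Mndo::CalcTwoElecTwoCore follows the same zero-initialize, write-disjointly, all-reduce recipe, only on a six-index array flattened to a single buffer of totalAtomNumber^2 * dxy^4 doubles (dxy being the enum value MolDS uses as the per-atom orbital-block dimension). A sketch of the count computation and reduction, with the flat buffer and the parameter names as illustrative assumptions:

```cpp
#include <mpi.h>

// Reduce a six-index block (atomA, atomB, mu, nu, lambda, sigma) stored
// contiguously; the element count is the product of all six dimensions.
void AllReduceTwoElecTwoCore(double* buf, int nAtoms, int dxy) {
   int numTransported = nAtoms * nAtoms * dxy * dxy * dxy * dxy;
   MPI_Allreduce(MPI_IN_PLACE, buf, numTransported, MPI_DOUBLE, MPI_SUM,
                 MPI_COMM_WORLD);
}
```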