@@ -1083,7 +1083,7 @@ namespace dftfe
1083
1083
ExcFamilyType::MGGA))
1084
1084
{
1085
1085
unsigned int relaventDofs = d_basisOperationsPtr->nRelaventDofs ();
1086
- d_BLASWrapperPtr->xcopy (relaventDofs * numberWavefunctions,
1086
+ d_BLASWrapperPtr->xcopy (src. locallyOwnedSize () * numberWavefunctions,
1087
1087
src.data (),
1088
1088
1 ,
1089
1089
d_srcNonLocalTemp.data (),
@@ -1103,14 +1103,14 @@ namespace dftfe
1103
1103
d_basisOperationsPtr
1104
1104
->d_constraintInfo [d_basisOperationsPtr->d_dofHandlerID ]
1105
1105
.distribute_slave_to_master (d_dstNonLocalTemp);
1106
+ d_dstNonLocalTemp.accumulateAddLocallyOwned ();
1107
+ d_dstNonLocalTemp.zeroOutGhosts ();
1106
1108
1107
-
1108
- d_BLASWrapperPtr->axpyStridedBlockAtomicAdd (numberWavefunctions,
1109
- relaventDofs,
1110
- scalarHX,
1111
- d_dstNonLocalTemp.data (),
1112
- dst.data (),
1113
- d_mapNodeIdToProcId.data ());
1109
+ d_BLASWrapperPtr->axpby (dst.locallyOwnedSize () * numberWavefunctions,
1110
+ scalarHX,
1111
+ d_dstNonLocalTemp.data (),
1112
+ 1.0 ,
1113
+ dst.data ());
1114
1114
}
1115
1115
1116
1116
src.zeroOutGhosts ();
@@ -1439,8 +1439,14 @@ namespace dftfe
1439
1439
{
1440
1440
inverseMassVectorScaledConstraintsNoneDataInfoPtr->distribute (src);
1441
1441
if constexpr (memorySpace == dftfe::utils::MemorySpace::HOST)
1442
- if (d_dftParamsPtr->isPseudopotential )
1443
- d_ONCVnonLocalOperator->initialiseOperatorActionOnX (d_kPointIndex);
1442
+ {
1443
+ if (d_dftParamsPtr->isPseudopotential )
1444
+ d_ONCVnonLocalOperator->initialiseOperatorActionOnX (
1445
+ d_kPointIndex);
1446
+
1447
+ d_excManagerPtr->getExcSSDFunctionalObj ()
1448
+ ->reinitKPointDependentVariables (d_kPointIndex);
1449
+ }
1444
1450
#pragma omp parallel for num_threads(d_nOMPThreads)
1445
1451
for (unsigned int iCell = 0 ; iCell < numCells;
1446
1452
iCell += d_cellsBlockSizeHX)
@@ -1565,15 +1571,14 @@ namespace dftfe
1565
1571
ExcFamilyType::MGGA))
1566
1572
{
1567
1573
unsigned int relaventDofs = d_basisOperationsPtr->nRelaventDofs ();
1568
-
1569
- d_BLASWrapperPtr->stridedBlockScaleCopy (
1574
+ d_BLASWrapperPtr->stridedBlockAxpBy (
1570
1575
numberWavefunctions,
1571
- relaventDofs,
1572
- 1.0 ,
1573
- d_basisOperationsPtr->cellInverseMassVectorBasisData ().data (),
1576
+ src.locallyOwnedSize (),
1574
1577
src.data (),
1575
- d_srcNonLocalTemp.data (),
1576
- d_mapNodeIdToProcId.data ());
1578
+ d_basisOperationsPtr->inverseMassVectorBasisData ().data (),
1579
+ 1.0 ,
1580
+ 0.0 ,
1581
+ d_srcNonLocalTemp.data ());
1577
1582
1578
1583
d_srcNonLocalTemp.updateGhostValues ();
1579
1584
d_basisOperationsPtr->distribute (d_srcNonLocalTemp);
@@ -1589,14 +1594,13 @@ namespace dftfe
1589
1594
d_basisOperationsPtr
1590
1595
->d_constraintInfo [d_basisOperationsPtr->d_dofHandlerID ]
1591
1596
.distribute_slave_to_master (d_dstNonLocalTemp);
1592
-
1593
- d_BLASWrapperPtr->axpyStridedBlockAtomicAdd (
1594
- numberWavefunctions,
1595
- relaventDofs,
1596
- scalarHX,
1597
- d_dstNonLocalTemp.data (),
1598
- dst.data (),
1599
- d_mapNodeIdToProcId.data ());
1597
+ d_dstNonLocalTemp.accumulateAddLocallyOwned ();
1598
+ d_dstNonLocalTemp.zeroOutGhosts ();
1599
+ d_BLASWrapperPtr->axpby (relaventDofs * numberWavefunctions,
1600
+ scalarHX,
1601
+ d_dstNonLocalTemp.data (),
1602
+ 1.0 ,
1603
+ dst.data ());
1600
1604
}
1601
1605
}
1602
1606
if (!skip1 && !skip2 && !skip3)
@@ -2012,40 +2016,6 @@ namespace dftfe
2012
2016
(d_excManagerPtr->getExcSSDFunctionalObj ()->getExcFamilyType () ==
2013
2017
ExcFamilyType::MGGA))
2014
2018
{
2015
- // unsigned int relaventDofs =
2016
- // d_basisOperationsPtr->nRelaventDofs();
2017
-
2018
- // d_BLASWrapperPtr->stridedBlockScaleCopy(
2019
- // numberWavefunctions,
2020
- // relaventDofs,
2021
- // 1.0,
2022
- // d_basisOperationsPtr->cellInverseMassVectorBasisData().data(),
2023
- // src.data(),
2024
- // d_srcNonLocalTemp.data(),
2025
- // d_mapNodeIdToProcId.data());
2026
-
2027
- // d_srcNonLocalTemp.updateGhostValues();
2028
- // d_basisOperationsPtr->distribute(d_srcNonLocalTemp);
2029
-
2030
- // d_excManagerPtr->getExcSSDFunctionalObj()
2031
- // ->applyWaveFunctionDependentFuncDerWrtPsi(d_srcNonLocalTemp,
2032
- // d_dstNonLocalTemp,
2033
- // numberWavefunctions,
2034
- // d_kPointIndex,
2035
- // d_spinIndex);
2036
-
2037
-
2038
- // d_basisOperationsPtr
2039
- // ->d_constraintInfo[d_basisOperationsPtr->d_dofHandlerID]
2040
- // .distribute_slave_to_master(d_dstNonLocalTemp);
2041
-
2042
- // d_BLASWrapperPtr->axpyStridedBlockAtomicAdd(
2043
- // numberWavefunctions,
2044
- // relaventDofs,
2045
- // scalarHX,
2046
- // d_dstNonLocalTemp.data(),
2047
- // dst.data(),
2048
- // d_mapNodeIdToProcId.data());
2049
2019
}
2050
2020
}
2051
2021
if (!skip1 && !skip2 && !skip3)
0 commit comments