Skip to content

Commit c3a79e4

Browse files
committed
Hubbard GPU working
1 parent 10f9dca commit c3a79e4

File tree

1 file changed

+29
-59
lines changed

1 file changed

+29
-59
lines changed

src/dftOperator/KohnShamHamiltonianOperator.cc

Lines changed: 29 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -1083,7 +1083,7 @@ namespace dftfe
10831083
ExcFamilyType::MGGA))
10841084
{
10851085
unsigned int relaventDofs = d_basisOperationsPtr->nRelaventDofs();
1086-
d_BLASWrapperPtr->xcopy(relaventDofs * numberWavefunctions,
1086+
d_BLASWrapperPtr->xcopy(src.locallyOwnedSize() * numberWavefunctions,
10871087
src.data(),
10881088
1,
10891089
d_srcNonLocalTemp.data(),
@@ -1103,14 +1103,14 @@ namespace dftfe
11031103
d_basisOperationsPtr
11041104
->d_constraintInfo[d_basisOperationsPtr->d_dofHandlerID]
11051105
.distribute_slave_to_master(d_dstNonLocalTemp);
1106+
d_dstNonLocalTemp.accumulateAddLocallyOwned();
1107+
d_dstNonLocalTemp.zeroOutGhosts();
11061108

1107-
1108-
d_BLASWrapperPtr->axpyStridedBlockAtomicAdd(numberWavefunctions,
1109-
relaventDofs,
1110-
scalarHX,
1111-
d_dstNonLocalTemp.data(),
1112-
dst.data(),
1113-
d_mapNodeIdToProcId.data());
1109+
d_BLASWrapperPtr->axpby(dst.locallyOwnedSize() * numberWavefunctions,
1110+
scalarHX,
1111+
d_dstNonLocalTemp.data(),
1112+
1.0,
1113+
dst.data());
11141114
}
11151115

11161116
src.zeroOutGhosts();
@@ -1439,8 +1439,14 @@ namespace dftfe
14391439
{
14401440
inverseMassVectorScaledConstraintsNoneDataInfoPtr->distribute(src);
14411441
if constexpr (memorySpace == dftfe::utils::MemorySpace::HOST)
1442-
if (d_dftParamsPtr->isPseudopotential)
1443-
d_ONCVnonLocalOperator->initialiseOperatorActionOnX(d_kPointIndex);
1442+
{
1443+
if (d_dftParamsPtr->isPseudopotential)
1444+
d_ONCVnonLocalOperator->initialiseOperatorActionOnX(
1445+
d_kPointIndex);
1446+
1447+
d_excManagerPtr->getExcSSDFunctionalObj()
1448+
->reinitKPointDependentVariables(d_kPointIndex);
1449+
}
14441450
#pragma omp parallel for num_threads(d_nOMPThreads)
14451451
for (unsigned int iCell = 0; iCell < numCells;
14461452
iCell += d_cellsBlockSizeHX)
@@ -1565,15 +1571,14 @@ namespace dftfe
15651571
ExcFamilyType::MGGA))
15661572
{
15671573
unsigned int relaventDofs = d_basisOperationsPtr->nRelaventDofs();
1568-
1569-
d_BLASWrapperPtr->stridedBlockScaleCopy(
1574+
d_BLASWrapperPtr->stridedBlockAxpBy(
15701575
numberWavefunctions,
1571-
relaventDofs,
1572-
1.0,
1573-
d_basisOperationsPtr->cellInverseMassVectorBasisData().data(),
1576+
src.locallyOwnedSize(),
15741577
src.data(),
1575-
d_srcNonLocalTemp.data(),
1576-
d_mapNodeIdToProcId.data());
1578+
d_basisOperationsPtr->inverseMassVectorBasisData().data(),
1579+
1.0,
1580+
0.0,
1581+
d_srcNonLocalTemp.data());
15771582

15781583
d_srcNonLocalTemp.updateGhostValues();
15791584
d_basisOperationsPtr->distribute(d_srcNonLocalTemp);
@@ -1589,14 +1594,13 @@ namespace dftfe
15891594
d_basisOperationsPtr
15901595
->d_constraintInfo[d_basisOperationsPtr->d_dofHandlerID]
15911596
.distribute_slave_to_master(d_dstNonLocalTemp);
1592-
1593-
d_BLASWrapperPtr->axpyStridedBlockAtomicAdd(
1594-
numberWavefunctions,
1595-
relaventDofs,
1596-
scalarHX,
1597-
d_dstNonLocalTemp.data(),
1598-
dst.data(),
1599-
d_mapNodeIdToProcId.data());
1597+
d_dstNonLocalTemp.accumulateAddLocallyOwned();
1598+
d_dstNonLocalTemp.zeroOutGhosts();
1599+
d_BLASWrapperPtr->axpby(relaventDofs * numberWavefunctions,
1600+
scalarHX,
1601+
d_dstNonLocalTemp.data(),
1602+
1.0,
1603+
dst.data());
16001604
}
16011605
}
16021606
if (!skip1 && !skip2 && !skip3)
@@ -2012,40 +2016,6 @@ namespace dftfe
20122016
(d_excManagerPtr->getExcSSDFunctionalObj()->getExcFamilyType() ==
20132017
ExcFamilyType::MGGA))
20142018
{
2015-
// unsigned int relaventDofs =
2016-
// d_basisOperationsPtr->nRelaventDofs();
2017-
2018-
// d_BLASWrapperPtr->stridedBlockScaleCopy(
2019-
// numberWavefunctions,
2020-
// relaventDofs,
2021-
// 1.0,
2022-
// d_basisOperationsPtr->cellInverseMassVectorBasisData().data(),
2023-
// src.data(),
2024-
// d_srcNonLocalTemp.data(),
2025-
// d_mapNodeIdToProcId.data());
2026-
2027-
// d_srcNonLocalTemp.updateGhostValues();
2028-
// d_basisOperationsPtr->distribute(d_srcNonLocalTemp);
2029-
2030-
// d_excManagerPtr->getExcSSDFunctionalObj()
2031-
// ->applyWaveFunctionDependentFuncDerWrtPsi(d_srcNonLocalTemp,
2032-
// d_dstNonLocalTemp,
2033-
// numberWavefunctions,
2034-
// d_kPointIndex,
2035-
// d_spinIndex);
2036-
2037-
2038-
// d_basisOperationsPtr
2039-
// ->d_constraintInfo[d_basisOperationsPtr->d_dofHandlerID]
2040-
// .distribute_slave_to_master(d_dstNonLocalTemp);
2041-
2042-
// d_BLASWrapperPtr->axpyStridedBlockAtomicAdd(
2043-
// numberWavefunctions,
2044-
// relaventDofs,
2045-
// scalarHX,
2046-
// d_dstNonLocalTemp.data(),
2047-
// dst.data(),
2048-
// d_mapNodeIdToProcId.data());
20492019
}
20502020
}
20512021
if (!skip1 && !skip2 && !skip3)

0 commit comments

Comments
 (0)