Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions libpimeval/src/libpimeval.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -535,19 +535,19 @@ pimRotateElementsLeft(PimObjId src)
return ok ? PIM_OK : PIM_ERROR;
}

//! @brief Shift elements of an obj by one step to the right and fill zero
//! @brief Shift elements of an obj by one step to the right, filling the vacated position with zero. Elements are shifted across region boundaries only if useCrossRegionCommunication is true; if it is false, zeros are shifted in at each region boundary instead.
PimStatus
pimShiftElementsRight(PimObjId src)
pimShiftElementsRight(PimObjId src, bool useCrossRegionCommunication)
{
bool ok = pimSim::get()->pimShiftElementsRight(src);
bool ok = pimSim::get()->pimShiftElementsRight(src, useCrossRegionCommunication);
return ok ? PIM_OK : PIM_ERROR;
}

//! @brief Shift elements of an obj by one step to the left and fill zero
//! @brief Shift elements of an obj by one step to the left, filling the vacated position with zero. Elements are shifted across region boundaries only if useCrossRegionCommunication is true; if it is false, zeros are shifted in at each region boundary instead.
PimStatus
pimShiftElementsLeft(PimObjId src)
pimShiftElementsLeft(PimObjId src, bool useCrossRegionCommunication)
{
bool ok = pimSim::get()->pimShiftElementsLeft(src);
bool ok = pimSim::get()->pimShiftElementsLeft(src, useCrossRegionCommunication);
return ok ? PIM_OK : PIM_ERROR;
}

Expand Down
4 changes: 2 additions & 2 deletions libpimeval/src/libpimeval.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,8 @@ PimStatus pimBroadcastUInt(PimObjId dest, uint64_t value);
PimStatus pimBroadcastFP(PimObjId dest, float value);
PimStatus pimRotateElementsRight(PimObjId src);
PimStatus pimRotateElementsLeft(PimObjId src);
PimStatus pimShiftElementsRight(PimObjId src);
PimStatus pimShiftElementsLeft(PimObjId src);
PimStatus pimShiftElementsRight(PimObjId src, bool useCrossRegionCommunication = true);
PimStatus pimShiftElementsLeft(PimObjId src, bool useCrossRegionCommunication = true);
PimStatus pimShiftBitsRight(PimObjId src, PimObjId dest, unsigned shiftAmount);
PimStatus pimShiftBitsLeft(PimObjId src, PimObjId dest, unsigned shiftAmount);

Expand Down
54 changes: 28 additions & 26 deletions libpimeval/src/pimCmd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1221,34 +1221,36 @@ pimCmdRotate::execute()

computeAllRegions(numRegions);

// handle region boundaries
if (m_cmdType == PimCmdEnum::ROTATE_ELEM_R || m_cmdType == PimCmdEnum::SHIFT_ELEM_R) {
for (unsigned i = 0; i < numRegions; ++i) {
const pimRegion &srcRegion = objSrc.getRegions()[i];
uint64_t elemIdxBegin = srcRegion.getElemIdxBegin();
uint64_t val = 0;
if (i == 0 && m_cmdType == PimCmdEnum::ROTATE_ELEM_R) {
val = m_regionBoundary[numRegions - 1];
} else if (i > 0) {
val = m_regionBoundary[i - 1];
// handle region boundaries only if using cross region communication
if(m_useCrossRegionCommunication) {
if (m_cmdType == PimCmdEnum::ROTATE_ELEM_R || m_cmdType == PimCmdEnum::SHIFT_ELEM_R) {
for (unsigned i = 0; i < numRegions; ++i) {
const pimRegion &srcRegion = objSrc.getRegions()[i];
uint64_t elemIdxBegin = srcRegion.getElemIdxBegin();
uint64_t val = 0;
if (i == 0 && m_cmdType == PimCmdEnum::ROTATE_ELEM_R) {
val = m_regionBoundary[numRegions - 1];
} else if (i > 0) {
val = m_regionBoundary[i - 1];
}
objSrc.setElement(elemIdxBegin, val);
}
objSrc.setElement(elemIdxBegin, val);
}
} else if (m_cmdType == PimCmdEnum::ROTATE_ELEM_L || m_cmdType == PimCmdEnum::SHIFT_ELEM_L) {
for (unsigned i = 0; i < numRegions; ++i) {
const pimRegion &srcRegion = objSrc.getRegions()[i];
unsigned numElementsInRegion = srcRegion.getNumElemInRegion();
uint64_t elemIdxBegin = srcRegion.getElemIdxBegin();
uint64_t val = 0;
if (i == numRegions - 1 && m_cmdType == PimCmdEnum::ROTATE_ELEM_L) {
val = m_regionBoundary[0];
} else if (i < numRegions - 1) {
val = m_regionBoundary[i + 1];
} else if (m_cmdType == PimCmdEnum::ROTATE_ELEM_L || m_cmdType == PimCmdEnum::SHIFT_ELEM_L) {
for (unsigned i = 0; i < numRegions; ++i) {
const pimRegion &srcRegion = objSrc.getRegions()[i];
unsigned numElementsInRegion = srcRegion.getNumElemInRegion();
uint64_t elemIdxBegin = srcRegion.getElemIdxBegin();
uint64_t val = 0;
if (i == numRegions - 1 && m_cmdType == PimCmdEnum::ROTATE_ELEM_L) {
val = m_regionBoundary[0];
} else if (i < numRegions - 1) {
val = m_regionBoundary[i + 1];
}
objSrc.setElement(elemIdxBegin + numElementsInRegion - 1, val);
}
objSrc.setElement(elemIdxBegin + numElementsInRegion - 1, val);
} else {
assert(0);
}
} else {
assert(0);
}

if (pimSim::get()->getDeviceType() != PIM_FUNCTIONAL) {
Expand Down Expand Up @@ -1323,7 +1325,7 @@ pimCmdRotate::updateStats() const
PimDataType dataType = objSrc.getDataType();
bool isVLayout = objSrc.isVLayout();

pimeval::perfEnergy mPerfEnergy = pimSim::get()->getPerfEnergyModel()->getPerfEnergyForRotate(m_cmdType, objSrc);
pimeval::perfEnergy mPerfEnergy = pimSim::get()->getPerfEnergyModel()->getPerfEnergyForRotate(m_cmdType, objSrc, m_useCrossRegionCommunication);
pimSim::get()->getStatsMgr()->recordCmd(getName(dataType, isVLayout), mPerfEnergy);
return true;
}
Expand Down
5 changes: 3 additions & 2 deletions libpimeval/src/pimCmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -559,8 +559,8 @@ class pimCmdBroadcast : public pimCmd
class pimCmdRotate : public pimCmd
{
public:
pimCmdRotate(PimCmdEnum cmdType, PimObjId src)
: pimCmd(cmdType), m_src(src)
pimCmdRotate(PimCmdEnum cmdType, PimObjId src, bool useCrossRegionCommunication)
: pimCmd(cmdType), m_src(src), m_useCrossRegionCommunication(useCrossRegionCommunication)
{
assert(cmdType == PimCmdEnum::ROTATE_ELEM_R || cmdType == PimCmdEnum::ROTATE_ELEM_L ||
cmdType == PimCmdEnum::SHIFT_ELEM_R || cmdType == PimCmdEnum::SHIFT_ELEM_L);
Expand All @@ -573,6 +573,7 @@ class pimCmdRotate : public pimCmd
protected:
PimObjId m_src;
std::vector<uint64_t> m_regionBoundary;
bool m_useCrossRegionCommunication;
};

//! @class pimCmdReadRowToSa
Expand Down
2 changes: 1 addition & 1 deletion libpimeval/src/pimPerfEnergyAim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ pimPerfEnergyAim::getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInfo

//! @brief Perf energy model of aim for rotate
pimeval::perfEnergy
pimPerfEnergyAim::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const
pimPerfEnergyAim::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const
{
double msRuntime = 0.0;
double mjEnergy = 0.0;
Expand Down
2 changes: 1 addition & 1 deletion libpimeval/src/pimPerfEnergyAim.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class pimPerfEnergyAim : public pimPerfEnergyBase
virtual pimeval::perfEnergy getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjInfo& objSrc1, const pimObjInfo& objSrc2, const pimObjInfo& objDest) const override;
virtual pimeval::perfEnergy getPerfEnergyForReduction(PimCmdEnum cmdType, const pimObjInfo& obj, unsigned numPass) const override;
virtual pimeval::perfEnergy getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInfo& obj) const override;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const override;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const override;
virtual pimeval::perfEnergy getPerfEnergyForMac(PimCmdEnum cmdType, const pimObjInfo& obj) const override;

protected:
Expand Down
2 changes: 1 addition & 1 deletion libpimeval/src/pimPerfEnergyAquabolt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ pimPerfEnergyAquabolt::getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimOb

//! @brief Perf energy model of aquabolt PIM for rotate
pimeval::perfEnergy
pimPerfEnergyAquabolt::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const
pimPerfEnergyAquabolt::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const
{
double msRuntime = 0.0;
double mjEnergy = 0.0;
Expand Down
4 changes: 2 additions & 2 deletions libpimeval/src/pimPerfEnergyAquabolt.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ class pimPerfEnergyAquabolt : public pimPerfEnergyBase
virtual pimeval::perfEnergy getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjInfo& objSrc1, const pimObjInfo& objSrc2, const pimObjInfo& objDest) const override;
virtual pimeval::perfEnergy getPerfEnergyForReduction(PimCmdEnum cmdType, const pimObjInfo& obj, unsigned numPass) const override;
virtual pimeval::perfEnergy getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInfo& obj) const override;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const override;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const override;

protected:
unsigned m_aquaboltFPUBitWidth = 16;
// TODO: Update for Aquabolt
Expand Down
16 changes: 10 additions & 6 deletions libpimeval/src/pimPerfEnergyBankLevel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ pimPerfEnergyBankLevel::getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimO
// TODO: This needs to be revisited
//! @brief Perf energy model of bank-level PIM for rotate
pimeval::perfEnergy
pimPerfEnergyBankLevel::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const
pimPerfEnergyBankLevel::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const
{
double msRuntime = 0.0;
double mjEnergy = 0.0;
Expand All @@ -366,8 +366,6 @@ pimPerfEnergyBankLevel::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjI
unsigned bitsPerElement = obj.getBitsPerElement(PimBitWidth::ACTUAL);
unsigned numRegions = obj.getRegions().size();
uint64_t totalOp = 0;
// boundary handling - assume two times copying between device and host for boundary elements
pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8);

// rotate within subarray:
// For every bit: Read row to SA; move SA to R1; Shift R1 by N steps; Move R1 to SA; Write SA to row
Expand All @@ -377,9 +375,15 @@ pimPerfEnergyBankLevel::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjI
msRuntime = (m_tR + (bitsPerElement + 2) * m_tL + m_tW); // for one pass
msRuntime *= numPass;
mjEnergy = (m_eAP + (bitsPerElement + 2) * m_eL) * numPass;
msRuntime += 2 * perfEnergyBT.m_msRuntime;
mjEnergy += 2 * perfEnergyBT.m_mjEnergy;
printf("PIM-Warning: Perf energy model is not precise for PIM command %s\n", pimCmd::getName(cmdType, "").c_str());

// Only handle region boundaries if cross region communication is enabled
if(useCrossRegionCommunication) {
// boundary handling - assume two times copying between device and host for boundary elements
pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8);
msRuntime += 2 * perfEnergyBT.m_msRuntime;
mjEnergy += 2 * perfEnergyBT.m_mjEnergy;
printf("PIM-Warning: Perf energy model is not precise for PIM command %s\n", pimCmd::getName(cmdType, "").c_str());
}

return pimeval::perfEnergy(msRuntime, mjEnergy, msRead, msWrite, msCompute, totalOp);
}
Expand Down
2 changes: 1 addition & 1 deletion libpimeval/src/pimPerfEnergyBankLevel.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class pimPerfEnergyBankLevel : public pimPerfEnergyBase
virtual pimeval::perfEnergy getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjInfo& objSrc1, const pimObjInfo& objSrc2, const pimObjInfo& objDest) const override;
virtual pimeval::perfEnergy getPerfEnergyForReduction(PimCmdEnum cmdType, const pimObjInfo& obj, unsigned numPass) const override;
virtual pimeval::perfEnergy getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInfo& obj) const override;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const override;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const override;
virtual pimeval::perfEnergy getPerfEnergyForPrefixSum(PimCmdEnum cmdType, const pimObjInfo& obj) const override;

protected:
Expand Down
2 changes: 1 addition & 1 deletion libpimeval/src/pimPerfEnergyBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ pimPerfEnergyBase::getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInf

//! @brief Perf energy model of base class for rotate (placeholder)
pimeval::perfEnergy
pimPerfEnergyBase::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const
pimPerfEnergyBase::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const
{
double msRuntime = 1e10;
double mjEnergy = 999999999.9;
Expand Down
2 changes: 1 addition & 1 deletion libpimeval/src/pimPerfEnergyBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class pimPerfEnergyBase
virtual pimeval::perfEnergy getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjInfo& objSrc1, const pimObjInfo& objSrc2, const pimObjInfo& objDest) const;
virtual pimeval::perfEnergy getPerfEnergyForReduction(PimCmdEnum cmdType, const pimObjInfo& obj, unsigned numPass) const;
virtual pimeval::perfEnergy getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInfo& obj) const;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const;
virtual pimeval::perfEnergy getPerfEnergyForPrefixSum(PimCmdEnum cmdType, const pimObjInfo& obj) const;
virtual pimeval::perfEnergy getPerfEnergyForMac(PimCmdEnum cmdType, const pimObjInfo& obj) const;

Expand Down
22 changes: 15 additions & 7 deletions libpimeval/src/pimPerfEnergyBitSerial.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ pimPerfEnergyBitSerial::getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimO

//! @brief Perf energy model of bit-serial PIM for rotate
pimeval::perfEnergy
pimPerfEnergyBitSerial::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const
pimPerfEnergyBitSerial::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const
{
double msRuntime = 0.0;
double mjEnergy = 0.0;
Expand All @@ -451,8 +451,6 @@ pimPerfEnergyBitSerial::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjI
unsigned bitsPerElement = obj.getBitsPerElement(PimBitWidth::ACTUAL);
unsigned numRegions = obj.getRegions().size();
unsigned numCore = obj.getNumCoreAvailable();
// boundary handling - assume two times copying between device and host for boundary elements
pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8);

switch (m_simTarget) {
case PIM_DEVICE_BITSIMD_V:
Expand All @@ -465,8 +463,14 @@ pimPerfEnergyBitSerial::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjI
totalOp += 3 * bitsPerElement * numPass * numCore;
msRuntime = msRead + msWrite + msCompute;
mjEnergy = (m_eAP + 3 * m_eL) * bitsPerElement * numPass; // for one pass
msRuntime += 2 * perfEnergyBT.m_msRuntime;
mjEnergy += 2 * perfEnergyBT.m_mjEnergy;

// Only handle region boundaries if cross region communication is enabled
if(useCrossRegionCommunication) {
// boundary handling - assume two times copying between device and host for boundary elements
pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8);
msRuntime += 2 * perfEnergyBT.m_msRuntime;
mjEnergy += 2 * perfEnergyBT.m_mjEnergy;
}
break;
case PIM_DEVICE_SIMDRAM:
// todo
Expand All @@ -481,8 +485,12 @@ pimPerfEnergyBitSerial::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjI
msRuntime = (m_tR + (bitsPerElement + 2) * m_tL + m_tW); // for one pass
msRuntime *= numPass;
mjEnergy = (m_eAP + (bitsPerElement + 2) * m_eL) * numPass;
msRuntime += 2 * perfEnergyBT.m_msRuntime;
mjEnergy += 2 * perfEnergyBT.m_mjEnergy;
if(useCrossRegionCommunication) {
// boundary handling - assume two times copying between device and host for boundary elements
pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8);
msRuntime += 2 * perfEnergyBT.m_msRuntime;
mjEnergy += 2 * perfEnergyBT.m_mjEnergy;
}
break;
default:
assert(0);
Expand Down
2 changes: 1 addition & 1 deletion libpimeval/src/pimPerfEnergyBitSerial.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class pimPerfEnergyBitSerial : public pimPerfEnergyBase
virtual pimeval::perfEnergy getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjInfo& objSrc1, const pimObjInfo& objSrc2, const pimObjInfo& objDest) const override;
virtual pimeval::perfEnergy getPerfEnergyForReduction(PimCmdEnum cmdType, const pimObjInfo& obj, unsigned numPass) const override;
virtual pimeval::perfEnergy getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInfo& obj) const override;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const override;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const override;
virtual pimeval::perfEnergy getPerfEnergyForPrefixSum(PimCmdEnum cmdType, const pimObjInfo& obj) const override;

protected:
Expand Down
16 changes: 10 additions & 6 deletions libpimeval/src/pimPerfEnergyFulcrum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ pimPerfEnergyFulcrum::getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObj

//! @brief Perf energy model of Fulcrum for rotate
pimeval::perfEnergy
pimPerfEnergyFulcrum::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const
pimPerfEnergyFulcrum::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const
{
double msRuntime = 0.0;
double mjEnergy = 0.0;
Expand All @@ -318,8 +318,6 @@ pimPerfEnergyFulcrum::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInf
unsigned bitsPerElement = obj.getBitsPerElement(PimBitWidth::ACTUAL);
unsigned numRegions = obj.getRegions().size();
uint64_t totalOp = 0;
// boundary handling - assume two times copying between device and host for boundary elements
pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8);

// rotate within subarray:
// For every bit: Read row to SA; move SA to R1; Shift R1 by N steps; Move R1 to SA; Write SA to row
Expand All @@ -330,9 +328,15 @@ pimPerfEnergyFulcrum::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInf
msWrite = m_tW * numPass;
msRuntime = msRead + msWrite + msCompute;
mjEnergy = (m_eAP + (bitsPerElement + 2) * m_eL) * numPass;
msRuntime += 2 * perfEnergyBT.m_msRuntime;
mjEnergy += 2 * perfEnergyBT.m_mjEnergy;
printf("PIM-Warning: Perf energy model is not precise for PIM command %s\n", pimCmd::getName(cmdType, "").c_str());

// Only handle region boundaries if cross region communication is enabled
if(useCrossRegionCommunication) {
// boundary handling - assume two times copying between device and host for boundary elements
pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8);
msRuntime += 2 * perfEnergyBT.m_msRuntime;
mjEnergy += 2 * perfEnergyBT.m_mjEnergy;
printf("PIM-Warning: Perf energy model is not precise for PIM command %s\n", pimCmd::getName(cmdType, "").c_str());
}

return pimeval::perfEnergy(msRuntime, mjEnergy, msRead, msWrite, msCompute, totalOp);
}
Expand Down
2 changes: 1 addition & 1 deletion libpimeval/src/pimPerfEnergyFulcrum.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class pimPerfEnergyFulcrum : public pimPerfEnergyBase
virtual pimeval::perfEnergy getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjInfo& objSrc1, const pimObjInfo& objSrc2, const pimObjInfo& objDest) const override;
virtual pimeval::perfEnergy getPerfEnergyForReduction(PimCmdEnum cmdType, const pimObjInfo& obj, unsigned numPass) const override;
virtual pimeval::perfEnergy getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInfo& obj) const override;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const override;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const override;
virtual pimeval::perfEnergy getPerfEnergyForPrefixSum(PimCmdEnum cmdType, const pimObjInfo& obj) const override;

protected:
Expand Down
Loading