diff --git a/libpimeval/src/libpimeval.cpp b/libpimeval/src/libpimeval.cpp index ccd21330..08015f03 100644 --- a/libpimeval/src/libpimeval.cpp +++ b/libpimeval/src/libpimeval.cpp @@ -535,19 +535,19 @@ pimRotateElementsLeft(PimObjId src) return ok ? PIM_OK : PIM_ERROR; } -//! @brief Shift elements of an obj by one step to the right and fill zero +//! @brief Shift elements of an obj by one step to the right and fill zero. Only shifts across region boundaries if useCrossRegionCommunication==true. If false, shift in zeros at region boundaries. PimStatus -pimShiftElementsRight(PimObjId src) +pimShiftElementsRight(PimObjId src, bool useCrossRegionCommunication) { - bool ok = pimSim::get()->pimShiftElementsRight(src); + bool ok = pimSim::get()->pimShiftElementsRight(src, useCrossRegionCommunication); return ok ? PIM_OK : PIM_ERROR; } -//! @brief Shift elements of an obj by one step to the left and fill zero +//! @brief Shift elements of an obj by one step to the left and fill zero. Only shifts across region boundaries if useCrossRegionCommunication==true. If false, shift in zeros at region boundaries. PimStatus -pimShiftElementsLeft(PimObjId src) +pimShiftElementsLeft(PimObjId src, bool useCrossRegionCommunication) { - bool ok = pimSim::get()->pimShiftElementsLeft(src); + bool ok = pimSim::get()->pimShiftElementsLeft(src, useCrossRegionCommunication); return ok ? PIM_OK : PIM_ERROR; } diff --git a/libpimeval/src/libpimeval.h b/libpimeval/src/libpimeval.h index b84d6acc..44b74698 100644 --- a/libpimeval/src/libpimeval.h +++ b/libpimeval/src/libpimeval.h @@ -228,8 +228,8 @@ PimStatus pimBroadcastUInt(PimObjId dest, uint64_t value); PimStatus pimBroadcastFP(PimObjId dest, float value); PimStatus pimRotateElementsRight(PimObjId src); PimStatus pimRotateElementsLeft(PimObjId src); -PimStatus pimShiftElementsRight(PimObjId src); -PimStatus pimShiftElementsLeft(PimObjId src); +PimStatus pimShiftElementsRight(PimObjId src, bool useCrossRegionCommunication = true); +PimStatus pimShiftElementsLeft(PimObjId src, bool useCrossRegionCommunication = true); PimStatus pimShiftBitsRight(PimObjId src, PimObjId dest, unsigned shiftAmount); PimStatus pimShiftBitsLeft(PimObjId src, PimObjId dest, unsigned shiftAmount); diff --git a/libpimeval/src/pimCmd.cpp b/libpimeval/src/pimCmd.cpp index 454607c8..11b63b91 100644 --- a/libpimeval/src/pimCmd.cpp +++ b/libpimeval/src/pimCmd.cpp @@ -1221,34 +1221,36 @@ pimCmdRotate::execute() computeAllRegions(numRegions); - // handle region boundaries - if (m_cmdType == PimCmdEnum::ROTATE_ELEM_R || m_cmdType == PimCmdEnum::SHIFT_ELEM_R) { - for (unsigned i = 0; i < numRegions; ++i) { - const pimRegion &srcRegion = objSrc.getRegions()[i]; - uint64_t elemIdxBegin = srcRegion.getElemIdxBegin(); - uint64_t val = 0; - if (i == 0 && m_cmdType == PimCmdEnum::ROTATE_ELEM_R) { - val = m_regionBoundary[numRegions - 1]; - } else if (i > 0) { - val = m_regionBoundary[i - 1]; + // handle region boundaries only if using cross region communication + if(m_useCrossRegionCommunication) { + if (m_cmdType == PimCmdEnum::ROTATE_ELEM_R || m_cmdType == PimCmdEnum::SHIFT_ELEM_R) { + for (unsigned i = 0; i < numRegions; ++i) { + const pimRegion &srcRegion = objSrc.getRegions()[i]; + uint64_t elemIdxBegin = srcRegion.getElemIdxBegin(); + uint64_t val = 0; + if (i == 0 && m_cmdType == PimCmdEnum::ROTATE_ELEM_R) { + val = m_regionBoundary[numRegions - 1]; + } else if (i > 0) { + val = m_regionBoundary[i - 1]; + } + objSrc.setElement(elemIdxBegin, val); } - objSrc.setElement(elemIdxBegin, val); - } - } else if (m_cmdType == PimCmdEnum::ROTATE_ELEM_L || m_cmdType == PimCmdEnum::SHIFT_ELEM_L) { - for (unsigned i = 0; i < numRegions; ++i) { - const pimRegion &srcRegion = objSrc.getRegions()[i]; - unsigned numElementsInRegion = srcRegion.getNumElemInRegion(); - uint64_t elemIdxBegin = srcRegion.getElemIdxBegin(); - uint64_t val = 0; - if (i == numRegions - 1 && m_cmdType == PimCmdEnum::ROTATE_ELEM_L) { - val = m_regionBoundary[0]; - } else if (i < numRegions - 1) { - val = m_regionBoundary[i + 1]; + } else if (m_cmdType == PimCmdEnum::ROTATE_ELEM_L || m_cmdType == PimCmdEnum::SHIFT_ELEM_L) { + for (unsigned i = 0; i < numRegions; ++i) { + const pimRegion &srcRegion = objSrc.getRegions()[i]; + unsigned numElementsInRegion = srcRegion.getNumElemInRegion(); + uint64_t elemIdxBegin = srcRegion.getElemIdxBegin(); + uint64_t val = 0; + if (i == numRegions - 1 && m_cmdType == PimCmdEnum::ROTATE_ELEM_L) { + val = m_regionBoundary[0]; + } else if (i < numRegions - 1) { + val = m_regionBoundary[i + 1]; + } + objSrc.setElement(elemIdxBegin + numElementsInRegion - 1, val); } - objSrc.setElement(elemIdxBegin + numElementsInRegion - 1, val); + } else { + assert(0); } - } else { - assert(0); } if (pimSim::get()->getDeviceType() != PIM_FUNCTIONAL) { @@ -1323,7 +1325,7 @@ pimCmdRotate::updateStats() const PimDataType dataType = objSrc.getDataType(); bool isVLayout = objSrc.isVLayout(); - pimeval::perfEnergy mPerfEnergy = pimSim::get()->getPerfEnergyModel()->getPerfEnergyForRotate(m_cmdType, objSrc); + pimeval::perfEnergy mPerfEnergy = pimSim::get()->getPerfEnergyModel()->getPerfEnergyForRotate(m_cmdType, objSrc, m_useCrossRegionCommunication); pimSim::get()->getStatsMgr()->recordCmd(getName(dataType, isVLayout), mPerfEnergy); return true; } diff --git a/libpimeval/src/pimCmd.h b/libpimeval/src/pimCmd.h index 2941d529..5842f7e0 100644 --- a/libpimeval/src/pimCmd.h +++ b/libpimeval/src/pimCmd.h @@ -559,8 +559,8 @@ class pimCmdBroadcast : public pimCmd class pimCmdRotate : public pimCmd { public: - pimCmdRotate(PimCmdEnum cmdType, PimObjId src) - : pimCmd(cmdType), m_src(src) + pimCmdRotate(PimCmdEnum cmdType, PimObjId src, bool useCrossRegionCommunication) + : pimCmd(cmdType), m_src(src), m_useCrossRegionCommunication(useCrossRegionCommunication) { assert(cmdType == PimCmdEnum::ROTATE_ELEM_R || cmdType == PimCmdEnum::ROTATE_ELEM_L || cmdType == PimCmdEnum::SHIFT_ELEM_R || cmdType == PimCmdEnum::SHIFT_ELEM_L); @@ -573,6 +573,7 @@ class pimCmdRotate : public pimCmd protected: PimObjId m_src; std::vector m_regionBoundary; + bool m_useCrossRegionCommunication; }; //! @class pimCmdReadRowToSa diff --git a/libpimeval/src/pimPerfEnergyAim.cpp b/libpimeval/src/pimPerfEnergyAim.cpp index 1dc73fc4..00190e94 100644 --- a/libpimeval/src/pimPerfEnergyAim.cpp +++ b/libpimeval/src/pimPerfEnergyAim.cpp @@ -132,7 +132,7 @@ pimPerfEnergyAim::getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInfo //! @brief Perf energy model of aim for rotate pimeval::perfEnergy -pimPerfEnergyAim::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const +pimPerfEnergyAim::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const { double msRuntime = 0.0; double mjEnergy = 0.0; diff --git a/libpimeval/src/pimPerfEnergyAim.h b/libpimeval/src/pimPerfEnergyAim.h index bb00cd25..6b35750a 100644 --- a/libpimeval/src/pimPerfEnergyAim.h +++ b/libpimeval/src/pimPerfEnergyAim.h @@ -26,7 +26,7 @@ class pimPerfEnergyAim : public pimPerfEnergyBase virtual pimeval::perfEnergy getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjInfo& objSrc1, const pimObjInfo& objSrc2, const pimObjInfo& objDest) const override; virtual pimeval::perfEnergy getPerfEnergyForReduction(PimCmdEnum cmdType, const pimObjInfo& obj, unsigned numPass) const override; virtual pimeval::perfEnergy getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInfo& obj) const override; - virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const override; + virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const override; virtual pimeval::perfEnergy getPerfEnergyForMac(PimCmdEnum cmdType, const pimObjInfo& obj) const override; protected: diff --git a/libpimeval/src/pimPerfEnergyAquabolt.cpp b/libpimeval/src/pimPerfEnergyAquabolt.cpp index 845727e2..43cfb658 100644 --- a/libpimeval/src/pimPerfEnergyAquabolt.cpp +++ b/libpimeval/src/pimPerfEnergyAquabolt.cpp @@ -259,7 +259,7 @@ pimPerfEnergyAquabolt::getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimOb //! @brief Perf energy model of aquabolt PIM for rotate pimeval::perfEnergy -pimPerfEnergyAquabolt::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const +pimPerfEnergyAquabolt::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const { double msRuntime = 0.0; double mjEnergy = 0.0; diff --git a/libpimeval/src/pimPerfEnergyAquabolt.h b/libpimeval/src/pimPerfEnergyAquabolt.h index 5dcca145..7645853d 100644 --- a/libpimeval/src/pimPerfEnergyAquabolt.h +++ b/libpimeval/src/pimPerfEnergyAquabolt.h @@ -26,8 +26,8 @@ class pimPerfEnergyAquabolt : public pimPerfEnergyBase virtual pimeval::perfEnergy getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjInfo& objSrc1, const pimObjInfo& objSrc2, const pimObjInfo& objDest) const override; virtual pimeval::perfEnergy getPerfEnergyForReduction(PimCmdEnum cmdType, const pimObjInfo& obj, unsigned numPass) const override; virtual pimeval::perfEnergy getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInfo& obj) const override; - virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const override; - + virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const override; + protected: unsigned m_aquaboltFPUBitWidth = 16; // TODO: Update for Aquabolt diff --git a/libpimeval/src/pimPerfEnergyBankLevel.cpp b/libpimeval/src/pimPerfEnergyBankLevel.cpp index 2ac82c20..047b4dc9 100644 --- a/libpimeval/src/pimPerfEnergyBankLevel.cpp +++ b/libpimeval/src/pimPerfEnergyBankLevel.cpp @@ -355,7 +355,7 @@ pimPerfEnergyBankLevel::getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimO // TODO: This needs to be revisited //! @brief Perf energy model of bank-level PIM for rotate pimeval::perfEnergy -pimPerfEnergyBankLevel::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const +pimPerfEnergyBankLevel::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const { double msRuntime = 0.0; double mjEnergy = 0.0; @@ -366,8 +366,6 @@ pimPerfEnergyBankLevel::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjI unsigned bitsPerElement = obj.getBitsPerElement(PimBitWidth::ACTUAL); unsigned numRegions = obj.getRegions().size(); uint64_t totalOp = 0; - // boundary handling - assume two times copying between device and host for boundary elements - pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8); // rotate within subarray: // For every bit: Read row to SA; move SA to R1; Shift R1 by N steps; Move R1 to SA; Write SA to row @@ -377,9 +375,15 @@ pimPerfEnergyBankLevel::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjI msRuntime = (m_tR + (bitsPerElement + 2) * m_tL + m_tW); // for one pass msRuntime *= numPass; mjEnergy = (m_eAP + (bitsPerElement + 2) * m_eL) * numPass; - msRuntime += 2 * perfEnergyBT.m_msRuntime; - mjEnergy += 2 * perfEnergyBT.m_mjEnergy; - printf("PIM-Warning: Perf energy model is not precise for PIM command %s\n", pimCmd::getName(cmdType, "").c_str()); + + // Only handle region boundaries if cross region communication is enabled + if(useCrossRegionCommunication) { + // boundary handling - assume two times copying between device and host for boundary elements + pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8); + msRuntime += 2 * perfEnergyBT.m_msRuntime; + mjEnergy += 2 * perfEnergyBT.m_mjEnergy; + printf("PIM-Warning: Perf energy model is not precise for PIM command %s\n", pimCmd::getName(cmdType, "").c_str()); + } return pimeval::perfEnergy(msRuntime, mjEnergy, msRead, msWrite, msCompute, totalOp); } diff --git a/libpimeval/src/pimPerfEnergyBankLevel.h b/libpimeval/src/pimPerfEnergyBankLevel.h index 5f869cb2..3442e2ff 100644 --- a/libpimeval/src/pimPerfEnergyBankLevel.h +++ b/libpimeval/src/pimPerfEnergyBankLevel.h @@ -26,7 +26,7 @@ class pimPerfEnergyBankLevel : public pimPerfEnergyBase virtual pimeval::perfEnergy getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjInfo& objSrc1, const pimObjInfo& objSrc2, const pimObjInfo& objDest) const override; virtual pimeval::perfEnergy getPerfEnergyForReduction(PimCmdEnum cmdType, const pimObjInfo& obj, unsigned numPass) const override; virtual pimeval::perfEnergy getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInfo& obj) const override; - virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const override; + virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const override; virtual pimeval::perfEnergy getPerfEnergyForPrefixSum(PimCmdEnum cmdType, const pimObjInfo& obj) const override; protected: diff --git a/libpimeval/src/pimPerfEnergyBase.cpp b/libpimeval/src/pimPerfEnergyBase.cpp index 2a1a5e49..feb3fa4a 100644 --- a/libpimeval/src/pimPerfEnergyBase.cpp +++ b/libpimeval/src/pimPerfEnergyBase.cpp @@ -171,7 +171,7 @@ pimPerfEnergyBase::getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInf //! @brief Perf energy model of base class for rotate (placeholder) pimeval::perfEnergy -pimPerfEnergyBase::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const +pimPerfEnergyBase::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const { double msRuntime = 1e10; double mjEnergy = 999999999.9; diff --git a/libpimeval/src/pimPerfEnergyBase.h b/libpimeval/src/pimPerfEnergyBase.h index dc7f4b79..41095875 100644 --- a/libpimeval/src/pimPerfEnergyBase.h +++ b/libpimeval/src/pimPerfEnergyBase.h @@ -70,7 +70,7 @@ class pimPerfEnergyBase virtual pimeval::perfEnergy getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjInfo& objSrc1, const pimObjInfo& objSrc2, const pimObjInfo& objDest) const; virtual pimeval::perfEnergy getPerfEnergyForReduction(PimCmdEnum cmdType, const pimObjInfo& obj, unsigned numPass) const; virtual pimeval::perfEnergy getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInfo& obj) const; - virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const; + virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const; virtual pimeval::perfEnergy getPerfEnergyForPrefixSum(PimCmdEnum cmdType, const pimObjInfo& obj) const; virtual pimeval::perfEnergy getPerfEnergyForMac(PimCmdEnum cmdType, const pimObjInfo& obj) const; diff --git a/libpimeval/src/pimPerfEnergyBitSerial.cpp b/libpimeval/src/pimPerfEnergyBitSerial.cpp index 1724bbd8..9f0d7525 100644 --- a/libpimeval/src/pimPerfEnergyBitSerial.cpp +++ b/libpimeval/src/pimPerfEnergyBitSerial.cpp @@ -439,7 +439,7 @@ pimPerfEnergyBitSerial::getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimO //! @brief Perf energy model of bit-serial PIM for rotate pimeval::perfEnergy -pimPerfEnergyBitSerial::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const +pimPerfEnergyBitSerial::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const { double msRuntime = 0.0; double mjEnergy = 0.0; @@ -451,8 +451,6 @@ pimPerfEnergyBitSerial::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjI unsigned bitsPerElement = obj.getBitsPerElement(PimBitWidth::ACTUAL); unsigned numRegions = obj.getRegions().size(); unsigned numCore = obj.getNumCoreAvailable(); - // boundary handling - assume two times copying between device and host for boundary elements - pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8); switch (m_simTarget) { case PIM_DEVICE_BITSIMD_V: @@ -465,8 +463,14 @@ pimPerfEnergyBitSerial::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjI totalOp += 3 * bitsPerElement * numPass * numCore; msRuntime = msRead + msWrite + msCompute; mjEnergy = (m_eAP + 3 * m_eL) * bitsPerElement * numPass; // for one pass - msRuntime += 2 * perfEnergyBT.m_msRuntime; - mjEnergy += 2 * perfEnergyBT.m_mjEnergy; + + // Only handle region boundaries if cross region communication is enabled + if(useCrossRegionCommunication) { + // boundary handling - assume two times copying between device and host for boundary elements + pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8); + msRuntime += 2 * perfEnergyBT.m_msRuntime; + mjEnergy += 2 * perfEnergyBT.m_mjEnergy; + } break; case PIM_DEVICE_SIMDRAM: // todo @@ -481,8 +485,12 @@ pimPerfEnergyBitSerial::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjI msRuntime = (m_tR + (bitsPerElement + 2) * m_tL + m_tW); // for one pass msRuntime *= numPass; mjEnergy = (m_eAP + (bitsPerElement + 2) * m_eL) * numPass; - msRuntime += 2 * perfEnergyBT.m_msRuntime; - mjEnergy += 2 * perfEnergyBT.m_mjEnergy; + if(useCrossRegionCommunication) { + // boundary handling - assume two times copying between device and host for boundary elements + pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8); + msRuntime += 2 * perfEnergyBT.m_msRuntime; + mjEnergy += 2 * perfEnergyBT.m_mjEnergy; + } break; default: assert(0); diff --git a/libpimeval/src/pimPerfEnergyBitSerial.h b/libpimeval/src/pimPerfEnergyBitSerial.h index 86d8b681..f2ce4ee1 100644 --- a/libpimeval/src/pimPerfEnergyBitSerial.h +++ b/libpimeval/src/pimPerfEnergyBitSerial.h @@ -26,7 +26,7 @@ class pimPerfEnergyBitSerial : public pimPerfEnergyBase virtual pimeval::perfEnergy getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjInfo& objSrc1, const pimObjInfo& objSrc2, const pimObjInfo& objDest) const override; virtual pimeval::perfEnergy getPerfEnergyForReduction(PimCmdEnum cmdType, const pimObjInfo& obj, unsigned numPass) const override; virtual pimeval::perfEnergy getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInfo& obj) const override; - virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const override; + virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const override; virtual pimeval::perfEnergy getPerfEnergyForPrefixSum(PimCmdEnum cmdType, const pimObjInfo& obj) const override; protected: diff --git a/libpimeval/src/pimPerfEnergyFulcrum.cpp b/libpimeval/src/pimPerfEnergyFulcrum.cpp index ced8b928..8bc1242c 100644 --- a/libpimeval/src/pimPerfEnergyFulcrum.cpp +++ b/libpimeval/src/pimPerfEnergyFulcrum.cpp @@ -307,7 +307,7 @@ pimPerfEnergyFulcrum::getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObj //! @brief Perf energy model of Fulcrum for rotate pimeval::perfEnergy -pimPerfEnergyFulcrum::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const +pimPerfEnergyFulcrum::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const { double msRuntime = 0.0; double mjEnergy = 0.0; @@ -318,8 +318,6 @@ pimPerfEnergyFulcrum::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInf unsigned bitsPerElement = obj.getBitsPerElement(PimBitWidth::ACTUAL); unsigned numRegions = obj.getRegions().size(); uint64_t totalOp = 0; - // boundary handling - assume two times copying between device and host for boundary elements - pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8); // rotate within subarray: // For every bit: Read row to SA; move SA to R1; Shift R1 by N steps; Move R1 to SA; Write SA to row @@ -330,9 +328,15 @@ pimPerfEnergyFulcrum::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInf msWrite = m_tW * numPass; msRuntime = msRead + msWrite + msCompute; mjEnergy = (m_eAP + (bitsPerElement + 2) * m_eL) * numPass; - msRuntime += 2 * perfEnergyBT.m_msRuntime; - mjEnergy += 2 * perfEnergyBT.m_mjEnergy; - printf("PIM-Warning: Perf energy model is not precise for PIM command %s\n", pimCmd::getName(cmdType, "").c_str()); + + // Only handle region boundaries if cross region communication is enabled + if(useCrossRegionCommunication) { + // boundary handling - assume two times copying between device and host for boundary elements + pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8); + msRuntime += 2 * perfEnergyBT.m_msRuntime; + mjEnergy += 2 * perfEnergyBT.m_mjEnergy; + printf("PIM-Warning: Perf energy model is not precise for PIM command %s\n", pimCmd::getName(cmdType, "").c_str()); + } return pimeval::perfEnergy(msRuntime, mjEnergy, msRead, msWrite, msCompute, totalOp); } diff --git a/libpimeval/src/pimPerfEnergyFulcrum.h b/libpimeval/src/pimPerfEnergyFulcrum.h index 652107b9..f0e2d4a5 100644 --- a/libpimeval/src/pimPerfEnergyFulcrum.h +++ b/libpimeval/src/pimPerfEnergyFulcrum.h @@ -26,7 +26,7 @@ class pimPerfEnergyFulcrum : public pimPerfEnergyBase virtual pimeval::perfEnergy getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjInfo& objSrc1, const pimObjInfo& objSrc2, const pimObjInfo& objDest) const override; virtual pimeval::perfEnergy getPerfEnergyForReduction(PimCmdEnum cmdType, const pimObjInfo& obj, unsigned numPass) const override; virtual pimeval::perfEnergy getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInfo& obj) const override; - virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const override; + virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const override; virtual pimeval::perfEnergy getPerfEnergyForPrefixSum(PimCmdEnum cmdType, const pimObjInfo& obj) const override; protected: diff --git a/libpimeval/src/pimSim.cpp b/libpimeval/src/pimSim.cpp index 8beb5485..2fec297c 100644 --- a/libpimeval/src/pimSim.cpp +++ b/libpimeval/src/pimSim.cpp @@ -892,7 +892,7 @@ pimSim::pimRotateElementsRight(PimObjId src) { pimPerfMon perfMon("pimRotateElementsRight"); if (!isValidDevice()) { return false; } - std::unique_ptr cmd = std::make_unique(PimCmdEnum::ROTATE_ELEM_R, src); + std::unique_ptr cmd = std::make_unique(PimCmdEnum::ROTATE_ELEM_R, src, true); return m_device->executeCmd(std::move(cmd)); } @@ -901,25 +901,25 @@ pimSim::pimRotateElementsLeft(PimObjId src) { pimPerfMon perfMon("pimRotateElementsLeft"); if (!isValidDevice()) { return false; } - std::unique_ptr cmd = std::make_unique(PimCmdEnum::ROTATE_ELEM_L, src); + std::unique_ptr cmd = std::make_unique(PimCmdEnum::ROTATE_ELEM_L, src, true); return m_device->executeCmd(std::move(cmd)); } bool -pimSim::pimShiftElementsRight(PimObjId src) +pimSim::pimShiftElementsRight(PimObjId src, bool useCrossRegionCommunication) { pimPerfMon perfMon("pimShiftElementsRight"); if (!isValidDevice()) { return false; } - std::unique_ptr cmd = std::make_unique(PimCmdEnum::SHIFT_ELEM_R, src); + std::unique_ptr cmd = std::make_unique(PimCmdEnum::SHIFT_ELEM_R, src, useCrossRegionCommunication); return m_device->executeCmd(std::move(cmd)); } bool -pimSim::pimShiftElementsLeft(PimObjId src) +pimSim::pimShiftElementsLeft(PimObjId src, bool useCrossRegionCommunication) { pimPerfMon perfMon("pimShiftElementsLeft"); if (!isValidDevice()) { return false; } - std::unique_ptr cmd = std::make_unique(PimCmdEnum::SHIFT_ELEM_L, src); + std::unique_ptr cmd = std::make_unique(PimCmdEnum::SHIFT_ELEM_L, src, useCrossRegionCommunication); return m_device->executeCmd(std::move(cmd)); } diff --git a/libpimeval/src/pimSim.h b/libpimeval/src/pimSim.h index abdf4dff..3f1d1c58 100644 --- a/libpimeval/src/pimSim.h +++ b/libpimeval/src/pimSim.h @@ -121,8 +121,8 @@ class pimSim template bool pimBroadcast(PimObjId dest, T value); bool pimRotateElementsRight(PimObjId src); bool pimRotateElementsLeft(PimObjId src); - bool pimShiftElementsRight(PimObjId src); - bool pimShiftElementsLeft(PimObjId src); + bool pimShiftElementsRight(PimObjId src, bool useCrossRegionCommunication); + bool pimShiftElementsLeft(PimObjId src, bool useCrossRegionCommunication); bool pimShiftBitsRight(PimObjId src, PimObjId dest, unsigned shiftAmount); bool pimShiftBitsLeft(PimObjId src, PimObjId dest, unsigned shiftAmount); bool pimAesSbox(PimObjId src, PimObjId dest, const std::vector& lut);