Skip to content

Commit f5280cd

Browse files
committed
Add JLJITLinkMemoryManager (ports memory manager to JITLink)
Ports our RTDyLD memory manager to JITLink in order to avoid memory use regressions after switching to JITLink everywhere (JuliaLang#60031). This is a direct port: finalization must happen all at once, because it invalidates all allocation `wr_ptr`s. I decided it wasn't worth it to associate `OnFinalizedFunction` callbacks with each block, since they are large enough to make it extremely likely that all in-flight allocations land in the same block; everything must be relocated before finalization can happen. I plan to add support for DualMapAllocator on ARM64 macOS, as well as an alternative for executable memory later. For now, we fall back to the old MapperJITLinkMemoryManager. Release JLJITLinkMemoryManager lock when calling FinalizedCallbacks
1 parent 0c34bde commit f5280cd

File tree

2 files changed

+185
-44
lines changed

2 files changed

+185
-44
lines changed

src/cgmemmgr.cpp

Lines changed: 184 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@
33
#include "llvm-version.h"
44
#include "platform.h"
55

6+
#include <llvm/ExecutionEngine/JITLink/JITLink.h>
7+
#include <llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h>
8+
#include <llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h>
69
#include <llvm/ExecutionEngine/SectionMemoryManager.h>
10+
711
#include "julia.h"
812
#include "julia_internal.h"
913

@@ -460,26 +464,36 @@ struct Block {
460464
}
461465
};
462466

467+
struct Allocation {
468+
// Address to write to (the one returned by the allocation function)
469+
void *wr_addr;
470+
// Runtime address
471+
void *rt_addr;
472+
size_t sz;
473+
bool relocated;
474+
};
475+
463476
class RWAllocator {
464477
static constexpr int nblocks = 8;
465478
Block blocks[nblocks]{};
466479
public:
467480
RWAllocator() JL_NOTSAFEPOINT = default;
468-
void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT
481+
Allocation alloc(size_t size, size_t align) JL_NOTSAFEPOINT
469482
{
470483
size_t min_size = (size_t)-1;
471484
int min_id = 0;
472485
for (int i = 0;i < nblocks && blocks[i].ptr;i++) {
473486
if (void *ptr = blocks[i].alloc(size, align))
474-
return ptr;
487+
return {ptr, ptr, size, false};
475488
if (blocks[i].avail < min_size) {
476489
min_size = blocks[i].avail;
477490
min_id = i;
478491
}
479492
}
480493
size_t block_size = get_block_size(size);
481494
blocks[min_id].reset(map_anon_page(block_size), block_size);
482-
return blocks[min_id].alloc(size, align);
495+
void *ptr = blocks[min_id].alloc(size, align);
496+
return {ptr, ptr, size, false};
483497
}
484498
};
485499

@@ -519,16 +533,6 @@ struct SplitPtrBlock : public Block {
519533
}
520534
};
521535

522-
struct Allocation {
523-
// Address to write to (the one returned by the allocation function)
524-
void *wr_addr;
525-
// Runtime address
526-
void *rt_addr;
527-
size_t sz;
528-
bool relocated;
529-
};
530-
531-
template<bool exec>
532536
class ROAllocator {
533537
protected:
534538
static constexpr int nblocks = 8;
@@ -556,7 +560,7 @@ class ROAllocator {
556560
}
557561
// Allocations that have not been finalized yet.
558562
SmallVector<Allocation, 16> allocations;
559-
void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT
563+
Allocation alloc(size_t size, size_t align) JL_NOTSAFEPOINT
560564
{
561565
size_t min_size = (size_t)-1;
562566
int min_id = 0;
@@ -572,8 +576,9 @@ class ROAllocator {
572576
wr_ptr = get_wr_ptr(block, ptr, size, align);
573577
}
574578
block.state |= SplitPtrBlock::Alloc;
575-
allocations.push_back(Allocation{wr_ptr, ptr, size, false});
576-
return wr_ptr;
579+
Allocation a{wr_ptr, ptr, size, false};
580+
allocations.push_back(a);
581+
return a;
577582
}
578583
if (block.avail < min_size) {
579584
min_size = block.avail;
@@ -594,18 +599,21 @@ class ROAllocator {
594599
#ifdef _OS_WINDOWS_
595600
block.state = SplitPtrBlock::Alloc;
596601
void *wr_ptr = get_wr_ptr(block, ptr, size, align);
597-
allocations.push_back(Allocation{wr_ptr, ptr, size, false});
602+
Allocation a{wr_ptr, ptr, size, false};
603+
allocations.push_back(a);
598604
ptr = wr_ptr;
599605
#else
600606
block.state = SplitPtrBlock::Alloc | SplitPtrBlock::InitAlloc;
601-
allocations.push_back(Allocation{ptr, ptr, size, false});
607+
Allocation a{ptr, ptr, size, false};
608+
allocations.push_back(a);
602609
#endif
603-
return ptr;
610+
return a;
604611
}
605612
};
606613

607-
template<bool exec>
608-
class DualMapAllocator : public ROAllocator<exec> {
614+
class DualMapAllocator : public ROAllocator {
615+
bool exec;
616+
609617
protected:
610618
void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, size_t, size_t) override JL_NOTSAFEPOINT
611619
{
@@ -666,7 +674,7 @@ class DualMapAllocator : public ROAllocator<exec> {
666674
}
667675
}
668676
public:
669-
DualMapAllocator() JL_NOTSAFEPOINT
677+
DualMapAllocator(bool exec) JL_NOTSAFEPOINT : exec(exec)
670678
{
671679
assert(anon_hdl != -1);
672680
}
@@ -679,13 +687,13 @@ class DualMapAllocator : public ROAllocator<exec> {
679687
finalize_block(block, true);
680688
block.reset(nullptr, 0);
681689
}
682-
ROAllocator<exec>::finalize();
690+
ROAllocator::finalize();
683691
}
684692
};
685693

686694
#ifdef _OS_LINUX_
687-
template<bool exec>
688-
class SelfMemAllocator : public ROAllocator<exec> {
695+
class SelfMemAllocator : public ROAllocator {
696+
bool exec;
689697
SmallVector<Block, 16> temp_buff;
690698
protected:
691699
void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr,
@@ -722,9 +730,7 @@ class SelfMemAllocator : public ROAllocator<exec> {
722730
}
723731
}
724732
public:
725-
SelfMemAllocator() JL_NOTSAFEPOINT
726-
: ROAllocator<exec>(),
727-
temp_buff()
733+
SelfMemAllocator(bool exec) JL_NOTSAFEPOINT : exec(exec), temp_buff()
728734
{
729735
assert(get_self_mem_fd() != -1);
730736
}
@@ -758,7 +764,7 @@ class SelfMemAllocator : public ROAllocator<exec> {
758764
}
759765
if (cached)
760766
temp_buff.resize(1);
761-
ROAllocator<exec>::finalize();
767+
ROAllocator::finalize();
762768
}
763769
};
764770
#endif // _OS_LINUX_
@@ -772,8 +778,8 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager {
772778
void operator=(const RTDyldMemoryManagerJL&) = delete;
773779
SmallVector<EHFrame, 16> pending_eh;
774780
RWAllocator rw_alloc;
775-
std::unique_ptr<ROAllocator<false>> ro_alloc;
776-
std::unique_ptr<ROAllocator<true>> exe_alloc;
781+
std::unique_ptr<ROAllocator> ro_alloc;
782+
std::unique_ptr<ROAllocator> exe_alloc;
777783
size_t total_allocated;
778784

779785
public:
@@ -787,13 +793,13 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager {
787793
{
788794
#ifdef _OS_LINUX_
789795
if (!ro_alloc && get_self_mem_fd() != -1) {
790-
ro_alloc.reset(new SelfMemAllocator<false>());
791-
exe_alloc.reset(new SelfMemAllocator<true>());
796+
ro_alloc.reset(new SelfMemAllocator(false));
797+
exe_alloc.reset(new SelfMemAllocator(true));
792798
}
793799
#endif
794800
if (!ro_alloc && init_shared_map() != -1) {
795-
ro_alloc.reset(new DualMapAllocator<false>());
796-
exe_alloc.reset(new DualMapAllocator<true>());
801+
ro_alloc.reset(new DualMapAllocator(false));
802+
exe_alloc.reset(new DualMapAllocator(true));
797803
}
798804
}
799805
~RTDyldMemoryManagerJL() override JL_NOTSAFEPOINT
@@ -847,7 +853,7 @@ uint8_t *RTDyldMemoryManagerJL::allocateCodeSection(uintptr_t Size,
847853
jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size);
848854
jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, Size);
849855
if (exe_alloc)
850-
return (uint8_t*)exe_alloc->alloc(Size, Alignment);
856+
return (uint8_t*)exe_alloc->alloc(Size, Alignment).wr_addr;
851857
return SectionMemoryManager::allocateCodeSection(Size, Alignment, SectionID,
852858
SectionName);
853859
}
@@ -862,9 +868,9 @@ uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size,
862868
jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size);
863869
jl_timing_counter_inc(JL_TIMING_COUNTER_JITDataSize, Size);
864870
if (!isReadOnly)
865-
return (uint8_t*)rw_alloc.alloc(Size, Alignment);
871+
return (uint8_t*)rw_alloc.alloc(Size, Alignment).wr_addr;
866872
if (ro_alloc)
867-
return (uint8_t*)ro_alloc->alloc(Size, Alignment);
873+
return (uint8_t*)ro_alloc->alloc(Size, Alignment).wr_addr;
868874
return SectionMemoryManager::allocateDataSection(Size, Alignment, SectionID,
869875
SectionName, isReadOnly);
870876
}
@@ -919,6 +925,141 @@ void RTDyldMemoryManagerJL::deregisterEHFrames(uint8_t *Addr,
919925
}
920926
#endif
921927

928+
class JLJITLinkMemoryManager : public jitlink::JITLinkMemoryManager {
929+
using OnFinalizedFunction =
930+
jitlink::JITLinkMemoryManager::InFlightAlloc::OnFinalizedFunction;
931+
932+
std::mutex Mutex;
933+
RWAllocator RWAlloc;
934+
std::unique_ptr<ROAllocator> ROAlloc;
935+
std::unique_ptr<ROAllocator> ExeAlloc;
936+
SmallVector<OnFinalizedFunction> FinalizedCallbacks;
937+
uint32_t InFlight{0};
938+
939+
public:
940+
class InFlightAlloc;
941+
942+
static std::unique_ptr<JITLinkMemoryManager> Create()
943+
{
944+
#ifdef _OS_LINUX_
945+
bool ok = get_self_mem_fd() != -1;
946+
#else
947+
bool ok = init_shared_map() != -1;
948+
#endif
949+
if (ok)
950+
return std::unique_ptr<JLJITLinkMemoryManager>(new JLJITLinkMemoryManager());
951+
952+
return cantFail(
953+
orc::MapperJITLinkMemoryManager::CreateWithMapper<orc::InProcessMemoryMapper>(
954+
/*Reservation Granularity*/ 16 * 1024 * 1024));
955+
}
956+
957+
void allocate(const jitlink::JITLinkDylib *JD, jitlink::LinkGraph &G,
958+
OnAllocatedFunction OnAllocated) override;
959+
960+
void deallocate(std::vector<FinalizedAlloc> Allocs,
961+
OnDeallocatedFunction OnDeallocated) override
962+
{
963+
jl_unreachable();
964+
}
965+
966+
protected:
967+
JLJITLinkMemoryManager()
968+
#ifdef _OS_LINUX_
969+
: ROAlloc(std::make_unique<SelfMemAllocator>(false)),
970+
ExeAlloc(std::make_unique<SelfMemAllocator>(true))
971+
#else
972+
: ROAlloc(std::make_unique<DualMapAllocator>(false)),
973+
ExeAlloc(std::make_unique<DualMapAllocator>(true))
974+
#endif
975+
{
976+
}
977+
978+
void finalize(OnFinalizedFunction OnFinalized)
979+
{
980+
SmallVector<OnFinalizedFunction> Callbacks;
981+
{
982+
std::unique_lock Lock{Mutex};
983+
FinalizedCallbacks.push_back(std::move(OnFinalized));
984+
985+
if (--InFlight > 0)
986+
return;
987+
988+
ROAlloc->finalize();
989+
ExeAlloc->finalize();
990+
Callbacks = std::move(FinalizedCallbacks);
991+
}
992+
993+
for (auto &CB : Callbacks)
994+
std::move(CB)(FinalizedAlloc{});
995+
}
996+
};
997+
998+
class JLJITLinkMemoryManager::InFlightAlloc
999+
: public jitlink::JITLinkMemoryManager::InFlightAlloc {
1000+
JLJITLinkMemoryManager &MM;
1001+
jitlink::LinkGraph &G;
1002+
1003+
public:
1004+
InFlightAlloc(JLJITLinkMemoryManager &MM, jitlink::LinkGraph &G) : MM(MM), G(G) {}
1005+
1006+
void abandon(OnAbandonedFunction OnAbandoned) override { jl_unreachable(); }
1007+
1008+
void finalize(OnFinalizedFunction OnFinalized) override
1009+
{
1010+
auto *GP = &G;
1011+
MM.finalize([GP, OnFinalized =
1012+
std::move(OnFinalized)](Expected<FinalizedAlloc> FA) mutable {
1013+
if (!FA)
1014+
return OnFinalized(FA.takeError());
1015+
// Need to handle dealloc actions when we GC code
1016+
auto E = orc::shared::runFinalizeActions(GP->allocActions());
1017+
if (!E)
1018+
return OnFinalized(E.takeError());
1019+
OnFinalized(std::move(FA));
1020+
});
1021+
}
1022+
};
1023+
1024+
using orc::MemProt;
1025+
1026+
void JLJITLinkMemoryManager::allocate(const jitlink::JITLinkDylib *JD,
1027+
jitlink::LinkGraph &G,
1028+
OnAllocatedFunction OnAllocated)
1029+
{
1030+
jitlink::BasicLayout BL{G};
1031+
1032+
{
1033+
std::unique_lock Lock{Mutex};
1034+
for (auto &[AG, Seg] : BL.segments()) {
1035+
if (AG.getMemLifetime() == orc::MemLifetime::NoAlloc)
1036+
continue;
1037+
assert(AG.getMemLifetime() == orc::MemLifetime::Standard);
1038+
1039+
auto Prot = AG.getMemProt();
1040+
uint64_t Alignment = Seg.Alignment.value();
1041+
uint64_t Size = Seg.ContentSize + Seg.ZeroFillSize;
1042+
Allocation Alloc;
1043+
if (Prot == (MemProt::Read | MemProt::Write))
1044+
Alloc = RWAlloc.alloc(Size, Alignment);
1045+
else if (Prot == MemProt::Read)
1046+
Alloc = ROAlloc->alloc(Size, Alignment);
1047+
else if (Prot == (MemProt::Read | MemProt::Exec))
1048+
Alloc = ExeAlloc->alloc(Size, Alignment);
1049+
else
1050+
abort();
1051+
1052+
Seg.Addr = orc::ExecutorAddr::fromPtr(Alloc.rt_addr);
1053+
Seg.WorkingMem = (char *)Alloc.wr_addr;
1054+
}
1055+
}
1056+
1057+
if (auto Err = BL.apply())
1058+
return OnAllocated(std::move(Err));
1059+
1060+
++InFlight;
1061+
OnAllocated(std::make_unique<InFlightAlloc>(*this, G));
1062+
}
9221063
}
9231064

9241065
RTDyldMemoryManager* createRTDyldMemoryManager() JL_NOTSAFEPOINT
@@ -930,3 +1071,8 @@ size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT
9301071
{
9311072
return ((RTDyldMemoryManagerJL*)mm)->getTotalBytes();
9321073
}
1074+
1075+
std::unique_ptr<jitlink::JITLinkMemoryManager> createJITLinkMemoryManager()
1076+
{
1077+
return JLJITLinkMemoryManager::Create();
1078+
}

src/jitlayers.cpp

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1208,12 +1208,6 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin {
12081208
#pragma clang diagnostic ignored "-Wunused-function"
12091209
#endif
12101210

1211-
// TODO: Port our memory management optimisations to JITLink instead of using the
1212-
// default InProcessMemoryManager.
1213-
std::unique_ptr<jitlink::JITLinkMemoryManager> createJITLinkMemoryManager() JL_NOTSAFEPOINT {
1214-
return cantFail(orc::MapperJITLinkMemoryManager::CreateWithMapper<orc::InProcessMemoryMapper>(/*Reservation Granularity*/ 16 * 1024 * 1024));
1215-
}
1216-
12171211
#ifdef _COMPILER_CLANG_
12181212
#pragma clang diagnostic pop
12191213
#endif
@@ -1237,6 +1231,7 @@ class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar {
12371231
};
12381232

12391233
RTDyldMemoryManager *createRTDyldMemoryManager(void) JL_NOTSAFEPOINT;
1234+
std::unique_ptr<jitlink::JITLinkMemoryManager> createJITLinkMemoryManager() JL_NOTSAFEPOINT;
12401235

12411236
// A simple forwarding class, since OrcJIT v2 needs a unique_ptr, while we have a shared_ptr
12421237
class ForwardingMemoryManager : public RuntimeDyld::MemoryManager {

0 commit comments

Comments
 (0)