Skip to content

Commit 88ee64f

Browse files
authored
Fix aarch64 macOS crash when SIP disabled (re-land JLJITLinkMemoryManager/#60105) (#60230)
Apple ARM CPUs treat the `ic ivau` instruction as a memory read, which causes a confusing crash in DualMapAllocator if we try using it on a `wr_addr` that has been mprotected to `Prot::NO`, since we are still holding the allocator lock. For Apple aarch64 systems with SIP disabled, this will result in some memory savings, since DualMapAllocator will now work there. Like before, other JITLink platforms, namely Linux aarch64 and RISC-V, will benefit too. This re-lands #60105, after it was reverted in #60196. Thanks @giordano!
1 parent 802f1df commit 88ee64f

File tree

2 files changed

+194
-59
lines changed

2 files changed

+194
-59
lines changed

src/cgmemmgr.cpp

Lines changed: 193 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@
33
#include "llvm-version.h"
44
#include "platform.h"
55

6+
#include <llvm/ExecutionEngine/JITLink/JITLink.h>
7+
#include <llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h>
8+
#include <llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h>
69
#include <llvm/ExecutionEngine/SectionMemoryManager.h>
10+
711
#include "julia.h"
812
#include "julia_internal.h"
913

@@ -460,26 +464,36 @@ struct Block {
460464
}
461465
};
462466

467+
// A single carved-out region handed back by one of the allocators below.
// The write view and the runtime view may be two different mappings of the
// same physical pages (DualMapAllocator), or the same pointer (RWAllocator).
struct Allocation {
    // Address to write to (the one returned by the allocation function)
    void *wr_addr;
    // Runtime address (where the code/data will actually execute/be read)
    void *rt_addr;
    // Size of the region in bytes
    size_t sz;
    // NOTE(review): always constructed false by the allocators visible here;
    // presumably flipped elsewhere once the region is moved — confirm.
    bool relocated;
};
475+
463476
class RWAllocator {
464477
static constexpr int nblocks = 8;
465478
Block blocks[nblocks]{};
466479
public:
467480
RWAllocator() JL_NOTSAFEPOINT = default;
468-
void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT
481+
Allocation alloc(size_t size, size_t align) JL_NOTSAFEPOINT
469482
{
470483
size_t min_size = (size_t)-1;
471484
int min_id = 0;
472485
for (int i = 0;i < nblocks && blocks[i].ptr;i++) {
473486
if (void *ptr = blocks[i].alloc(size, align))
474-
return ptr;
487+
return {ptr, ptr, size, false};
475488
if (blocks[i].avail < min_size) {
476489
min_size = blocks[i].avail;
477490
min_id = i;
478491
}
479492
}
480493
size_t block_size = get_block_size(size);
481494
blocks[min_id].reset(map_anon_page(block_size), block_size);
482-
return blocks[min_id].alloc(size, align);
495+
void *ptr = blocks[min_id].alloc(size, align);
496+
return {ptr, ptr, size, false};
483497
}
484498
};
485499

@@ -519,16 +533,6 @@ struct SplitPtrBlock : public Block {
519533
}
520534
};
521535

522-
struct Allocation {
523-
// Address to write to (the one returned by the allocation function)
524-
void *wr_addr;
525-
// Runtime address
526-
void *rt_addr;
527-
size_t sz;
528-
bool relocated;
529-
};
530-
531-
template<bool exec>
532536
class ROAllocator {
533537
protected:
534538
static constexpr int nblocks = 8;
@@ -544,19 +548,18 @@ class ROAllocator {
544548
virtual ~ROAllocator() JL_NOTSAFEPOINT {}
545549
virtual void finalize() JL_NOTSAFEPOINT
546550
{
547-
for (auto &alloc: allocations) {
548-
// ensure the mapped pages are consistent
549-
sys::Memory::InvalidateInstructionCache(alloc.wr_addr,
550-
alloc.sz);
551-
sys::Memory::InvalidateInstructionCache(alloc.rt_addr,
552-
alloc.sz);
553-
}
551+
// Note: on some aarch64 platforms, like Apple CPUs, we need read
552+
// permission in order to invalidate instruction cache lines. We are
553+
// not guaranteed to have read permission on the wr_addr when using
554+
// DualMapAllocator.
555+
for (auto &alloc : allocations)
556+
sys::Memory::InvalidateInstructionCache(alloc.rt_addr, alloc.sz);
554557
completed.clear();
555558
allocations.clear();
556559
}
557560
// Allocations that have not been finalized yet.
558561
SmallVector<Allocation, 16> allocations;
559-
void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT
562+
Allocation alloc(size_t size, size_t align) JL_NOTSAFEPOINT
560563
{
561564
size_t min_size = (size_t)-1;
562565
int min_id = 0;
@@ -572,8 +575,9 @@ class ROAllocator {
572575
wr_ptr = get_wr_ptr(block, ptr, size, align);
573576
}
574577
block.state |= SplitPtrBlock::Alloc;
575-
allocations.push_back(Allocation{wr_ptr, ptr, size, false});
576-
return wr_ptr;
578+
Allocation a{wr_ptr, ptr, size, false};
579+
allocations.push_back(a);
580+
return a;
577581
}
578582
if (block.avail < min_size) {
579583
min_size = block.avail;
@@ -594,18 +598,21 @@ class ROAllocator {
594598
#ifdef _OS_WINDOWS_
595599
block.state = SplitPtrBlock::Alloc;
596600
void *wr_ptr = get_wr_ptr(block, ptr, size, align);
597-
allocations.push_back(Allocation{wr_ptr, ptr, size, false});
601+
Allocation a{wr_ptr, ptr, size, false};
602+
allocations.push_back(a);
598603
ptr = wr_ptr;
599604
#else
600605
block.state = SplitPtrBlock::Alloc | SplitPtrBlock::InitAlloc;
601-
allocations.push_back(Allocation{ptr, ptr, size, false});
606+
Allocation a{ptr, ptr, size, false};
607+
allocations.push_back(a);
602608
#endif
603-
return ptr;
609+
return a;
604610
}
605611
};
606612

607-
template<bool exec>
608-
class DualMapAllocator : public ROAllocator<exec> {
613+
class DualMapAllocator : public ROAllocator {
614+
bool exec;
615+
609616
protected:
610617
void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, size_t, size_t) override JL_NOTSAFEPOINT
611618
{
@@ -666,7 +673,7 @@ class DualMapAllocator : public ROAllocator<exec> {
666673
}
667674
}
668675
public:
669-
DualMapAllocator() JL_NOTSAFEPOINT
676+
DualMapAllocator(bool exec) JL_NOTSAFEPOINT : exec(exec)
670677
{
671678
assert(anon_hdl != -1);
672679
}
@@ -679,13 +686,13 @@ class DualMapAllocator : public ROAllocator<exec> {
679686
finalize_block(block, true);
680687
block.reset(nullptr, 0);
681688
}
682-
ROAllocator<exec>::finalize();
689+
ROAllocator::finalize();
683690
}
684691
};
685692

686693
#ifdef _OS_LINUX_
687-
template<bool exec>
688-
class SelfMemAllocator : public ROAllocator<exec> {
694+
class SelfMemAllocator : public ROAllocator {
695+
bool exec;
689696
SmallVector<Block, 16> temp_buff;
690697
protected:
691698
void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr,
@@ -722,9 +729,7 @@ class SelfMemAllocator : public ROAllocator<exec> {
722729
}
723730
}
724731
public:
725-
SelfMemAllocator() JL_NOTSAFEPOINT
726-
: ROAllocator<exec>(),
727-
temp_buff()
732+
SelfMemAllocator(bool exec) JL_NOTSAFEPOINT : exec(exec), temp_buff()
728733
{
729734
assert(get_self_mem_fd() != -1);
730735
}
@@ -758,11 +763,25 @@ class SelfMemAllocator : public ROAllocator<exec> {
758763
}
759764
if (cached)
760765
temp_buff.resize(1);
761-
ROAllocator<exec>::finalize();
766+
ROAllocator::finalize();
762767
}
763768
};
764769
#endif // _OS_LINUX_
765770

771+
std::pair<std::unique_ptr<ROAllocator>, std::unique_ptr<ROAllocator>>
772+
get_preferred_allocators() JL_NOTSAFEPOINT
773+
{
774+
#ifdef _OS_LINUX_
775+
if (get_self_mem_fd() != -1)
776+
return {std::make_unique<SelfMemAllocator>(false),
777+
std::make_unique<SelfMemAllocator>(true)};
778+
#endif
779+
if (init_shared_map() != -1)
780+
return {std::make_unique<DualMapAllocator>(false),
781+
std::make_unique<DualMapAllocator>(true)};
782+
return {};
783+
}
784+
766785
class RTDyldMemoryManagerJL : public SectionMemoryManager {
767786
struct EHFrame {
768787
uint8_t *addr;
@@ -772,29 +791,18 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager {
772791
void operator=(const RTDyldMemoryManagerJL&) = delete;
773792
SmallVector<EHFrame, 16> pending_eh;
774793
RWAllocator rw_alloc;
775-
std::unique_ptr<ROAllocator<false>> ro_alloc;
776-
std::unique_ptr<ROAllocator<true>> exe_alloc;
794+
std::unique_ptr<ROAllocator> ro_alloc;
795+
std::unique_ptr<ROAllocator> exe_alloc;
777796
size_t total_allocated;
778797

779798
public:
780799
RTDyldMemoryManagerJL() JL_NOTSAFEPOINT
781800
: SectionMemoryManager(),
782801
pending_eh(),
783802
rw_alloc(),
784-
ro_alloc(),
785-
exe_alloc(),
786803
total_allocated(0)
787804
{
788-
#ifdef _OS_LINUX_
789-
if (!ro_alloc && get_self_mem_fd() != -1) {
790-
ro_alloc.reset(new SelfMemAllocator<false>());
791-
exe_alloc.reset(new SelfMemAllocator<true>());
792-
}
793-
#endif
794-
if (!ro_alloc && init_shared_map() != -1) {
795-
ro_alloc.reset(new DualMapAllocator<false>());
796-
exe_alloc.reset(new DualMapAllocator<true>());
797-
}
805+
std::tie(ro_alloc, exe_alloc) = get_preferred_allocators();
798806
}
799807
~RTDyldMemoryManagerJL() override JL_NOTSAFEPOINT
800808
{
@@ -847,7 +855,7 @@ uint8_t *RTDyldMemoryManagerJL::allocateCodeSection(uintptr_t Size,
847855
jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size);
848856
jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, Size);
849857
if (exe_alloc)
850-
return (uint8_t*)exe_alloc->alloc(Size, Alignment);
858+
return (uint8_t*)exe_alloc->alloc(Size, Alignment).wr_addr;
851859
return SectionMemoryManager::allocateCodeSection(Size, Alignment, SectionID,
852860
SectionName);
853861
}
@@ -862,9 +870,9 @@ uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size,
862870
jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size);
863871
jl_timing_counter_inc(JL_TIMING_COUNTER_JITDataSize, Size);
864872
if (!isReadOnly)
865-
return (uint8_t*)rw_alloc.alloc(Size, Alignment);
873+
return (uint8_t*)rw_alloc.alloc(Size, Alignment).wr_addr;
866874
if (ro_alloc)
867-
return (uint8_t*)ro_alloc->alloc(Size, Alignment);
875+
return (uint8_t*)ro_alloc->alloc(Size, Alignment).wr_addr;
868876
return SectionMemoryManager::allocateDataSection(Size, Alignment, SectionID,
869877
SectionName, isReadOnly);
870878
}
@@ -919,6 +927,133 @@ void RTDyldMemoryManagerJL::deregisterEHFrames(uint8_t *Addr,
919927
}
920928
#endif
921929

930+
class JLJITLinkMemoryManager : public jitlink::JITLinkMemoryManager {
931+
using OnFinalizedFunction =
932+
jitlink::JITLinkMemoryManager::InFlightAlloc::OnFinalizedFunction;
933+
934+
std::mutex Mutex;
935+
RWAllocator RWAlloc;
936+
std::unique_ptr<ROAllocator> ROAlloc;
937+
std::unique_ptr<ROAllocator> ExeAlloc;
938+
SmallVector<OnFinalizedFunction> FinalizedCallbacks;
939+
uint32_t InFlight{0};
940+
941+
public:
942+
class InFlightAlloc;
943+
944+
static std::unique_ptr<JITLinkMemoryManager> Create()
945+
{
946+
auto [ROAlloc, ExeAlloc] = get_preferred_allocators();
947+
if (ROAlloc && ExeAlloc)
948+
return std::unique_ptr<JLJITLinkMemoryManager>(
949+
new JLJITLinkMemoryManager(std::move(ROAlloc), std::move(ExeAlloc)));
950+
951+
return cantFail(
952+
orc::MapperJITLinkMemoryManager::CreateWithMapper<orc::InProcessMemoryMapper>(
953+
/*Reservation Granularity*/ 16 * 1024 * 1024));
954+
}
955+
956+
void allocate(const jitlink::JITLinkDylib *JD, jitlink::LinkGraph &G,
957+
OnAllocatedFunction OnAllocated) override;
958+
959+
void deallocate(std::vector<FinalizedAlloc> Allocs,
960+
OnDeallocatedFunction OnDeallocated) override
961+
{
962+
jl_unreachable();
963+
}
964+
965+
protected:
966+
JLJITLinkMemoryManager(std::unique_ptr<ROAllocator> ROAlloc,
967+
std::unique_ptr<ROAllocator> ExeAlloc)
968+
: ROAlloc(std::move(ROAlloc)), ExeAlloc(std::move(ExeAlloc))
969+
{
970+
}
971+
972+
void finalize(OnFinalizedFunction OnFinalized)
973+
{
974+
SmallVector<OnFinalizedFunction> Callbacks;
975+
{
976+
std::unique_lock Lock{Mutex};
977+
FinalizedCallbacks.push_back(std::move(OnFinalized));
978+
979+
if (--InFlight > 0)
980+
return;
981+
982+
ROAlloc->finalize();
983+
ExeAlloc->finalize();
984+
Callbacks = std::move(FinalizedCallbacks);
985+
}
986+
987+
for (auto &CB : Callbacks)
988+
std::move(CB)(FinalizedAlloc{});
989+
}
990+
};
991+
992+
class JLJITLinkMemoryManager::InFlightAlloc
993+
: public jitlink::JITLinkMemoryManager::InFlightAlloc {
994+
JLJITLinkMemoryManager &MM;
995+
jitlink::LinkGraph &G;
996+
997+
public:
998+
InFlightAlloc(JLJITLinkMemoryManager &MM, jitlink::LinkGraph &G) : MM(MM), G(G) {}
999+
1000+
void abandon(OnAbandonedFunction OnAbandoned) override { jl_unreachable(); }
1001+
1002+
void finalize(OnFinalizedFunction OnFinalized) override
1003+
{
1004+
auto *GP = &G;
1005+
MM.finalize([GP, OnFinalized =
1006+
std::move(OnFinalized)](Expected<FinalizedAlloc> FA) mutable {
1007+
if (!FA)
1008+
return OnFinalized(FA.takeError());
1009+
// Need to handle dealloc actions when we GC code
1010+
auto E = orc::shared::runFinalizeActions(GP->allocActions());
1011+
if (!E)
1012+
return OnFinalized(E.takeError());
1013+
OnFinalized(std::move(FA));
1014+
});
1015+
}
1016+
};
1017+
1018+
using orc::MemProt;
1019+
1020+
// Carve out memory for every standard segment of the graph, routing each
// segment to the allocator matching its protection: RW data, RO data, or
// executable code. Any other protection combination is a fatal error.
void JLJITLinkMemoryManager::allocate(const jitlink::JITLinkDylib *JD,
                                      jitlink::LinkGraph &G,
                                      OnAllocatedFunction OnAllocated)
{
    jitlink::BasicLayout BL{G};

    {
        std::unique_lock Lock{Mutex};
        for (auto &[AG, Seg] : BL.segments()) {
            if (AG.getMemLifetime() == orc::MemLifetime::NoAlloc)
                continue;
            assert(AG.getMemLifetime() == orc::MemLifetime::Standard);

            auto Prot = AG.getMemProt();
            uint64_t Alignment = Seg.Alignment.value();
            uint64_t Size = Seg.ContentSize + Seg.ZeroFillSize;
            Allocation Alloc;
            if (Prot == (MemProt::Read | MemProt::Write))
                Alloc = RWAlloc.alloc(Size, Alignment);
            else if (Prot == MemProt::Read)
                Alloc = ROAlloc->alloc(Size, Alignment);
            else if (Prot == (MemProt::Read | MemProt::Exec))
                Alloc = ExeAlloc->alloc(Size, Alignment);
            else
                abort();

            // JITLink writes through WorkingMem; the linked code runs at Addr.
            Seg.Addr = orc::ExecutorAddr::fromPtr(Alloc.rt_addr);
            Seg.WorkingMem = (char *)Alloc.wr_addr;
        }
    }

    if (auto Err = BL.apply())
        return OnAllocated(std::move(Err));

    // NOTE(review): this increment is outside the Mutex while finalize()
    // decrements under it — confirm callers serialize allocate(), otherwise
    // this looks like a data race.
    ++InFlight;
    OnAllocated(std::make_unique<InFlightAlloc>(*this, G));
}
9221057
}
9231058

9241059
RTDyldMemoryManager* createRTDyldMemoryManager() JL_NOTSAFEPOINT
@@ -930,3 +1065,8 @@ size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT
9301065
{
9311066
return ((RTDyldMemoryManagerJL*)mm)->getTotalBytes();
9321067
}
1068+
1069+
std::unique_ptr<jitlink::JITLinkMemoryManager> createJITLinkMemoryManager()
1070+
{
1071+
return JLJITLinkMemoryManager::Create();
1072+
}

0 commit comments

Comments
 (0)