diff --git a/toolchain/base/value_store.h b/toolchain/base/value_store.h index d1c70187228c0..1318ca1a9dad1 100644 --- a/toolchain/base/value_store.h +++ b/toolchain/base/value_store.h @@ -5,6 +5,7 @@ #ifndef CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_ #define CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_ +#include #include #include #include @@ -37,11 +38,17 @@ template class ValueStoreRange; // Common calculation for ValueStore types. -template +template class ValueStoreTypes { public: using ValueType = std::decay_t; + // TODO: Would be a bit cleaner to not have this here as it's only meaningful + // to the `CanonicalValueStore`, not to other `ValueStore`s. Planned to fix + // with a larger refactoring. + using KeyType = std::decay_t; + // Typically we want to use `ValueType&` and `const ValueType& to avoid // copies, but when the value type is a `StringRef`, we assume external // storage for the string data and both our value type and ref type will be @@ -53,6 +60,14 @@ class ValueStoreTypes { llvm::StringRef, const ValueType&>; }; +// If `IdT` provides a distinct `IdT::KeyType`, default to that for the key +// type. +template + requires(!std::same_as) +class ValueStoreTypes + : public ValueStoreTypes {}; + // A simple wrapper for accumulating values, providing IDs to later retrieve the // value. This does not do deduplication. // @@ -221,11 +236,16 @@ class ValueStoreRange { // A wrapper for accumulating immutable values with deduplication, providing IDs // to later retrieve the value. // -// IdT::ValueType must represent the type being indexed. +// `IdT::ValueType` must represent the type being indexed. +// +// `IdT::KeyType` can optionally be present, and if so is used for the argument +// to `Lookup`. It must be valid to use both `KeyType` and `ValueType` as lookup +// types in the underlying `Set`. template class CanonicalValueStore { public: using ValueType = ValueStoreTypes::ValueType; + using KeyType = ValueStoreTypes::KeyType; using RefType = ValueStoreTypes::RefType; using ConstRefType = ValueStoreTypes::ConstRefType; @@ -237,7 +257,7 @@ class CanonicalValueStore { // Looks up the canonical ID for a value, or returns `None` if not in the // store. - auto Lookup(ValueType value) const -> IdT; + auto Lookup(KeyType key) const -> IdT; // Reserves space. auto Reserve(size_t size) -> void; @@ -290,8 +310,8 @@ auto CanonicalValueStore::Add(ValueType value) -> IdT { } template -auto CanonicalValueStore::Lookup(ValueType value) const -> IdT { - if (auto result = set_.Lookup(value, KeyContext(&values_))) { +auto CanonicalValueStore::Lookup(KeyType key) const -> IdT { + if (auto result = set_.Lookup(key, KeyContext(&values_))) { return result.key(); } return IdT::None; diff --git a/toolchain/check/import_cpp.cpp b/toolchain/check/import_cpp.cpp index 7e331724f8633..23a62fd339874 100644 --- a/toolchain/check/import_cpp.cpp +++ b/toolchain/check/import_cpp.cpp @@ -350,9 +350,8 @@ static auto AsCarbonNamespace(Context& context, auto& clang_decls = context.sem_ir().clang_decls(); // Check if the decl context is already mapped to a Carbon namespace. - if (auto context_clang_decl_id = clang_decls.Lookup( - {.decl = clang::dyn_cast(decl_context), - .inst_id = SemIR::InstId::None}); + if (auto context_clang_decl_id = + clang_decls.Lookup(clang::dyn_cast(decl_context)); context_clang_decl_id.has_value()) { return clang_decls.Get(context_clang_decl_id).inst_id; } @@ -365,8 +364,7 @@ static auto AsCarbonNamespace(Context& context, decl_contexts.push_back(decl_context); decl_context = decl_context->getParent(); parent_decl_id = - clang_decls.Lookup({.decl = clang::dyn_cast(decl_context), - .inst_id = SemIR::InstId::None}); + clang_decls.Lookup(clang::dyn_cast(decl_context)); } while (!parent_decl_id.has_value()); // We know the parent of the last decl context is mapped, map the rest. @@ -532,8 +530,7 @@ static auto MapRecordType(Context& context, SemIR::LocId loc_id, if (record_decl && !record_decl->isUnion()) { auto& clang_decls = context.sem_ir().clang_decls(); SemIR::InstId record_inst_id = SemIR::InstId::None; - if (auto record_clang_decl_id = clang_decls.Lookup( - {.decl = record_decl, .inst_id = SemIR::InstId::None}); + if (auto record_clang_decl_id = clang_decls.Lookup(record_decl); record_clang_decl_id.has_value()) { record_inst_id = clang_decls.Get(record_clang_decl_id).inst_id; } else { diff --git a/toolchain/sem_ir/clang_decl.h b/toolchain/sem_ir/clang_decl.h index d918f92b49786..c17570d60c270 100644 --- a/toolchain/sem_ir/clang_decl.h +++ b/toolchain/sem_ir/clang_decl.h @@ -20,17 +20,28 @@ class Decl; namespace Carbon::SemIR { // A Clang declaration mapped to a Carbon instruction. -// Using custom hashing since the declaration is keyed by the `decl` member for -// lookup. -// TODO: Avoid custom hashing by either having the data structure support keying -// or create a dedicated mapping. See -// https://discord.com/channels/655572317891461132/768530752592805919/1384999468293947537 +// +// Note that Clang's AST uses address-identity for nodes, which means the +// pointer is the canonical way to represent a specific AST node and is expected +// to be sufficient for comparison, hashing, etc. +// +// This type is specifically designed for use in a `CanonicalValueStore` and +// provide a single canonical access from SemIR to each `clang::Decl*` used. +// This also ensures that a given `clang::Decl*` is associated with exactly one +// instruction, and the `inst_id` here provides access to that instruction from +// either the `ClangDeclId` or the `clang::Decl*`. struct ClangDecl : public Printable { auto Print(llvm::raw_ostream& out) const -> void; - friend auto CarbonHashtableEq(const ClangDecl& lhs, const ClangDecl& rhs) - -> bool { - return HashtableEq(lhs.decl, rhs.decl); + // Equality comparison uses the address-identity property of the Clang AST and + // just compares the `decl` pointers. The `inst_id` is always the same due to + // the canonicalization. + auto operator==(const ClangDecl& rhs) const -> bool { + return decl == rhs.decl; + } + // Support direct comparison with the Clang AST node pointer. + auto operator==(const clang::Decl* rhs_decl) const -> bool { + return decl == rhs_decl; } // Hashing for ClangDecl. See common/hashing.h. @@ -45,15 +56,28 @@ struct ClangDecl : public Printable { clang::Decl* decl = nullptr; // The instruction the Clang declaration is mapped to. + // + // This is stored along side the `decl` pointer to avoid having to lookup both + // the pointer and the instruction ID in two separate areas of storage. InstId inst_id; }; // The ID of a `ClangDecl`. +// +// These IDs are importantly distinct from the `inst_id` associated with each +// declaration. These form a dense range of IDs that is used to reference the +// AST node pointers without storing those pointers directly into SemIR and +// needing space to hold a full pointer. We can't avoid having these IDs without +// embedding pointers directly into the storage of SemIR as part of an +// instruction. struct ClangDeclId : public IdBase { static constexpr llvm::StringLiteral Label = "clang_decl_id"; using ValueType = ClangDecl; + // Use the AST node pointer directly when doing `Lookup` to find an ID. + using KeyType = clang::Decl*; + using IdBase::IdBase; };