diff --git a/packages/hub/.gitignore b/packages/hub/.gitignore index 17ac506cbd..02fe4ede6f 100644 --- a/packages/hub/.gitignore +++ b/packages/hub/.gitignore @@ -1,3 +1,3 @@ xet-core-wasm-build -shard.bin -xorb.bin \ No newline at end of file +*.bin +.debug \ No newline at end of file diff --git a/packages/hub/src/utils/ChunkCache.ts b/packages/hub/src/utils/ChunkCache.ts index 50d356cc80..01f71ca890 100644 --- a/packages/hub/src/utils/ChunkCache.ts +++ b/packages/hub/src/utils/ChunkCache.ts @@ -12,6 +12,16 @@ export class ChunkCache { hmacs = new Set(); // todo : remove old hmacs addChunkToCache(hash: string, xorbIndex: number, chunkIndex: number, hmac: string | null): void { + if (this.map.has(hash)) { + // Happens when we receive an existing chunk from remote dedup info (eg duplicate chunk in shard? Or shards with same hmac key + // sharing chunks/xorbs) + + // processing this chunk again would desync the cache, as `this.map.size` would not increase, as opposed to `this.index` + + // Ideally we'd still process it to evict it later ("refresh it") but would need more complex handling, or stop using + // the Uint16Array / Int32Array which are optimized for memory usage + return; + } this.map.set(hash, this.index); if (hmac !== null) { this.hmacs.add(hmac);