diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts
index 4366fe4d9a..db80b577a3 100644
--- a/packages/gguf/src/gguf.spec.ts
+++ b/packages/gguf/src/gguf.spec.ts
@@ -11,6 +11,7 @@ import {
 	GGUF_QUANT_ORDER,
 	findNearestQuantType,
 	serializeGgufMetadata,
+	buildGgufHeader,
 } from "./gguf";
 import fs from "node:fs";
 import { tmpdir } from "node:os";
@@ -832,7 +833,6 @@ describe("gguf", () => {
 			typedMetadata: originalMetadata,
 			tensorDataOffset,
 			littleEndian,
-			tensorInfos,
 		} = await gguf(testUrl, {
 			typedMetadata: true,
 		});
@@ -895,4 +895,288 @@ describe("gguf", () => {
 			}
 		}, 30000);
 	});
+
+	describe("buildGgufHeader", () => {
+		it("should rebuild GGUF header with updated metadata", async () => {
+			// Parse a smaller GGUF file to get original metadata and structure
+			const {
+				typedMetadata: originalMetadata,
+				tensorInfoByteRange,
+				littleEndian,
+			} = await gguf(URL_V1, {
+				typedMetadata: true,
+			});
+
+			// Get only the header portion of the original file to avoid memory issues
+			const headerSize = tensorInfoByteRange[1] + 1000; // Add some padding
+			const originalResponse = await fetch(URL_V1, {
+				headers: { Range: `bytes=0-${headerSize - 1}` },
+			});
+			const originalBlob = new Blob([await originalResponse.arrayBuffer()]);
+
+			// Create updated metadata with a modified name
+			const updatedMetadata = {
+				...originalMetadata,
+				"general.name": {
+					value: "Modified Test Model",
+					type: GGUFValueType.STRING,
+				},
+			} as GGUFTypedMetadata;
+
+			// Build the new header
+			const newHeaderBlob = await buildGgufHeader(originalBlob, updatedMetadata, {
+				littleEndian,
+				tensorInfoByteRange,
+				alignment: Number(originalMetadata["general.alignment"]?.value ?? 32),
+			});
+
+			expect(newHeaderBlob).toBeInstanceOf(Blob);
+			expect(newHeaderBlob.size).toBeGreaterThan(0);
+
+			// Test that the new header can be parsed by creating a minimal test file
+			const tempFilePath = join(tmpdir(), `test-build-header-${Date.now()}.gguf`);
+
+			// Just write the header to test parsing (without tensor data to avoid size issues)
+			fs.writeFileSync(tempFilePath, Buffer.from(await newHeaderBlob.arrayBuffer()));
+
+			try {
+				const { typedMetadata: parsedMetadata } = await gguf(tempFilePath, {
+					typedMetadata: true,
+					allowLocalFile: true,
+				});
+
+				// Verify the updated metadata is preserved
+				expect(parsedMetadata["general.name"]).toEqual({
+					value: "Modified Test Model",
+					type: GGUFValueType.STRING,
+				});
+
+				// Verify other metadata fields are preserved
+				expect(parsedMetadata.version).toEqual(originalMetadata.version);
+				expect(parsedMetadata.tensor_count).toEqual(originalMetadata.tensor_count);
+				expect(parsedMetadata["general.architecture"]).toEqual(originalMetadata["general.architecture"]);
+			} finally {
+				try {
+					fs.unlinkSync(tempFilePath);
+				} catch (error) {
+					// Ignore cleanup errors
+				}
+			}
+		}, 30_000);
+
+		it("should handle metadata with array modifications", async () => {
+			// Parse a smaller GGUF file
+			const {
+				typedMetadata: originalMetadata,
+				tensorInfoByteRange,
+				littleEndian,
+			} = await gguf(URL_V1, {
+				typedMetadata: true,
+			});
+
+			// Get only the header portion
+			const headerSize = tensorInfoByteRange[1] + 1000;
+			const originalResponse = await fetch(URL_V1, {
+				headers: { Range: `bytes=0-${headerSize - 1}` },
+			});
+			const originalBlob = new Blob([await originalResponse.arrayBuffer()]);
+
+			// Create updated metadata with a simple array
+			const updatedMetadata = {
+				...originalMetadata,
+				"test.array": {
+					value: ["item1", "item2", "item3"],
+					type: GGUFValueType.ARRAY,
+					subType: GGUFValueType.STRING,
+				},
+				kv_count: {
+					value: originalMetadata.kv_count.value + 1n,
+					type: originalMetadata.kv_count.type,
+				},
+			} as GGUFTypedMetadata;
+
+			// Build the new header
+			const newHeaderBlob = await buildGgufHeader(originalBlob, updatedMetadata, {
+				littleEndian,
+				tensorInfoByteRange,
+				alignment: Number(originalMetadata["general.alignment"]?.value ?? 32),
+			});
+
+			expect(newHeaderBlob).toBeInstanceOf(Blob);
+			expect(newHeaderBlob.size).toBeGreaterThan(0);
+
+			// Test that the new header can be parsed
+			const tempFilePath = join(tmpdir(), `test-build-header-array-${Date.now()}.gguf`);
+			fs.writeFileSync(tempFilePath, Buffer.from(await newHeaderBlob.arrayBuffer()));
+
+			try {
+				const { typedMetadata: parsedMetadata } = await gguf(tempFilePath, {
+					typedMetadata: true,
+					allowLocalFile: true,
+				});
+
+				// Verify the array was added correctly
+				expect(parsedMetadata["test.array"]).toEqual({
+					value: ["item1", "item2", "item3"],
+					type: GGUFValueType.ARRAY,
+					subType: GGUFValueType.STRING,
+				});
+
+				// Verify structure integrity
+				expect(parsedMetadata.version).toEqual(originalMetadata.version);
+				expect(parsedMetadata.tensor_count).toEqual(originalMetadata.tensor_count);
+				expect(parsedMetadata.kv_count.value).toBe(originalMetadata.kv_count.value + 1n);
+			} finally {
+				try {
+					fs.unlinkSync(tempFilePath);
+				} catch (error) {
+					// Ignore cleanup errors
+				}
+			}
+		}, 30_000);
+
+		it("should preserve tensor info correctly", async () => {
+			// Parse a smaller GGUF file
+			const {
+				typedMetadata: originalMetadata,
+				tensorInfoByteRange,
+				tensorInfos: originalTensorInfos,
+				littleEndian,
+			} = await gguf(URL_V1, {
+				typedMetadata: true,
+			});
+
+			// Get only the header portion
+			const headerSize = tensorInfoByteRange[1] + 1000;
+			const originalResponse = await fetch(URL_V1, {
+				headers: { Range: `bytes=0-${headerSize - 1}` },
+			});
+			const originalBlob = new Blob([await originalResponse.arrayBuffer()]);
+
+			// Create updated metadata with minor changes
+			const updatedMetadata = {
+				...originalMetadata,
+				"test.custom": {
+					value: "custom_value",
+					type: GGUFValueType.STRING,
+				},
+				kv_count: {
+					value: originalMetadata.kv_count.value + 1n,
+					type: originalMetadata.kv_count.type,
+				},
+			} as GGUFTypedMetadata;
+
+			// Build the new header
+			const newHeaderBlob = await buildGgufHeader(originalBlob, updatedMetadata, {
+				littleEndian,
+				tensorInfoByteRange,
+				alignment: Number(originalMetadata["general.alignment"]?.value ?? 32),
+			});
+
+			// Test that the new header can be parsed
+			const tempFilePath = join(tmpdir(), `test-build-header-tensors-${Date.now()}.gguf`);
+			fs.writeFileSync(tempFilePath, Buffer.from(await newHeaderBlob.arrayBuffer()));
+
+			try {
+				const { typedMetadata: parsedMetadata, tensorInfos: parsedTensorInfos } = await gguf(tempFilePath, {
+					typedMetadata: true,
+					allowLocalFile: true,
+				});
+
+				// Verify tensor info is preserved exactly
+				expect(parsedTensorInfos.length).toBe(originalTensorInfos.length);
+				expect(parsedTensorInfos[0]).toEqual(originalTensorInfos[0]);
+				expect(parsedTensorInfos[parsedTensorInfos.length - 1]).toEqual(
+					originalTensorInfos[originalTensorInfos.length - 1]
+				);
+
+				// Verify our custom metadata was added
+				expect(parsedMetadata["test.custom"]).toEqual({
+					value: "custom_value",
+					type: GGUFValueType.STRING,
+				});
+
+				// Verify kv_count was updated
+				expect(parsedMetadata.kv_count.value).toBe(originalMetadata.kv_count.value + 1n);
+			} finally {
+				try {
+					fs.unlinkSync(tempFilePath);
+				} catch (error) {
+					// Ignore cleanup errors
+				}
+			}
+		}, 30_000);
+
+		it("should handle different alignment values", async () => {
+			// Parse a smaller GGUF file
+			const {
+				typedMetadata: originalMetadata,
+				tensorInfoByteRange,
+				littleEndian,
+			} = await gguf(URL_V1, {
+				typedMetadata: true,
+			});
+
+			// Get only the header portion
+			const headerSize = tensorInfoByteRange[1] + 1000;
+			const originalResponse = await fetch(URL_V1, {
+				headers: { Range: `bytes=0-${headerSize - 1}` },
+			});
+			const originalBlob = new Blob([await originalResponse.arrayBuffer()]);
+
+			// Create updated metadata
+			const updatedMetadata = {
+				...originalMetadata,
+				"general.name": {
+					value: "Alignment Test Model",
+					type: GGUFValueType.STRING,
+				},
+			} as GGUFTypedMetadata;
+
+			// Test different alignment values
+			const alignments = [16, 32, 64];
+
+			for (const alignment of alignments) {
+				const newHeaderBlob = await buildGgufHeader(originalBlob, updatedMetadata, {
+					littleEndian,
+					tensorInfoByteRange,
+					alignment,
+				});
+
+				expect(newHeaderBlob).toBeInstanceOf(Blob);
+				expect(newHeaderBlob.size).toBeGreaterThan(0);
+
+				// Verify the header size is aligned correctly
+				expect(newHeaderBlob.size % alignment).toBe(0);
+			}
+		}, 15_000);
+
+		it("should validate tensorInfoByteRange parameters", async () => {
+			// Parse a smaller GGUF file
+			const { typedMetadata: originalMetadata, littleEndian } = await gguf(URL_V1, {
+				typedMetadata: true,
+			});
+
+			// Create a small test blob
+			const testBlob = new Blob([new Uint8Array(1000)]);
+
+			// Test with valid range first to ensure function works
+			const validResult = await buildGgufHeader(testBlob, originalMetadata, {
+				littleEndian,
+				tensorInfoByteRange: [100, 200], // Valid: start < end
+				alignment: 32,
+			});
+
+			expect(validResult).toBeInstanceOf(Blob);
+
+			// Test with edge case: start == end (should work as empty range)
+			const emptyRangeResult = await buildGgufHeader(testBlob, originalMetadata, {
+				littleEndian,
+				tensorInfoByteRange: [100, 100], // Edge case: empty range
+				alignment: 32,
+			});
+
+			expect(emptyRangeResult).toBeInstanceOf(Blob);
+		}, 15_000);
+	});
 });
diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts
index 43c48115d6..c8cb2c20cd 100644
--- a/packages/gguf/src/gguf.ts
+++ b/packages/gguf/src/gguf.ts
@@ -419,6 +419,7 @@ export async function gguf(
 		}
 	}
 
+	const tensorInfoStartOffset = offset;
 	const tensorInfos: GGUFTensorInfo[] = [];
 
 	for (let i = 0; i < tensorCount.value; i++) {
@@ -454,6 +455,7 @@
 	// calculate absolute offset of tensor data
 	const alignment: number = Number(metadata["general.alignment"] ?? GGUF_DEFAULT_ALIGNMENT);
+	const tensorInfoEndBeforePadOffset = offset;
 	const tensorDataOffset = BigInt(GGML_PAD(offset, alignment));
 
 	if (params?.computeParametersCount && params?.typedMetadata) {
@@ -468,6 +470,7 @@
 			littleEndian,
 			parameterCount,
 			typedMetadata: typedMetadata as GGUFTypedMetadata,
+			tensorInfoByteRange: [tensorInfoStartOffset, tensorInfoEndBeforePadOffset],
 		} as GGUFParseOutput & { parameterCount: number; typedMetadata: GGUFTypedMetadata };
 	} else if (params?.computeParametersCount) {
 		const parameterCount = tensorInfos
@@ -480,6 +483,7 @@
 			tensorDataOffset,
 			littleEndian,
 			parameterCount,
+			tensorInfoByteRange: [tensorInfoStartOffset, tensorInfoEndBeforePadOffset],
 		} as GGUFParseOutput & { parameterCount: number };
 	} else if (params?.typedMetadata) {
 		return {
@@ -488,9 +492,16 @@
 			tensorDataOffset,
 			littleEndian,
 			typedMetadata: typedMetadata as GGUFTypedMetadata,
+			tensorInfoByteRange: [tensorInfoStartOffset, tensorInfoEndBeforePadOffset],
 		} as GGUFParseOutput & { typedMetadata: GGUFTypedMetadata };
 	} else {
-		return { metadata, tensorInfos, tensorDataOffset, littleEndian } as GGUFParseOutput;
+		return {
+			metadata,
+			tensorInfos,
+			tensorDataOffset,
+			littleEndian,
+			tensorInfoByteRange: [tensorInfoStartOffset, tensorInfoEndBeforePadOffset],
+		} as GGUFParseOutput;
 	}
 }
 
@@ -664,13 +675,13 @@ export function serializeGgufMetadata(
 		littleEndian?: boolean;
 		/**
 		 * Alignment for tensor data
-		 * @default 32
+		 * @default GGUF_DEFAULT_ALIGNMENT (32)
 		 */
 		alignment?: number;
 	} = {}
 ): Uint8Array {
 	const littleEndian = options.littleEndian ?? true;
-	const alignment = options.alignment ?? 32; // GGUF_DEFAULT_ALIGNMENT
+	const alignment = options.alignment ?? GGUF_DEFAULT_ALIGNMENT;
 	const version = typedMetadata.version.value;
 
 	// Start with GGUF magic number: "GGUF"
@@ -764,6 +775,70 @@ export function serializeGgufMetadata(
 	return result;
 }
 
+/**
+ * Reconstructs a complete GGUF header by combining updated metadata with the original tensor info.
+ * It serializes the new metadata, extracts the original tensor info section from the source file,
+ * and pads the final header to the requested alignment.
+ *
+ * @param originalFileBlob - The original GGUF file blob
+ * @param updatedMetadata - The updated typed metadata
+ * @param options - Reconstruction options
+ * @returns Promise resolving to the new header blob, ready for file editing
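+ *
+ * @example
+ * // Illustrative sketch; `url` and `fileBlob` are placeholder inputs
+ * const { typedMetadata, tensorInfoByteRange, littleEndian } = await gguf(url, { typedMetadata: true });
+ * const updated = { ...typedMetadata, "general.name": { value: "renamed", type: GGUFValueType.STRING } } as GGUFTypedMetadata;
+ * const newHeaderBlob = await buildGgufHeader(fileBlob, updated, { littleEndian, tensorInfoByteRange });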
+ */
+export async function buildGgufHeader(
+	originalFileBlob: Blob,
+	updatedMetadata: GGUFTypedMetadata,
+	options: {
+		/** Whether to use little-endian byte order */
+		littleEndian: boolean;
+		/** Tensor info byte range [start, endBeforePad] from parsing */
+		tensorInfoByteRange: [number, number];
+		/** Alignment for tensor data (default: GGUF_DEFAULT_ALIGNMENT (32)) */
+		alignment?: number;
+	}
+): Promise<Blob> {
+	const alignment = options.alignment ?? GGUF_DEFAULT_ALIGNMENT;
+	const version = updatedMetadata.version.value;
+
+	// Serialize the new metadata
+	const newHeaderBytes = serializeGgufMetadata(updatedMetadata, {
+		littleEndian: options.littleEndian,
+		alignment,
+	});
+
+	// Calculate the KV end offset by parsing the serialized header
+	const view = new DataView(newHeaderBytes.buffer, newHeaderBytes.byteOffset, newHeaderBytes.byteLength);
+	let offset = 8; // magic + version
+	const tensorCount = readVersionedSize(view, offset, version, options.littleEndian);
+	offset += tensorCount.length;
+	const kvCount = readVersionedSize(view, offset, version, options.littleEndian);
+	offset += kvCount.length;
+	for (let i = BigInt(0); i < kvCount.value; i++) {
+		const key = readString(view, offset, version, options.littleEndian);
+		offset += key.length;
+		const valueType = view.getUint32(offset, options.littleEndian);
+		offset += 4;
+		const value = readMetadataValue(view, valueType, offset, version, options.littleEndian);
+		offset += value.length;
+	}
+	const kvEndOffset = offset;
+
+	// Extract the original tensor info section
+	const [tensorInfoStartOffset, tensorInfoEndBeforePadOffset] = options.tensorInfoByteRange;
+	const originalTensorInfoBlob = originalFileBlob.slice(tensorInfoStartOffset, tensorInfoEndBeforePadOffset);
+
+	// Calculate the final header size with proper padding
+	const prePadLenNew = kvEndOffset + (tensorInfoEndBeforePadOffset - tensorInfoStartOffset);
+	const GGML_PAD = (x: number, n: number) => (x + n - 1) & ~(n - 1);
+	const targetTensorDataOffset = GGML_PAD(prePadLenNew, alignment);
+	const padLen = targetTensorDataOffset - prePadLenNew;
+
+	// Reconstruct the final header
+	return new Blob([newHeaderBytes.slice(0, kvEndOffset), originalTensorInfoBlob, new Uint8Array(padLen)], {
+		type: "application/octet-stream",
+	});
+}
+
 export async function ggufAllShards(
 	url: string,
 	params?: {
@@ -799,10 +874,13 @@ export async function ggufAllShards(
 			parameterCount: shards.map(({ parameterCount }) => parameterCount).reduce((acc, val) => acc + val, 0),
 		};
 	} else {
-		const { metadata, tensorInfos, tensorDataOffset, littleEndian, parameterCount } = await gguf(url, {
-			...params,
-			computeParametersCount: true,
-		});
-		return { shards: [{ metadata, tensorInfos, tensorDataOffset, littleEndian }], parameterCount };
+		const { metadata, tensorInfos, tensorDataOffset, littleEndian, parameterCount, tensorInfoByteRange } = await gguf(
+			url,
+			{
+				...params,
+				computeParametersCount: true,
+			}
+		);
+		return { shards: [{ metadata, tensorInfos, tensorDataOffset, littleEndian, tensorInfoByteRange }], parameterCount };
 	}
 }
diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts
index 6461614105..8a48f78d8b 100644
--- a/packages/gguf/src/types.ts
+++ b/packages/gguf/src/types.ts
@@ -146,4 +146,5 @@ export interface GGUFParseOutput