From 7ad6fb384f667cb4299ddd8f41f5cd0ed4e4ab19 Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Wed, 6 Aug 2025 11:39:59 +0530 Subject: [PATCH 1/4] Added delta decoding logic --- modules/rntuple.mjs | 54 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index a50d611f3..3b3e945ac 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -198,7 +198,9 @@ function recontructUnsplitBuffer(blob, columnDescriptor) { coltype === ENTupleColumnType.kSplitUInt64 || coltype === ENTupleColumnType.kSplitReal16 || coltype === ENTupleColumnType.kSplitReal32 || - coltype === ENTupleColumnType.kSplitReal64 + coltype === ENTupleColumnType.kSplitReal64 || + coltype === ENTupleColumnType.kSplitIndex32 || + coltype === ENTupleColumnType.kSplitIndex64 ) { const byteSize = getTypeByteSize(coltype), splitView = new DataView(blob.buffer, blob.byteOffset, blob.byteLength), @@ -255,6 +257,33 @@ function recontructUnsplitBuffer(blob, columnDescriptor) { } +/** + * @summary Decode a reconstructed index buffer (32- or 64-bit deltas to absolute indices) + */ +function DecodeDeltaIndex(blob, coltype) { + if (coltype === ENTupleColumnType.kIndex32) { + // create Int32Array view of the 4-byte elements + const deltas = new Int32Array(blob.buffer || blob, blob.byteOffset || 0, blob.byteLength/4), + result = new Int32Array(deltas.length); + if (deltas.length > 0) result[0] = deltas[0]; + for (let i = 1; i < deltas.length; ++i) + result[i] = result[i - 1] + deltas[i]; + return { blob: result, coltype }; + } + + if (coltype === ENTupleColumnType.kIndex64) { + const deltas = new BigInt64Array(blob.buffer || blob, blob.byteOffset || 0, blob.byteLength/8), + result = new BigInt64Array(deltas.length); + if (deltas.length > 0) result[0] = deltas[0]; + for (let i = 1; i < deltas.length; ++i) + result[i] = result[i - 1] + deltas[i]; + return { blob: result, coltype }; + } + + throw new Error(`DecodeDeltaIndex: unsupported column type ${coltype}`); +} + + // Envelope Types // TODO: Define usage logic for envelope types in future // const kEnvelopeTypeHeader = 0x01, @@ -686,13 +715,21 @@ class RNTupleDescriptorBuilder { // Example Of Deserializing Page Content deserializePage(blob, columnDescriptor) { - const { + const originalColtype = columnDescriptor.coltype, + { blob: processedBlob, coltype - } = recontructUnsplitBuffer(blob, columnDescriptor), - byteSize = getTypeByteSize(coltype), - reader = new RBufferReader(processedBlob), - values = []; + } = recontructUnsplitBuffer(blob, columnDescriptor); + + // Handle split index types + if (originalColtype === ENTupleColumnType.kSplitIndex32 || originalColtype=== ENTupleColumnType.kSplitIndex64) { + const { blob: decodedArray } = DecodeDeltaIndex(processedBlob, coltype); + return decodedArray; + } + + const byteSize = getTypeByteSize(coltype), + reader = new RBufferReader(processedBlob), + values = []; if (!byteSize) throw new Error('Invalid or unsupported column type: cannot determine byte size'); @@ -733,8 +770,6 @@ class RNTupleDescriptorBuilder { break; case ENTupleColumnType.kUInt8: case ENTupleColumnType.kByte: - case ENTupleColumnType.kByteArray: - case ENTupleColumnType.kIndexArrayU8: val = reader.readU8(); break; case ENTupleColumnType.kChar: @@ -743,9 +778,6 @@ class RNTupleDescriptorBuilder { case ENTupleColumnType.kIndex64: val = reader.readU64(); break; - case ENTupleColumnType.kSplitIndex64: - val = reader.readU32(); - break; default: throw new Error(`Unsupported column type: ${columnDescriptor.coltype}`); } From 56c1e36dd70f250c15fec4b2a4bb10f33f21978a Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Wed, 6 Aug 2025 14:27:51 +0530 Subject: [PATCH 2/4] commited the suggestions --- modules/rntuple.mjs | 43 +++++++++++++++++++------------------------ 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 3b3e945ac..333cad3df 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -261,26 +261,22 @@ function recontructUnsplitBuffer(blob, columnDescriptor) { * @summary Decode a reconstructed index buffer (32- or 64-bit deltas to absolute indices) */ function DecodeDeltaIndex(blob, coltype) { + let deltas, result; + if (coltype === ENTupleColumnType.kIndex32) { - // create Int32Array view of the 4-byte elements - const deltas = new Int32Array(blob.buffer || blob, blob.byteOffset || 0, blob.byteLength/4), - result = new Int32Array(deltas.length); - if (deltas.length > 0) result[0] = deltas[0]; - for (let i = 1; i < deltas.length; ++i) - result[i] = result[i - 1] + deltas[i]; - return { blob: result, coltype }; - } - - if (coltype === ENTupleColumnType.kIndex64) { - const deltas = new BigInt64Array(blob.buffer || blob, blob.byteOffset || 0, blob.byteLength/8), - result = new BigInt64Array(deltas.length); - if (deltas.length > 0) result[0] = deltas[0]; - for (let i = 1; i < deltas.length; ++i) - result[i] = result[i - 1] + deltas[i]; - return { blob: result, coltype }; - } - - throw new Error(`DecodeDeltaIndex: unsupported column type ${coltype}`); + deltas = new Int32Array(blob.buffer || blob, blob.byteOffset || 0, blob.byteLength / 4); + result = new Int32Array(deltas.length); + } else if (coltype === ENTupleColumnType.kIndex64) { + deltas = new BigInt64Array(blob.buffer || blob, blob.byteOffset || 0, blob.byteLength / 8); + result = new BigInt64Array(deltas.length); + } else + throw new Error(`DecodeDeltaIndex: unsupported column type ${coltype}`); + + if (deltas.length > 0) result[0] = deltas[0]; + for (let i = 1; i < deltas.length; ++i) + result[i] = result[i - 1] + deltas[i]; + + return { blob: result, coltype }; } @@ -716,15 +712,14 @@ class RNTupleDescriptorBuilder { // Example Of Deserializing Page Content deserializePage(blob, columnDescriptor) { const originalColtype = columnDescriptor.coltype, - { - blob: processedBlob, - coltype - } = recontructUnsplitBuffer(blob, columnDescriptor); + { coltype } = recontructUnsplitBuffer(blob, columnDescriptor); + let { blob: processedBlob } = recontructUnsplitBuffer(blob, columnDescriptor); + // Handle split index types if (originalColtype === ENTupleColumnType.kSplitIndex32 || originalColtype=== ENTupleColumnType.kSplitIndex64) { const { blob: decodedArray } = DecodeDeltaIndex(processedBlob, coltype); - return decodedArray; + processedBlob = decodedArray; } const byteSize = getTypeByteSize(coltype), From 93c7e3b7216eac5baf144c2e4f6d5d2a2882cab7 Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Wed, 6 Aug 2025 23:13:24 +0530 Subject: [PATCH 3/4] Fix split index field reconstruction logic in deserialization --- modules/rntuple.mjs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 333cad3df..4a7e7e369 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -158,6 +158,7 @@ function getTypeByteSize(coltype) { case ENTupleColumnType.kSplitInt64: case ENTupleColumnType.kSplitUInt64: case ENTupleColumnType.kSplitReal64: + case ENTupleColumnType.kSplitIndex64: return 8; case ENTupleColumnType.kReal32: @@ -168,7 +169,6 @@ function getTypeByteSize(coltype) { case ENTupleColumnType.kSplitUInt32: case ENTupleColumnType.kSplitReal32: case ENTupleColumnType.kSplitIndex32: - case ENTupleColumnType.kSplitIndex64: return 4; case ENTupleColumnType.kInt16: case ENTupleColumnType.kUInt16: @@ -712,7 +712,7 @@ class RNTupleDescriptorBuilder { // Example Of Deserializing Page Content deserializePage(blob, columnDescriptor) { const originalColtype = columnDescriptor.coltype, - { coltype } = recontructUnsplitBuffer(blob, columnDescriptor); + { coltype } = recontructUnsplitBuffer(blob, columnDescriptor); let { blob: processedBlob } = recontructUnsplitBuffer(blob, columnDescriptor); @@ -742,20 +742,19 @@ class RNTupleDescriptorBuilder { val = reader.readF32(); break; case ENTupleColumnType.kInt64: - val = reader.readI64(); + val = reader.readS64(); break; case ENTupleColumnType.kUInt64: val = reader.readU64(); break; case ENTupleColumnType.kInt32: - case ENTupleColumnType.kIndex32: - val = reader.readU32(); + val = reader.readS32(); break; case ENTupleColumnType.kUInt32: val = reader.readU32(); break; case ENTupleColumnType.kInt16: - val = reader.readI16(); + val = reader.readS16(); break; case ENTupleColumnType.kUInt16: val = reader.readU16(); @@ -770,8 +769,9 @@ class RNTupleDescriptorBuilder { case ENTupleColumnType.kChar: val = String.fromCharCode(reader.readS8()); break; + case ENTupleColumnType.kIndex32: case ENTupleColumnType.kIndex64: - val = reader.readU64(); + val = processedBlob[i]; break; default: throw new Error(`Unsupported column type: ${columnDescriptor.coltype}`); @@ -981,7 +981,12 @@ function readNextCluster(rntuple, selector) { // splitting string fields into offset and payload components if (field.typeName === 'std::string') { - if (colDesc.coltype === ENTupleColumnType.kIndex64) // Index64/Index32 + if ( + colDesc.coltype === ENTupleColumnType.kIndex64 || + colDesc.coltype === ENTupleColumnType.kIndex32 || + colDesc.coltype === ENTupleColumnType.kSplitIndex64 || + colDesc.coltype === ENTupleColumnType.kSplitIndex32 + ) // Index64/Index32 rntuple._clusterData[field.fieldName][0] = values; // Offsets else if (colDesc.coltype === ENTupleColumnType.kChar) rntuple._clusterData[field.fieldName][1] = values; // Payload From a1f77f9a25fb3a42f24482db960d33ed9e3d1163 Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Wed, 6 Aug 2025 23:27:08 +0530 Subject: [PATCH 4/4] Corrected reader.readS32 and 64 --- modules/rntuple.mjs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 4a7e7e369..3584ad6ca 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -770,8 +770,10 @@ class RNTupleDescriptorBuilder { val = String.fromCharCode(reader.readS8()); break; case ENTupleColumnType.kIndex32: + val = reader.readS32(); + break; case ENTupleColumnType.kIndex64: - val = processedBlob[i]; + val = reader.readS64(); break; default: throw new Error(`Unsupported column type: ${columnDescriptor.coltype}`);