-
Notifications
You must be signed in to change notification settings - Fork 84
feature/deserializing-header #334
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from 6 commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
e9775ad
Add initial check for field list frame type in header deserialization
Krmjn09 e11648f
parse record envelope and field list envelope from RNTuple header
Krmjn09 d79c587
deserialize envelope, feature flags, and field list header from RNTup…
Krmjn09 321816a
Doing Suggested Changes in the code and removing CI for version in rn…
Krmjn09 2948249
deserialize each Field Record Frame and log field name with type
Krmjn09 7ae6a2c
Implement full deserialization logic for header and footer envelope a…
Krmjn09 9d62f32
commit changes
Krmjn09 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -70,6 +70,13 @@ class RBufferReader { | |
return val; | ||
} | ||
|
||
// Read 64-bit float (8 BYTES) | ||
readF64() { | ||
const val = this.view.getFloat64(this.offset, LITTLE_ENDIAN); | ||
this.offset += 8; | ||
return val; | ||
} | ||
|
||
// Read a string with 32-bit length prefix | ||
readString() { | ||
const length = this.readU32(); | ||
|
@@ -92,55 +99,241 @@ class RBufferReader { | |
this.offset += 8; | ||
return val; | ||
} | ||
|
||
} | ||
|
||
|
||
class RNTupleDescriptorBuilder { | ||
|
||
deserializeHeader(header_blob) { | ||
if (!header_blob) return; | ||
|
||
const reader = new RBufferReader(header_blob); | ||
deserializeHeader(header_blob) { | ||
if (!header_blob) return; | ||
|
||
// 1. Read header version (4 bytes) | ||
this.version = reader.readU32(); | ||
const reader = new RBufferReader(header_blob); | ||
// Read the envelope metadata | ||
this._readEnvelopeMetadata(reader); | ||
|
||
// 2. Read feature flags (4 bytes) | ||
this.headerFeatureFlags = reader.readU32(); | ||
// TODO: Validate the envelope checksum at the end of deserialization | ||
// const payloadStart = reader.offset; | ||
|
||
// 3. Read xxhash3 (64-bit, 8 bytes) | ||
this.xxhash3 = reader.readU64(); | ||
// Read feature flags list (may span multiple 64-bit words) | ||
this._readFeatureFlags(reader); | ||
|
||
// 4. Read name (length-prefixed string) | ||
this.name = reader.readString(); | ||
// Read metadata strings | ||
this._readStrings(reader); | ||
|
||
// 5. Read description (length-prefixed string) | ||
this.description = reader.readString(); | ||
|
||
|
||
// Console output to verify deserialization results | ||
console.log('Version:', this.version); | ||
console.log('Header Feature Flags:', this.headerFeatureFlags); | ||
console.log('xxhash3:', '0x' + this.xxhash3.toString(16).padStart(16, '0')); | ||
console.log('Name:', this.name); | ||
console.log('Description:', this.description); | ||
} | ||
// List frame: list of field record frames | ||
this._readFieldDescriptors(reader); | ||
// List frame: list of column record frames | ||
this._readColumnDescriptors(reader); | ||
// Read alias column descriptors | ||
this._readAliasColumn(reader); | ||
// Read Extra Type Information | ||
this._readExtraTypeInformation(reader); | ||
} | ||
|
||
|
||
deserializeFooter(footer_blob) { | ||
if (!footer_blob) return; | ||
|
||
const reader = new RBufferReader(footer_blob); | ||
// Read the envelope metadata | ||
this._readEnvelopeMetadata(reader); | ||
|
||
// Feature flag(32 bits) | ||
this.footerFeatureFlags = reader.readU32(); | ||
this.headerChecksum = reader.readU32(); | ||
// Header checksum (64-bit xxhash3) | ||
this.headerChecksum = reader.readU64(); | ||
|
||
// Schema extension record frame (4 list frames inside) | ||
this._readFieldDescriptors(reader); | ||
this._readColumnDescriptors(reader); | ||
this._readAliasColumnDescriptors(reader); | ||
this._readExtraTypeInfos(reader); | ||
|
||
console.log('Footer Feature Flags:', this.footerFeatureFlags); | ||
console.log('Header Checksum:', this.headerChecksum); | ||
// Cluster Group record frame | ||
this._readClusterGroups(reader); | ||
} | ||
|
||
|
||
_readEnvelopeMetadata(reader) { | ||
const typeAndLength = reader.readU64(); | ||
|
||
// Envelope metadata | ||
// The 16 bits are the envelope type ID, and the 48 bits are the envelope length | ||
this.envelopeType = Number(typeAndLength & 0xFFFFn); | ||
this.envelopeLength = Number((typeAndLength >> 16n) & 0xFFFFFFFFFFFFn); | ||
|
||
console.log('Envelope Type ID:', this.envelopeType); | ||
console.log('Envelope Length:', this.envelopeLength); | ||
} | ||
_readFeatureFlags(reader) { | ||
this.featureFlags = []; | ||
while (true) { | ||
const val = reader.readU64(); | ||
this.featureFlags.push(val); | ||
if ((val & 0x8000000000000000n) === 0n) break; // MSB not set: end of list | ||
} | ||
|
||
// verify all feature flags are zero | ||
if (this.featureFlags.some(v => v !== 0n)) | ||
throw new Error('Unexpected non-zero feature flags: ' + this.featureFlags); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please indent this line properly |
||
} | ||
_readStrings(reader) { | ||
silverweed marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
this.name = reader.readString(); | ||
this.description = reader.readString(); | ||
this.writer = reader.readString(); | ||
// TODO: Remove debug logs before finalizing | ||
console.log('Name:', this.name); | ||
console.log('Description:', this.description); | ||
console.log('Writer:', this.writer); | ||
silverweed marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
} | ||
_readFieldDescriptors(reader) { | ||
this.fieldListSize = reader.readS64(); // signed 64-bit | ||
silverweed marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
const fieldListIsList = this.fieldListSize < 0; | ||
|
||
if (!fieldListIsList) | ||
throw new Error('Field list frame is not a list frame, which is required.'); | ||
|
||
const fieldListCount = reader.readU32(); // number of field entries | ||
console.log('Field List Count:', fieldListCount); | ||
|
||
// List frame: list of field record frames | ||
|
||
const fieldDescriptors = []; | ||
for (let i = 0; i < fieldListCount; ++i) { | ||
const fieldVersion = reader.readU32(), | ||
typeVersion = reader.readU32(), | ||
parentFieldId = reader.readU32(), | ||
structRole = reader.readU16(), | ||
flags = reader.readU16(), | ||
|
||
fieldName = reader.readString(), | ||
typeName = reader.readString(), | ||
typeAlias = reader.readString(), | ||
description = reader.readString(); | ||
let arraySize = null, sourceFieldId = null, checksum = null; | ||
|
||
if (flags & 0x1) arraySize = reader.readU32(); | ||
if (flags & 0x2) sourceFieldId = reader.readU32(); | ||
if (flags & 0x4) checksum = reader.readU32(); | ||
|
||
fieldDescriptors.push({ | ||
fieldVersion, | ||
typeVersion, | ||
parentFieldId, | ||
structRole, | ||
flags, | ||
fieldName, | ||
typeName, | ||
typeAlias, | ||
description, | ||
arraySize, | ||
sourceFieldId, | ||
checksum | ||
}); | ||
console.log(`Field ${i + 1}:`, fieldName, '&&', typeName); | ||
} | ||
Krmjn09 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
this.fieldDescriptors = fieldDescriptors; | ||
} | ||
_readColumnDescriptors(reader) { | ||
this.columnListSize = reader.readS64(); // signed 64-bit | ||
const columnListIsList = this.columnListSize < 0; | ||
if (!columnListIsList) | ||
throw new Error('Column list frame is not a list frame, which is required.'); | ||
const columnListCount = reader.readU32(); // number of column entries | ||
console.log('Column List Count:', columnListCount); | ||
const columnDescriptors = []; | ||
for (let i = 0; i < columnListCount; ++i) { | ||
const coltype = reader.readU16(), | ||
bitsOnStrorage = reader.readU16(), | ||
fieldId = reader.readU32(), | ||
flags = reader.readU16(), | ||
representationIndex = reader.readU16(); | ||
|
||
let firstElementIndex = null, minValue = null, maxValue = null; | ||
if (flags & 0x1) firstElementIndex = reader.readU64(); | ||
if (flags & 0x2){ | ||
minValue = reader.readF64(); | ||
maxValue = reader.readF64(); | ||
} | ||
|
||
columnDescriptors.push({ | ||
coltype, | ||
bitsOnStrorage, | ||
fieldId, | ||
flags, | ||
representationIndex, | ||
firstElementIndex, | ||
minValue, | ||
maxValue, | ||
isDeferred: (flags & 0x01) !== 0, | ||
isSuppressed: (firstElementIndex !== null && firstElementIndex < 0) | ||
}); | ||
silverweed marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
} | ||
this.columnDescriptors = columnDescriptors; | ||
} | ||
_readAliasColumn(reader){ | ||
silverweed marked this conversation as resolved.
Show resolved
Hide resolved
|
||
this.aliasColumnListSize = reader.readS64(); // signed 64-bit | ||
const aliasListisList = this.aliasColumnListSize < 0; | ||
if (!aliasListisList) | ||
throw new Error('Alias column list frame is not a list frame, which is required.'); | ||
const aliasColumnCount = reader.readU32(); // number of alias column entries | ||
console.log('Alias Column List Count:', aliasColumnCount); | ||
const aliasColumns = []; | ||
for (let i = 0; i < aliasColumnCount; ++i){ | ||
const physicalColumnId = reader.readU32(), | ||
fieldId = reader.readU32(); | ||
aliasColumns.push({ | ||
physicalColumnId, | ||
fieldId | ||
}); | ||
} | ||
this.aliasColumns = aliasColumns; | ||
} | ||
_readExtraTypeInformation(reader) { | ||
this.extraTypeInfoListSize = reader.readS64(); // signed 64-bit | ||
const isList = this.extraTypeInfoListSize < 0; | ||
|
||
if (!isList) | ||
throw new Error('Extra type info frame is not a list frame, which is required.'); | ||
|
||
const entryCount = reader.readU32(); // number of extra type info entries | ||
console.log('Extra Type Info Count:', entryCount); | ||
|
||
const extraTypeInfo = []; | ||
for (let i = 0; i < entryCount; ++i) { | ||
const contentId = reader.readU32(), | ||
typeVersion = reader.readU32(); | ||
extraTypeInfo.push({ | ||
contentId, | ||
typeVersion | ||
}); | ||
} | ||
this.extraTypeInfo = extraTypeInfo; | ||
} | ||
_readClusterGroups(reader){ | ||
const clusterGroupListSize = reader.readS64(), | ||
isList = clusterGroupListSize < 0; | ||
if (!isList) | ||
throw new Error('Cluster group frame is not a list frame.'); | ||
|
||
const clusterGroupCount = reader.readU32(); | ||
console.log('Cluster Group Count:', clusterGroupCount); | ||
|
||
const clusterGroups = []; | ||
for (let i = 0; i < clusterGroupCount; ++i) { | ||
const minEntry = reader.readS64(), | ||
entrySpan = reader.readS64(), | ||
numClusters = reader.readU32(); | ||
clusterGroups.push({ | ||
minEntry, | ||
entrySpan, | ||
numClusters | ||
}); | ||
} | ||
this.clusterGroups = clusterGroups; | ||
} | ||
|
||
} | ||
|
||
/** @summary Very preliminary function to read header/footer from RNTuple | ||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can just be
Number(typeAndLength >>> 16n)
since the unsigned right shift will zero the upper bits. (important to use the triple>>>
)