Skip to content

Commit d56b238

Browse files
committed
Use geo parquet as the primary format for the files as opposed to plain vanilla parquet #719
WIP prepare for change by supporting geometry point expansion
1 parent 6d471e0 commit d56b238

File tree

1 file changed

+105
-3
lines changed

1 file changed

+105
-3
lines changed

web-client/src/utils/SrParquetUtils.ts

Lines changed: 105 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -459,11 +459,90 @@ export async function getWritableFileStream(suggestedName: string, mimeType: str
459459
}
460460

461461

462-
export async function exportCsvStreamed(fileName: string, headerCols: Ref<string[]>) {
462+
/**
 * Parse a WKB / EWKB Point geometry and extract its x, y and (optional) z coordinates.
 *
 * Binary layout:
 * - byte 0: byte order (1 = little-endian, 0 = big-endian)
 * - bytes 1-4: geometry type (uint32)
 *   - ISO WKB: 1 = Point, 1001 = PointZ, 2001 = PointM, 3001 = PointZM
 *   - EWKB: base type 1 OR-ed with the flags 0x80000000 (has Z),
 *     0x40000000 (has M) and 0x20000000 (has SRID)
 * - bytes 5-8: SRID (uint32) — present ONLY when the EWKB SRID flag is set
 * - then x and y as consecutive doubles, followed by z for the Z variants
 *
 * @param wkb - Raw geometry bytes; may be a view into a larger buffer.
 * @returns The coordinates, or `null` when the input is missing, too short to
 *          hold x and y, has an invalid byte-order marker, or is not a Point.
 *          `z` is `undefined` when the geometry has no Z ordinate or the buffer
 *          ends before it.
 */
function parseWkbPoint(wkb: Uint8Array): { x: number; y: number; z?: number } | null {
  const HEADER_BYTES = 5; // byte-order marker + uint32 geometry type
  const SRID_BYTES = 4;
  const DOUBLE_BYTES = 8;
  const EWKB_Z_FLAG = 0x80000000;
  const EWKB_SRID_FLAG = 0x20000000;
  const EWKB_TYPE_MASK = 0x1fffffff; // strips the three EWKB flag bits
  const WKB_POINT = 1;

  if (!wkb || wkb.length < HEADER_BYTES) {
    return null;
  }

  // Only 0 and 1 are valid markers; anything else means this is not WKB.
  const byteOrder = wkb[0];
  if (byteOrder !== 0 && byteOrder !== 1) {
    return null;
  }
  const littleEndian = byteOrder === 1;

  // Honour byteOffset so subarray views are read from the right position.
  const view = new DataView(wkb.buffer, wkb.byteOffset, wkb.byteLength);
  const geomType = view.getUint32(1, littleEndian);

  // ISO encodes dimensionality in the thousands digit: 0 = XY, 1 = Z, 2 = M, 3 = ZM.
  const isoCode = geomType & EWKB_TYPE_MASK;
  const baseType = isoCode % 1000;
  const isoDimension = Math.floor(isoCode / 1000);

  if (baseType !== WKB_POINT || isoDimension > 3) {
    console.warn('Geometry type is not a Point:', geomType);
    return null;
  }

  const hasSrid = (geomType & EWKB_SRID_FLAG) !== 0;
  const hasZ = (geomType & EWKB_Z_FLAG) !== 0 || isoDimension === 1 || isoDimension === 3;

  // An embedded SRID sits between the type and the coordinates and shifts them by 4 bytes.
  const xOffset = HEADER_BYTES + (hasSrid ? SRID_BYTES : 0);
  const yOffset = xOffset + DOUBLE_BYTES;
  const zOffset = yOffset + DOUBLE_BYTES;

  if (wkb.length < zOffset) {
    return null; // not enough bytes for both x and y
  }

  const x = view.getFloat64(xOffset, littleEndian);
  const y = view.getFloat64(yOffset, littleEndian);

  // Stay lenient with a truncated Z ordinate: still return x/y, leave z undefined.
  const z =
    hasZ && wkb.length >= zOffset + DOUBLE_BYTES
      ? view.getFloat64(zOffset, littleEndian)
      : undefined;

  return { x, y, z };
}
501+
502+
/**
 * Shape of the JSON document stored under the Parquet `recordinfo` metadata
 * key; `exportCsvStreamed` obtains it with `JSON.parse(metadata.recordinfo)`.
 *
 * The coordinate fields hold column NAMES, not coordinate values: they name
 * the CSV columns that receive the ordinates extracted from the geometry.
 */
interface RecordInfo {
  /** Name of the column that receives the x ordinate. */
  x: string;
  /** Name of the column that receives the y ordinate. */
  y: string;
  /** Name of the column that receives the z ordinate; a z column is emitted only when this is set. */
  z?: string;
  /** NOTE(review): presumably the name of the record's time column — not read by the export path shown here; confirm. */
  time?: string;
  /** NOTE(review): presumably flags that coordinates are stored as a geometry column — not read by the export path shown here; confirm. */
  as_geo?: boolean;
}
509+
510+
/**
 * Shape of the JSON document stored under the Parquet `geo` metadata key;
 * `exportCsvStreamed` obtains it with `JSON.parse(metadata.geo)`.
 *
 * Every field is optional because the JSON is parsed without validation.
 */
interface GeoMetadata {
  /** Version string of the geo metadata (presumably the GeoParquet spec version — confirm). */
  version?: string;
  /** Name of the primary geometry column; the export falls back to 'geometry' when it is absent. */
  primary_column?: string;
  /**
   * Per-column metadata, assumed to be keyed by column name. Values are
   * `unknown` rather than `any` so readers must narrow them before use.
   */
  columns?: Record<string, unknown>;
}
515+
516+
export async function exportCsvStreamed(fileName: string, headerCols: Ref<string[]>, expandGeometry = false) {
463517
const duck = await createDuckDbClient();
464518
await duck.insertOpfsParquet(fileName);
465519

466-
const columns = headerCols.value;
520+
let columns = headerCols.value;
521+
let geometryColumn: string | null = null;
522+
let recordInfo: RecordInfo | null = null;
523+
524+
// If expandGeometry is true, check for geo metadata and adjust columns
525+
if (expandGeometry) {
526+
const metadata = await duck.getAllParquetMetadata(fileName);
527+
if (metadata?.geo && metadata.recordinfo) {
528+
const geoMetadata: GeoMetadata = JSON.parse(metadata.geo);
529+
geometryColumn = geoMetadata.primary_column || 'geometry';
530+
const parsedRecordInfo: RecordInfo = JSON.parse(metadata.recordinfo);
531+
recordInfo = parsedRecordInfo;
532+
533+
// Build new column list: remove geometry, add x,y,z
534+
const xColName = parsedRecordInfo.x || 'lon';
535+
const yColName = parsedRecordInfo.y || 'lat';
536+
const zColName = parsedRecordInfo.z || 'height';
537+
538+
columns = columns.filter(col => col !== geometryColumn);
539+
columns.push(xColName, yColName);
540+
if (parsedRecordInfo.z) {
541+
columns.push(zColName);
542+
}
543+
}
544+
}
545+
467546
const encoder = new TextEncoder();
468547
const { readRows } = await duck.query(`SELECT * FROM "${fileName}"`);
469548

@@ -481,8 +560,31 @@ export async function exportCsvStreamed(fileName: string, headerCols: Ref<string
481560

482561
for await (const rows of readRows(1000)) {
483562
const lines = rows.map(row => {
563+
let processedRow = row;
564+
565+
// If expanding geometry, parse WKB and add x,y,z columns
566+
if (expandGeometry && geometryColumn && recordInfo) {
567+
const wkb = row[geometryColumn] as Uint8Array;
568+
if (wkb) {
569+
const coords = parseWkbPoint(wkb);
570+
if (coords) {
571+
const xColName = recordInfo.x || 'lon';
572+
const yColName = recordInfo.y || 'lat';
573+
const zColName = recordInfo.z || 'height';
574+
575+
processedRow = { ...row };
576+
processedRow[xColName] = coords.x;
577+
processedRow[yColName] = coords.y;
578+
if (coords.z !== undefined && recordInfo.z) {
579+
processedRow[zColName] = coords.z;
580+
}
581+
delete processedRow[geometryColumn]; // Remove geometry column
582+
}
583+
}
584+
}
585+
484586
const processed = columns.map(col => {
485-
let val = row[col];
587+
let val = processedRow[col];
486588
if (col === 'srcid') val = lookup[val] ?? `unknown_srcid_${val}`;
487589
return safeCsvCell(val);
488590
});

0 commit comments

Comments
 (0)