From 2fdc8ec326064e143b8f1e3df9697309073d8493 Mon Sep 17 00:00:00 2001
From: Decio Ferreira
Date: Thu, 21 Aug 2025 01:34:54 +0100
Subject: [PATCH] update _Bytes_read_string to use array instead of string as accumulator

The post-build script fails when `_Bytes_read_string` is not found in its
expected form, instead of silently leaving the bundle unpatched.
---
 eslint.config.mjs                    |   1 +
 scripts/build.sh                     |   1 +
 scripts/replace-bytes-read-string.js | 127 +++++++++++++++++++++++++++
 src/Compiler/Parse/Primitives.elm    |  11 +--
 4 files changed, 131 insertions(+), 9 deletions(-)
 create mode 100644 scripts/replace-bytes-read-string.js

diff --git a/eslint.config.mjs b/eslint.config.mjs
index c9fe9fc51..79f74a11d 100644
--- a/eslint.config.mjs
+++ b/eslint.config.mjs
@@ -21,6 +21,7 @@ export default defineConfig([
   { files: ["lib/browser.js"], languageOptions: { globals: globals.browser } },
   { files: ["lib/node.js"], languageOptions: { globals: globals.node } },
   { files: ["try/**/*.{js,mjs,cjs}"], languageOptions: { globals: { ...globals.browser, ...globals.node } } },
+  { files: ["scripts/*.js"], languageOptions: { globals: globals.node } },
   {
     files: ["**/*.{js,mjs,cjs}"],
     plugins: { js },
diff --git a/scripts/build.sh b/scripts/build.sh
index 65c50b89d..b0207051e 100755
--- a/scripts/build.sh
+++ b/scripts/build.sh
@@ -27,6 +27,7 @@ js="$filepath.js"
 min="$filepath.min.js"
 
 guida make --optimize --output=$js $elm_entry
+node scripts/replace-bytes-read-string.js $js
 
 uglifyjs $js --compress "pure_funcs=[F2,F3,F4,F5,F6,F7,F8,F9,A2,A3,A4,A5,A6,A7,A8,A9],pure_getters,keep_fargs=false,unsafe_comps,unsafe" | uglifyjs --mangle --output $min
 
diff --git a/scripts/replace-bytes-read-string.js b/scripts/replace-bytes-read-string.js
new file mode 100644
index 000000000..cb85d81de
--- /dev/null
+++ b/scripts/replace-bytes-read-string.js
@@ -0,0 +1,127 @@
+#!/usr/bin/env node
+
+/* Post-build step: rewrites the `_Bytes_read_string` kernel function in the compiled
+   JavaScript so that it collects the decoded characters in an array and joins them
+   once at the end, instead of appending to a string with `+=`.
+
+   This change prevents V8 from retaining large "concatenated string" chains, which can cause OOMs.
+   Tested against `rtfeldman/elm-css` compilation.
+
+   See the related discussion for context: https://discourse.elm-lang.org/t/guida-compiler-was-there-are-3-elm-compilers-written-in-elm/10329/34
+   and issue: https://github.com/guida-lang/compiler/issues/107
+
+   Usage: node scripts/replace-bytes-read-string.js <path-to-compiled-js>
+
+   The file is rewritten in place. The script throws when the function is found in
+   neither its original nor its already-patched form, so an unpatched build cannot
+   go unnoticed.
+ */
+
+const fs = require('node:fs');
+
+/* The implementation expected in the compiled output: accumulates into a string. */
+const ORIGINAL = `var _Bytes_read_string = F3(function(len, bytes, offset)
+{
+    var string = '';
+    var end = offset + len;
+    for (; offset < end;)
+    {
+        var byte = bytes.getUint8(offset++);
+        string +=
+            (byte < 128)
+                ? String.fromCharCode(byte)
+                :
+            ((byte & 0xE0 /* 0b11100000 */) === 0xC0 /* 0b11000000 */)
+                ? String.fromCharCode((byte & 0x1F /* 0b00011111 */) << 6 | bytes.getUint8(offset++) & 0x3F /* 0b00111111 */)
+                :
+            ((byte & 0xF0 /* 0b11110000 */) === 0xE0 /* 0b11100000 */)
+                ? String.fromCharCode(
+                    (byte & 0xF /* 0b00001111 */) << 12
+                    | (bytes.getUint8(offset++) & 0x3F /* 0b00111111 */) << 6
+                    | bytes.getUint8(offset++) & 0x3F /* 0b00111111 */
+                )
+                :
+                (byte =
+                    ((byte & 0x7 /* 0b00000111 */) << 18
+                        | (bytes.getUint8(offset++) & 0x3F /* 0b00111111 */) << 12
+                        | (bytes.getUint8(offset++) & 0x3F /* 0b00111111 */) << 6
+                        | bytes.getUint8(offset++) & 0x3F /* 0b00111111 */
+                    ) - 0x10000
+                , String.fromCharCode(Math.floor(byte / 0x400) + 0xD800, byte % 0x400 + 0xDC00)
+                );
+    }
+    return _Utils_Tuple2(offset, string);
+});`;
+
+/* The replacement: pushes each decoded character onto an array and joins once. */
+const REPLACEMENT = `var _Bytes_read_string = F3(function(len, bytes, offset)
+{
+    var string = [];
+    var end = offset + len;
+    for (; offset < end;)
+    {
+        var byte = bytes.getUint8(offset++);
+        string.push(
+            (byte < 128)
+                ? String.fromCharCode(byte)
+                :
+            ((byte & 0xE0 /* 0b11100000 */) === 0xC0 /* 0b11000000 */)
+                ? String.fromCharCode((byte & 0x1F /* 0b00011111 */) << 6 | bytes.getUint8(offset++) & 0x3F /* 0b00111111 */)
+                :
+            ((byte & 0xF0 /* 0b11110000 */) === 0xE0 /* 0b11100000 */)
+                ? String.fromCharCode(
+                    (byte & 0xF /* 0b00001111 */) << 12
+                    | (bytes.getUint8(offset++) & 0x3F /* 0b00111111 */) << 6
+                    | bytes.getUint8(offset++) & 0x3F /* 0b00111111 */
+                )
+                :
+                (byte =
+                    ((byte & 0x7 /* 0b00000111 */) << 18
+                        | (bytes.getUint8(offset++) & 0x3F /* 0b00111111 */) << 12
+                        | (bytes.getUint8(offset++) & 0x3F /* 0b00111111 */) << 6
+                        | bytes.getUint8(offset++) & 0x3F /* 0b00111111 */
+                    ) - 0x10000
+                , String.fromCharCode(Math.floor(byte / 0x400) + 0xD800, byte % 0x400 + 0xDC00)
+                )
+        );
+    }
+    return _Utils_Tuple2(offset, string.join(''));
+});`;
+
+/* Builds a RegExp that matches `source` token by token and accepts any run of
+   whitespace wherever `source` contains whitespace, so that indentation or
+   line-ending differences in the compiled output do not defeat the match. */
+function toWhitespaceTolerantPattern(source) {
+    const tokens = source
+        .trim()
+        .split(/\s+/)
+        .map((token) => token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
+
+    return new RegExp(tokens.join('\\s+'));
+}
+
+/* Patches the file at `path` in place; see the header comment for the contract. */
+function main(path) {
+    if (!path) {
+        throw new Error('Usage: node scripts/replace-bytes-read-string.js <path-to-compiled-js>');
+    }
+
+    const source = fs.readFileSync(path, { encoding: 'utf8', flag: 'r' });
+    const originalPattern = toWhitespaceTolerantPattern(ORIGINAL);
+
+    if (originalPattern.test(source)) {
+        /* A replacer function keeps any `$` in REPLACEMENT from being read as a substitution pattern. */
+        const data = source.replace(originalPattern, () => REPLACEMENT);
+        fs.writeFileSync(path, data, { encoding: 'utf8', flag: 'w' });
+        return;
+    }
+
+    /* Running the script again on an already-patched file stays a no-op. */
+    if (toWhitespaceTolerantPattern(REPLACEMENT).test(source)) {
+        return;
+    }
+
+    throw new Error(`_Bytes_read_string was not found in its expected form in ${path}; the file was left unchanged.`);
+}
+
+main(process.argv[2]);
diff --git a/src/Compiler/Parse/Primitives.elm b/src/Compiler/Parse/Primitives.elm
index 302029e5f..91587d865 100644
--- a/src/Compiler/Parse/Primitives.elm
+++ b/src/Compiler/Parse/Primitives.elm
@@ -483,24 +483,17 @@ snippetEncoder (Snippet { fptr, offset, length, offRow, offCol }) =
 
 snippetDecoder : BD.Decoder Snippet
 snippetDecoder =
-    -- This `String.toList` -> `String.fromList` round-trip prevents V8 from retaining
-    -- large "concatenated string" chains, which can cause OOMs.
-    -- Tested against `rtfeldman/elm-css` compilation.
-    -- The split call avoids elm-review flags for this pattern.
-    --
-    -- See the related discussion for context:
-    -- https://discourse.elm-lang.org/t/guida-compiler-was-there-are-3-elm-compilers-written-in-elm/10329/25
     BD.map5
         (\fptr offset length offRow offCol ->
             Snippet
-                { fptr = String.fromList fptr
+                { fptr = fptr
                 , offset = offset
                 , length = length
                 , offRow = offRow
                 , offCol = offCol
                 }
         )
-        (BD.map String.toList BD.string)
+        BD.string
         BD.int
         BD.int
         BD.int