diff --git a/embed/bcw2/bcw2.wasm b/embed/bcw2/bcw2.wasm index fedb5522..d770edab 100755 Binary files a/embed/bcw2/bcw2.wasm and b/embed/bcw2/bcw2.wasm differ diff --git a/embed/bcw2/build.sh b/embed/bcw2/build.sh index 3aee93f4..83fc712a 100755 --- a/embed/bcw2/build.sh +++ b/embed/bcw2/build.sh @@ -60,7 +60,7 @@ cd ~- "$BINARYEN/wasm-ctor-eval" -g -c _initialize bcw2.wasm -o bcw2.tmp "$BINARYEN/wasm-opt" -g --strip --strip-producers -c -O3 \ - bcw2.tmp -o bcw2.wasm \ + bcw2.tmp -o bcw2.wasm --low-memory-unused \ --enable-simd --enable-mutable-globals --enable-multivalue \ --enable-bulk-memory --enable-reference-types \ --enable-nontrapping-float-to-int --enable-sign-ext \ No newline at end of file diff --git a/embed/build.sh b/embed/build.sh index a6b21d36..117f8f15 100755 --- a/embed/build.sh +++ b/embed/build.sh @@ -27,7 +27,7 @@ trap 'rm -f sqlite3.tmp' EXIT "$BINARYEN/wasm-ctor-eval" -g -c _initialize sqlite3.wasm -o sqlite3.tmp "$BINARYEN/wasm-opt" -g --strip --strip-producers -c -O3 \ - sqlite3.tmp -o sqlite3.wasm \ + sqlite3.tmp -o sqlite3.wasm --low-memory-unused \ --enable-simd --enable-mutable-globals --enable-multivalue \ --enable-bulk-memory --enable-reference-types \ --enable-nontrapping-float-to-int --enable-sign-ext \ No newline at end of file diff --git a/embed/sqlite3.wasm b/embed/sqlite3.wasm index 31e10431..7be8de59 100755 Binary files a/embed/sqlite3.wasm and b/embed/sqlite3.wasm differ diff --git a/sqlite3/libc/build.sh b/sqlite3/libc/build.sh index 44deede4..3d9aa3dc 100755 --- a/sqlite3/libc/build.sh +++ b/sqlite3/libc/build.sh @@ -18,6 +18,8 @@ trap 'rm -f libc.tmp' EXIT -mbulk-memory -mreference-types \ -mnontrapping-fptoint -msign-ext \ -fno-stack-protector -fno-stack-clash-protection \ + -Wl,--stack-first \ + -Wl,--import-undefined \ -Wl,--initial-memory=16777216 \ -Wl,--export=memchr \ -Wl,--export=memcmp \ diff --git a/sqlite3/libc/libc.wasm b/sqlite3/libc/libc.wasm index 1e4a98c2..68687cdf 100755 Binary files a/sqlite3/libc/libc.wasm and b/sqlite3/libc/libc.wasm differ diff --git a/sqlite3/libc/libc.wat b/sqlite3/libc/libc.wat index a88fc63f..43545045 100644 --- a/sqlite3/libc/libc.wat +++ b/sqlite3/libc/libc.wat @@ -3,7 +3,7 @@ (type $1 (func (param i32 i32) (result i32))) (type $2 (func (param i32) (result i32))) (memory $0 256) - (data $0 (i32.const 1024) "\01") + (data $0 (i32.const 65536) "\01") (export "memory" (memory $0)) (export "memset" (func $memset)) (export "memcpy" (func $memcpy)) @@ -229,62 +229,70 @@ (local $1 i32) (local $2 i32) (local $3 v128) - (local $scratch v128) - (if - (i32.eqz - (local.tee $2 - (i32.and - (i8x16.bitmask - (i8x16.eq - (v128.load - (local.tee $1 - (i32.and - (local.get $0) - (i32.const -16) - ) + (block $block1 + (block $block + (br_if $block + (i8x16.all_true + (local.tee $3 + (v128.load + (local.tee $1 + (i32.and + (local.get $0) + (i32.const -16) ) ) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - ) - (i32.shl - (i32.const -1) - (i32.and - (local.get $0) - (i32.const 15) ) ) ) ) - ) - (then - (loop $label - (br_if $label - (i32.eqz - (local.tee $2 + (br_if $block + (i32.eqz + (local.tee $2 + (i32.and (i8x16.bitmask (i8x16.eq - (block (result v128) - (local.set $scratch - (v128.load offset=16 - (local.get $1) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 16) - ) - ) - (local.get $scratch) - ) + (local.get $3) (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) ) + (i32.shl + (i32.const -1) + (i32.and + (local.get $0) + (i32.const 15) + ) + ) ) ) ) ) + (br $block1) + ) + (loop $label + (local.set $3 + (v128.load offset=16 + (local.get $1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 16) + ) + ) + (br_if $label + (i8x16.all_true + (local.get $3) + ) + ) + ) + (local.set $2 + (i8x16.bitmask + (i8x16.eq + (local.get $3) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + ) + ) ) ) (i32.add @@ -624,74 +632,81 @@ (i32.const 0) ) (func $strchrnul (param $0 i32) (param $1 i32) (result i32) - (local $2 i32) + (local $2 v128) (local $3 v128) - (local $4 v128) - (if - (i32.eqz - (local.tee $0 - (i32.and - (i8x16.bitmask - (v128.or - (i8x16.eq - (local.tee $3 - (v128.load - (local.tee $2 - (i32.and - (local.get $0) - (i32.const -16) - ) + (local $4 i32) + (block $block + (if + (v128.any_true + (local.tee $2 + (v128.or + (i8x16.eq + (local.tee $2 + (v128.load + (local.tee $4 + (i32.and + (local.get $0) + (i32.const -16) ) ) ) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) - (i8x16.eq - (local.get $3) - (local.tee $4 - (i8x16.splat - (local.get $1) - ) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + ) + (i8x16.eq + (local.get $2) + (local.tee $3 + (i8x16.splat + (local.get $1) ) ) ) ) - (i32.shl - (i32.const -1) + ) + ) + (then + (br_if $block + (local.tee $0 (i32.and - (local.get $0) - (i32.const 15) + (i8x16.bitmask + (local.get $2) + ) + (i32.shl + (i32.const -1) + (i32.and + (local.get $0) + (i32.const 15) + ) + ) ) ) ) ) ) - (then - (loop $label - (local.set $3 - (v128.load offset=16 - (local.get $2) - ) + (loop $label + (local.set $2 + (v128.load offset=16 + (local.get $4) ) - (local.set $2 - (i32.add - (local.get $2) - (i32.const 16) - ) + ) + (local.set $4 + (i32.add + (local.get $4) + (i32.const 16) ) - (br_if $label - (i32.eqz - (local.tee $0 - (i8x16.bitmask - (v128.or - (i8x16.eq - (local.get $3) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (i8x16.eq - (local.get $3) - (local.get $4) - ) + ) + (br_if $label + (i32.eqz + (v128.any_true + (local.tee $2 + (v128.or + (i8x16.eq + (local.get $2) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + ) + (i8x16.eq + (local.get $2) + (local.get $3) ) ) ) @@ -699,83 +714,95 @@ ) ) ) + (local.set $0 + (i8x16.bitmask + (local.get $2) + ) + ) ) (i32.add - (local.get $2) + (local.get $4) (i32.ctz (local.get $0) ) ) ) (func $strchr (param $0 i32) (param $1 i32) (result i32) - (local $2 i32) + (local $2 v128) (local $3 v128) - (local $4 v128) - (if - (i32.eqz - (local.tee $0 - (i32.and - (i8x16.bitmask - (v128.or - (i8x16.eq - (local.tee $3 - (v128.load - (local.tee $2 - (i32.and - (local.get $0) - (i32.const -16) - ) + (local $4 i32) + (block $block + (if + (v128.any_true + (local.tee $2 + (v128.or + (i8x16.eq + (local.tee $2 + (v128.load + (local.tee $4 + (i32.and + (local.get $0) + (i32.const -16) ) ) ) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) - (i8x16.eq - (local.get $3) - (local.tee $4 - (i8x16.splat - (local.get $1) - ) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + ) + (i8x16.eq + (local.get $2) + (local.tee $3 + (i8x16.splat + (local.get $1) ) ) ) ) - (i32.shl - (i32.const -1) + ) + ) + (then + (br_if $block + (local.tee $0 (i32.and - (local.get $0) - (i32.const 15) + (i8x16.bitmask + (local.get $2) + ) + (i32.shl + (i32.const -1) + (i32.and + (local.get $0) + (i32.const 15) + ) + ) ) ) ) ) ) - (then - (loop $label - (local.set $3 - (v128.load offset=16 - (local.get $2) - ) + (loop $label + (local.set $2 + (v128.load offset=16 + (local.get $4) ) - (local.set $2 - (i32.add - (local.get $2) - (i32.const 16) - ) + ) + (local.set $4 + (i32.add + (local.get $4) + (i32.const 16) ) - (br_if $label - (i32.eqz - (local.tee $0 - (i8x16.bitmask - (v128.or - (i8x16.eq - (local.get $3) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (i8x16.eq - (local.get $3) - (local.get $4) - ) + ) + (br_if $label + (i32.eqz + (v128.any_true + (local.tee $2 + (v128.or + (i8x16.eq + (local.get $2) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + ) + (i8x16.eq + (local.get $2) + (local.get $3) ) ) ) @@ -783,11 +810,16 @@ ) ) ) + (local.set $0 + (i8x16.bitmask + (local.get $2) + ) + ) ) (select (local.tee $0 (i32.add - (local.get $2) + (local.get $4) (i32.ctz (local.get $0) ) @@ -808,67 +840,72 @@ (func $strspn (param $0 i32) (param $1 i32) (result i32) (local $2 i32) (local $3 i32) + (if + (i32.eqz + (local.tee $2 + (i32.load8_u + (local.get $1) + ) + ) + ) + (then + (return + (i32.const 0) + ) + ) + ) (v128.store - (i32.const 66560) + (i32.const 65520) (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (v128.store - (i32.const 66544) + (i32.const 65504) (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) - (if - (local.tee $2 - (i32.load8_u - (local.get $1) - ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) ) - (then - (local.set $1 + ) + (loop $label + (i32.store + (local.tee $3 (i32.add - (local.get $1) - (i32.const 1) - ) - ) - (loop $label - (i32.store - (local.tee $3 - (i32.add - (i32.and - (i32.shr_u - (local.get $2) - (i32.const 3) - ) - (i32.const 28) - ) - (i32.const 66544) - ) - ) - (i32.or - (i32.load - (local.get $3) - ) - (i32.shl - (i32.const 1) + (i32.and + (i32.shr_u (local.get $2) + (i32.const 3) ) + (i32.const 28) ) + (i32.const 65504) ) - (local.set $2 - (i32.load8_u - (local.get $1) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 1) - ) + ) + (i32.or + (i32.load + (local.get $3) ) - (br_if $label + (i32.shl + (i32.const 1) (local.get $2) ) ) ) + (local.set $2 + (i32.load8_u + (local.get $1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (br_if $label + (local.get $2) + ) ) (if (local.tee $2 @@ -893,7 +930,7 @@ ) (i32.const 28) ) - (i32.const 66544) + (i32.const 65504) ) ) (local.get $2) @@ -952,71 +989,78 @@ ) ) ) - (if - (i32.eqz - (local.tee $2 - (i32.and - (i8x16.bitmask - (v128.or - (i8x16.eq - (local.tee $4 - (v128.load - (local.tee $1 - (i32.and - (local.get $0) - (i32.const -16) - ) + (block $block1 + (if + (v128.any_true + (local.tee $4 + (v128.or + (i8x16.eq + (local.tee $4 + (v128.load + (local.tee $1 + (i32.and + (local.get $0) + (i32.const -16) ) ) ) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) - (i8x16.eq - (local.get $4) - (local.tee $5 - (i8x16.splat - (local.get $2) - ) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + ) + (i8x16.eq + (local.get $4) + (local.tee $5 + (i8x16.splat + (local.get $2) ) ) ) ) - (i32.shl - (i32.const -1) + ) + ) + (then + (br_if $block1 + (local.tee $2 (i32.and - (local.get $0) - (i32.const 15) + (i8x16.bitmask + (local.get $4) + ) + (i32.shl + (i32.const -1) + (i32.and + (local.get $0) + (i32.const 15) + ) + ) ) ) ) ) ) - (then - (loop $label - (local.set $4 - (v128.load offset=16 - (local.get $1) - ) + (loop $label + (local.set $4 + (v128.load offset=16 + (local.get $1) ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 16) - ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 16) ) - (br_if $label - (i32.eqz - (local.tee $2 - (i8x16.bitmask - (v128.or - (i8x16.eq - (local.get $4) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (i8x16.eq - (local.get $4) - (local.get $5) - ) + ) + (br_if $label + (i32.eqz + (v128.any_true + (local.tee $4 + (v128.or + (i8x16.eq + (local.get $4) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + ) + (i8x16.eq + (local.get $4) + (local.get $5) ) ) ) @@ -1024,6 +1068,11 @@ ) ) ) + (local.set $2 + (i8x16.bitmask + (local.get $4) + ) + ) ) (return (i32.sub @@ -1038,11 +1087,11 @@ ) ) (v128.store - (i32.const 66560) + (i32.const 65520) (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (v128.store - (i32.const 66544) + (i32.const 65504) (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (local.set $1 @@ -1062,7 +1111,7 @@ ) (i32.const 28) ) - (i32.const 66544) + (i32.const 65504) ) ) (i32.or @@ -1112,7 +1161,7 @@ ) (i32.const 28) ) - (i32.const 66544) + (i32.const 65504) ) ) (local.get $2) diff --git a/sqlite3/strings.c b/sqlite3/strings.c index fed7b48c..24cb9d79 100644 --- a/sqlite3/strings.c +++ b/sqlite3/strings.c @@ -66,10 +66,13 @@ size_t strlen(const char *s) { const v128_t *w = (void *)(s - align); while (true) { - int mask = - wasm_i8x16_bitmask(wasm_i8x16_eq(*w, (v128_t){})) >> align << align; - if (mask) { - return (char *)w - s + __builtin_ctz(mask); + if (!wasm_i8x16_all_true(*w)) { + const v128_t cmp = wasm_i8x16_eq(*w, (v128_t){}); + int mask = wasm_i8x16_bitmask(cmp) >> align << align; + __builtin_assume(mask || align); + if (mask) { + return (char *)w - s + __builtin_ctz(mask); + } } align = 0; w++; @@ -136,11 +139,13 @@ char *strchrnul(const char *s, int c) { const v128_t wc = wasm_i8x16_splat(c); while (true) { - int mask = wasm_i8x16_bitmask(wasm_i8x16_eq(*w, (v128_t){}) | - wasm_i8x16_eq(*w, wc)) >> - align << align; - if (mask) { - return (char *)w + __builtin_ctz(mask); + const v128_t cmp = wasm_i8x16_eq(*w, (v128_t){}) | wasm_i8x16_eq(*w, wc); + if (wasm_v128_any_true(cmp)) { + int mask = wasm_i8x16_bitmask(cmp) >> align << align; + __builtin_assume(mask || align); + if (mask) { + return (char *)w + __builtin_ctz(mask); + } } align = 0; w++; @@ -159,6 +164,8 @@ char *strchr(const char *s, int c) { << ((b) % (8 * sizeof(size_t)))) size_t strspn(const char *s, const char *c) { + if (!c[0]) return 0; + const char *const a = s; size_t byteset[32 / sizeof(size_t)] = {0}; diff --git a/util/sql3util/wasm/build.sh b/util/sql3util/wasm/build.sh index 71ad523e..53a00c5d 100755 --- a/util/sql3util/wasm/build.sh +++ b/util/sql3util/wasm/build.sh @@ -24,7 +24,7 @@ trap 'rm -f sql3parse_table.tmp' EXIT "$BINARYEN/wasm-ctor-eval" -c _initialize sql3parse_table.wasm -o sql3parse_table.tmp "$BINARYEN/wasm-opt" --strip --strip-debug --strip-producers -c -Oz \ - sql3parse_table.tmp -o sql3parse_table.wasm \ + sql3parse_table.tmp -o sql3parse_table.wasm --low-memory-unused \ --enable-simd --enable-mutable-globals --enable-multivalue \ --enable-bulk-memory --enable-reference-types \ --enable-nontrapping-float-to-int --enable-sign-ext \ No newline at end of file diff --git a/util/sql3util/wasm/sql3parse_table.wasm b/util/sql3util/wasm/sql3parse_table.wasm index 9fe71846..fa8af14a 100755 Binary files a/util/sql3util/wasm/sql3parse_table.wasm and b/util/sql3util/wasm/sql3parse_table.wasm differ diff --git a/vfs/tests/mptest/wasm/build.sh b/vfs/tests/mptest/wasm/build.sh index 9e660cf9..18c2fd5b 100755 --- a/vfs/tests/mptest/wasm/build.sh +++ b/vfs/tests/mptest/wasm/build.sh @@ -26,7 +26,7 @@ WASI_SDK="$ROOT/tools/wasi-sdk/bin" $(awk '{print "-Wl,--export="$0}' exports.txt) "$BINARYEN/wasm-opt" -g --strip --strip-producers -c -O3 \ - mptest.wasm -o mptest.tmp \ + mptest.wasm -o mptest.tmp --low-memory-unused \ --enable-simd --enable-mutable-globals --enable-multivalue \ --enable-bulk-memory --enable-reference-types \ --enable-nontrapping-float-to-int --enable-sign-ext diff --git a/vfs/tests/mptest/wasm/mptest.wasm b/vfs/tests/mptest/wasm/mptest.wasm index 063d883f..95942c08 100644 Binary files a/vfs/tests/mptest/wasm/mptest.wasm and b/vfs/tests/mptest/wasm/mptest.wasm differ diff --git a/vfs/tests/speedtest1/wasm/build.sh b/vfs/tests/speedtest1/wasm/build.sh index 833205f9..ffcf1572 100755 --- a/vfs/tests/speedtest1/wasm/build.sh +++ b/vfs/tests/speedtest1/wasm/build.sh @@ -21,7 +21,7 @@ WASI_SDK="$ROOT/tools/wasi-sdk/bin" $(awk '{print "-Wl,--export="$0}' exports.txt) "$BINARYEN/wasm-opt" -g --strip --strip-producers -c -O3 \ - speedtest1.wasm -o speedtest1.tmp \ + speedtest1.wasm -o speedtest1.tmp --low-memory-unused \ --enable-simd --enable-mutable-globals --enable-multivalue \ --enable-bulk-memory --enable-reference-types \ --enable-nontrapping-float-to-int --enable-sign-ext diff --git a/vfs/tests/speedtest1/wasm/speedtest1.wasm b/vfs/tests/speedtest1/wasm/speedtest1.wasm index 3d2201f4..9d42a9e3 100644 Binary files a/vfs/tests/speedtest1/wasm/speedtest1.wasm and b/vfs/tests/speedtest1/wasm/speedtest1.wasm differ