Skip to content

Commit 4ed8acb

Browse files
authored
fix: Fix compiler-internal UTF-8 encoding & decoding helpers (#1345)
1 parent 6dce0f2 commit 4ed8acb

File tree

1 file changed

+10
-34
lines changed

1 file changed

+10
-34
lines changed

src/module.ts

Lines changed: 10 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -2311,25 +2311,21 @@ function stringLengthUTF8(str: string): usize {
23112311
u = 0x10000 + ((u & 0x3FF) << 10) | (str.charCodeAt(++i) & 0x3FF);
23122312
}
23132313
if (u <= 0x7F) {
2314-
++len;
2314+
len += 1;
23152315
} else if (u <= 0x7FF) {
23162316
len += 2;
23172317
} else if (u <= 0xFFFF) {
23182318
len += 3;
23192319
} else if (u <= 0x1FFFFF) {
23202320
len += 4;
2321-
} else if (u <= 0x3FFFFFF) {
2322-
len += 5;
2323-
} else {
2324-
len += 6;
23252321
}
23262322
}
23272323
return len;
23282324
}
23292325

23302326
function allocString(str: string | null): usize {
23312327
if (str === null) return 0;
2332-
var ptr = binaryen._malloc(stringLengthUTF8(str) + 1);
2328+
var ptr = binaryen._malloc(stringLengthUTF8(str) + 1) >>> 0;
23332329
// the following is based on Emscripten's stringToUTF8Array
23342330
var idx = ptr;
23352331
for (let i = 0, k = str.length; i < k; ++i) {
@@ -2346,21 +2342,9 @@ function allocString(str: string | null): usize {
23462342
binaryen.__i32_store8(idx++, (0xE0 | (u >>> 12) ) as u8);
23472343
binaryen.__i32_store8(idx++, (0x80 | ((u >>> 6) & 63)) as u8);
23482344
binaryen.__i32_store8(idx++, (0x80 | ( u & 63)) as u8);
2349-
} else if (u <= 0x1FFFFF) {
2350-
binaryen.__i32_store8(idx++, (0xF0 | (u >>> 18) ) as u8);
2351-
binaryen.__i32_store8(idx++, (0x80 | ((u >>> 12) & 63)) as u8);
2352-
binaryen.__i32_store8(idx++, (0x80 | ((u >>> 6) & 63)) as u8);
2353-
binaryen.__i32_store8(idx++, (0x80 | ( u & 63)) as u8);
2354-
} else if (u <= 0x3FFFFFF) {
2355-
binaryen.__i32_store8(idx++, (0xF8 | (u >>> 24) ) as u8);
2356-
binaryen.__i32_store8(idx++, (0x80 | ((u >>> 18) & 63)) as u8);
2357-
binaryen.__i32_store8(idx++, (0x80 | ((u >>> 12) & 63)) as u8);
2358-
binaryen.__i32_store8(idx++, (0x80 | ((u >>> 6) & 63)) as u8);
2359-
binaryen.__i32_store8(idx++, (0x80 | ( u & 63)) as u8);
23602345
} else {
2361-
binaryen.__i32_store8(idx++, (0xFC | (u >>> 30) ) as u8);
2362-
binaryen.__i32_store8(idx++, (0x80 | ((u >>> 24) & 63)) as u8);
2363-
binaryen.__i32_store8(idx++, (0x80 | ((u >>> 18) & 63)) as u8);
2346+
assert(u < 0x200000, "Invalid Unicode code point during allocString");
2347+
binaryen.__i32_store8(idx++, (0xF0 | (u >>> 18) ) as u8);
23642348
binaryen.__i32_store8(idx++, (0x80 | ((u >>> 12) & 63)) as u8);
23652349
binaryen.__i32_store8(idx++, (0x80 | ((u >>> 6) & 63)) as u8);
23662350
binaryen.__i32_store8(idx++, (0x80 | ( u & 63)) as u8);
@@ -2370,9 +2354,9 @@ function allocString(str: string | null): usize {
23702354
return ptr;
23712355
}
23722356

2373-
function readBuffer(ptr: usize, length: i32): Uint8Array {
2374-
var ret = new Uint8Array(length);
2375-
for (let i = 0; i < length; ++i) {
2357+
function readBuffer(ptr: usize, len: i32): Uint8Array {
2358+
var ret = new Uint8Array(len);
2359+
for (let i = 0; i < len; ++i) {
23762360
ret[i] = binaryen.__i32_load8_u(ptr + <usize>i);
23772361
}
23782362
return ret;
@@ -2383,7 +2367,7 @@ export function readString(ptr: usize): string | null {
23832367
var arr = new Array<i32>();
23842368
// the following is based on Emscripten's UTF8ArrayToString
23852369
var cp: u32;
2386-
var u1: u32, u2: u32, u3: u32, u4: u32, u5: u32;
2370+
var u1: u32, u2: u32, u3: u32;
23872371
while (cp = binaryen.__i32_load8_u(ptr++)) {
23882372
if (!(cp & 0x80)) {
23892373
arr.push(cp);
@@ -2402,26 +2386,18 @@ export function readString(ptr: usize): string | null {
24022386
if ((cp & 0xF8) == 0xF0) {
24032387
cp = ((cp & 7) << 18) | (u1 << 12) | (u2 << 6) | u3;
24042388
} else {
2405-
u4 = binaryen.__i32_load8_u(ptr++) & 63;
2406-
if ((cp & 0xFC) == 0xF8) {
2407-
cp = ((cp & 3) << 24) | (u1 << 18) | (u2 << 12) | (u3 << 6) | u4;
2408-
} else {
2409-
u5 = binaryen.__i32_load8_u(ptr++) & 63;
2410-
cp = ((cp & 1) << 30) | (u1 << 24) | (u2 << 18) | (u3 << 12) | (u4 << 6) | u5;
2411-
}
2389+
assert(false, "Invalid UTF8 sequence during readString");
24122390
}
24132391
}
2414-
arr.push(cp);
24152392
if (cp < 0x10000) {
24162393
arr.push(cp);
24172394
} else {
24182395
let ch = cp - 0x10000;
2419-
arr.push(0xD800 | (ch >> 10));
2396+
arr.push(0xD800 | (ch >>> 10));
24202397
arr.push(0xDC00 | (ch & 0x3FF));
24212398
}
24222399
}
24232400
return String.fromCharCodes(arr);
2424-
// return String.fromCodePoints(arr);
24252401
}
24262402

24272403
/** Result structure of {@link Module#toBinary}. */

0 commit comments

Comments
 (0)