Skip to content

Commit d6d0246

Browse files
committed
zig1.c: decompress zig1.wasm.zst with zstd
1 parent 8f64b8f commit d6d0246

File tree

3 files changed

+77
-644
lines changed

3 files changed

+77
-644
lines changed

CMakeLists.txt

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -181,6 +181,16 @@ set(ZIG_CONFIG_ZIG_OUT "${CMAKE_BINARY_DIR}/config.zig")
181181

182182
set(STAGE1_SOURCES
183183
"${CMAKE_SOURCE_DIR}/stage1/zig1.c"
184+
"${CMAKE_SOURCE_DIR}/stage1/zstd/lib/decompress/huf_decompress.c"
185+
"${CMAKE_SOURCE_DIR}/stage1/zstd/lib/decompress/zstd_ddict.c"
186+
"${CMAKE_SOURCE_DIR}/stage1/zstd/lib/decompress/zstd_decompress.c"
187+
"${CMAKE_SOURCE_DIR}/stage1/zstd/lib/decompress/zstd_decompress_block.c"
188+
"${CMAKE_SOURCE_DIR}/stage1/zstd/lib/common/entropy_common.c"
189+
"${CMAKE_SOURCE_DIR}/stage1/zstd/lib/common/error_private.c"
190+
"${CMAKE_SOURCE_DIR}/stage1/zstd/lib/common/fse_decompress.c"
191+
"${CMAKE_SOURCE_DIR}/stage1/zstd/lib/common/pool.c"
192+
"${CMAKE_SOURCE_DIR}/stage1/zstd/lib/common/xxhash.c"
193+
"${CMAKE_SOURCE_DIR}/stage1/zstd/lib/common/zstd_common.c"
184194
)
185195
set(ZIG_CPP_SOURCES
186196
# These are planned to stay even when we are self-hosted.
@@ -710,14 +720,15 @@ endif()
710720
add_executable(zig1 ${STAGE1_SOURCES})
711721
set_target_properties(zig1 PROPERTIES COMPILE_FLAGS ${ZIG1_COMPILE_FLAGS})
712722
target_link_libraries(zig1 LINK_PUBLIC m)
713-
723+
target_include_directories(zig1 PUBLIC "${CMAKE_SOURCE_DIR}/stage1/zstd/lib")
724+
target_compile_definitions(zig1 PRIVATE ZSTD_DISABLE_ASM)
714725

715726
set(ZIG2_C_SOURCE "${CMAKE_BINARY_DIR}/zig2.c")
716727
set(BUILD_ZIG2_ARGS
717728
"${CMAKE_SOURCE_DIR}/lib"
718729
"${CMAKE_BINARY_DIR}"
719730
zig2
720-
"${CMAKE_SOURCE_DIR}/stage1/zig1.wasm"
731+
"${CMAKE_SOURCE_DIR}/stage1/zig1.wasm.zst"
721732
build-exe src/main.zig -ofmt=c -lc
722733
-target x86_64-linux-musl # TODO: autodetect in zig1.c
723734
-OReleaseFast
@@ -736,7 +747,7 @@ set(BUILD_COMPILER_RT_ARGS
736747
"${CMAKE_SOURCE_DIR}/lib"
737748
"${CMAKE_BINARY_DIR}"
738749
compiler_rt
739-
"${CMAKE_SOURCE_DIR}/stage1/zig1.wasm"
750+
"${CMAKE_SOURCE_DIR}/stage1/zig1.wasm.zst"
740751
build-obj lib/compiler_rt.zig -ofmt=c
741752
-target x86_64-linux-musl # TODO: autodetect in zig1.c
742753
-OReleaseFast

stage1/zig1.c

Lines changed: 63 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
#include <sys/random.h>
2121
#endif
2222

23+
#include <zstd.h>
24+
2325
enum wasi_errno_t {
2426
WASI_ESUCCESS = 0,
2527
WASI_E2BIG = 1,
@@ -4122,7 +4124,12 @@ int main(int argc, char **argv) {
41224124

41234125
new_argv[new_argv_i] = NULL;
41244126

4125-
const struct ByteSlice mod = read_file_alloc(wasm_file);
4127+
const struct ByteSlice compressed_bytes = read_file_alloc(wasm_file);
4128+
4129+
const size_t max_uncompressed_size = 2500000;
4130+
char *mod_ptr = arena_alloc(max_uncompressed_size);
4131+
size_t mod_len = ZSTD_decompress(mod_ptr, max_uncompressed_size,
4132+
compressed_bytes.ptr, compressed_bytes.len);
41264133

41274134
int cwd = err_wrap("opening cwd", open(".", O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_PATH));
41284135
int zig_lib_dir = err_wrap("opening zig lib dir", open(zig_lib_dir_path, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_PATH));
@@ -4136,22 +4143,22 @@ int main(int argc, char **argv) {
41364143

41374144
uint32_t i = 0;
41384145

4139-
if (mod.ptr[0] != 0 || mod.ptr[1] != 'a' || mod.ptr[2] != 's' || mod.ptr[3] != 'm') {
4146+
if (mod_ptr[0] != 0 || mod_ptr[1] != 'a' || mod_ptr[2] != 's' || mod_ptr[3] != 'm') {
41404147
panic("bad magic");
41414148
}
41424149
i += 4;
41434150

4144-
uint32_t version = read_u32_le(mod.ptr + i);
4151+
uint32_t version = read_u32_le(mod_ptr + i);
41454152
i += 4;
41464153
if (version != 1) panic("bad wasm version");
41474154

41484155
uint32_t section_starts[13];
41494156
memset(&section_starts, 0, sizeof(uint32_t) * 13);
41504157

4151-
while (i < mod.len) {
4152-
uint8_t section_id = mod.ptr[i];
4158+
while (i < mod_len) {
4159+
uint8_t section_id = mod_ptr[i];
41534160
i += 1;
4154-
uint32_t section_len = read32_uleb128(mod.ptr, &i);
4161+
uint32_t section_len = read32_uleb128(mod_ptr, &i);
41554162
section_starts[section_id] = i;
41564163
i += section_len;
41574164
}
@@ -4160,29 +4167,29 @@ int main(int argc, char **argv) {
41604167
struct TypeInfo *types;
41614168
{
41624169
i = section_starts[Section_type];
4163-
uint32_t types_len = read32_uleb128(mod.ptr, &i);
4170+
uint32_t types_len = read32_uleb128(mod_ptr, &i);
41644171
types = arena_alloc(sizeof(struct TypeInfo) * types_len);
41654172
for (size_t type_i = 0; type_i < types_len; type_i += 1) {
41664173
struct TypeInfo *info = &types[type_i];
4167-
if (mod.ptr[i] != 0x60) panic("bad type byte");
4174+
if (mod_ptr[i] != 0x60) panic("bad type byte");
41684175
i += 1;
41694176

4170-
info->param_count = read32_uleb128(mod.ptr, &i);
4177+
info->param_count = read32_uleb128(mod_ptr, &i);
41714178
if (info->param_count > 32) panic("found a type with over 32 parameters");
41724179
info->param_types = 0;
41734180
for (uint32_t param_i = 0; param_i < info->param_count; param_i += 1) {
4174-
int64_t param_type = read64_ileb128(mod.ptr, &i);
4181+
int64_t param_type = read64_ileb128(mod_ptr, &i);
41754182
switch (param_type) {
41764183
case -1: case -3: bs_unset(&info->param_types, param_i); break;
41774184
case -2: case -4: bs_set(&info->param_types, param_i); break;
41784185
default: panic("unexpected param type");
41794186
}
41804187
}
41814188

4182-
info->result_count = read32_uleb128(mod.ptr, &i);
4189+
info->result_count = read32_uleb128(mod_ptr, &i);
41834190
info->result_types = 0;
41844191
for (uint32_t result_i = 0; result_i < info->result_count; result_i += 1) {
4185-
int64_t result_type = read64_ileb128(mod.ptr, &i);
4192+
int64_t result_type = read64_ileb128(mod_ptr, &i);
41864193
switch (result_type) {
41874194
case -1: case -3: bs_unset(&info->result_types, result_i); break;
41884195
case -2: case -4: bs_set(&info->result_types, result_i); break;
@@ -4197,18 +4204,18 @@ int main(int argc, char **argv) {
41974204
uint32_t imports_len;
41984205
{
41994206
i = section_starts[Section_import];
4200-
imports_len = read32_uleb128(mod.ptr, &i);
4207+
imports_len = read32_uleb128(mod_ptr, &i);
42014208
imports = arena_alloc(sizeof(struct Import) * imports_len);
42024209
for (size_t imp_i = 0; imp_i < imports_len; imp_i += 1) {
42034210
struct Import *imp = &imports[imp_i];
42044211

4205-
struct ByteSlice mod_name = read_name(mod.ptr, &i);
4212+
struct ByteSlice mod_name = read_name(mod_ptr, &i);
42064213
if (mod_name.len == strlen("wasi_snapshot_preview1") &&
42074214
memcmp(mod_name.ptr, "wasi_snapshot_preview1", mod_name.len) == 0) {
42084215
imp->mod = ImpMod_wasi_snapshot_preview1;
42094216
} else panic("unknown import module");
42104217

4211-
struct ByteSlice sym_name = read_name(mod.ptr, &i);
4218+
struct ByteSlice sym_name = read_name(mod_ptr, &i);
42124219
if (sym_name.len == strlen("args_get") &&
42134220
memcmp(sym_name.ptr, "args_get", sym_name.len) == 0) {
42144221
imp->name = ImpName_args_get;
@@ -4292,21 +4299,21 @@ int main(int argc, char **argv) {
42924299
imp->name = ImpName_random_get;
42934300
} else panic("unknown import name");
42944301

4295-
uint32_t desc = read32_uleb128(mod.ptr, &i);
4302+
uint32_t desc = read32_uleb128(mod_ptr, &i);
42964303
if (desc != 0) panic("external kind not function");
4297-
imp->type_idx = read32_uleb128(mod.ptr, &i);
4304+
imp->type_idx = read32_uleb128(mod_ptr, &i);
42984305
}
42994306
}
43004307

43014308
// Find _start in the exports
43024309
uint32_t start_fn_idx;
43034310
{
43044311
i = section_starts[Section_export];
4305-
uint32_t count = read32_uleb128(mod.ptr, &i);
4312+
uint32_t count = read32_uleb128(mod_ptr, &i);
43064313
for (; count > 0; count -= 1) {
4307-
struct ByteSlice name = read_name(mod.ptr, &i);
4308-
uint32_t desc = read32_uleb128(mod.ptr, &i);
4309-
start_fn_idx = read32_uleb128(mod.ptr, &i);
4314+
struct ByteSlice name = read_name(mod_ptr, &i);
4315+
uint32_t desc = read32_uleb128(mod_ptr, &i);
4316+
start_fn_idx = read32_uleb128(mod_ptr, &i);
43104317
if (desc == 0 && name.len == strlen("_start") &&
43114318
memcmp(name.ptr, "_start", name.len) == 0)
43124319
{
@@ -4321,30 +4328,30 @@ int main(int argc, char **argv) {
43214328
uint32_t functions_len;
43224329
{
43234330
i = section_starts[Section_function];
4324-
functions_len = read32_uleb128(mod.ptr, &i);
4331+
functions_len = read32_uleb128(mod_ptr, &i);
43254332
functions = arena_alloc(sizeof(struct Function) * functions_len);
43264333
for (size_t func_i = 0; func_i < functions_len; func_i += 1) {
43274334
struct Function *func = &functions[func_i];
4328-
func->type_idx = read32_uleb128(mod.ptr, &i);
4335+
func->type_idx = read32_uleb128(mod_ptr, &i);
43294336
}
43304337
}
43314338

43324339
// Allocate and initialize globals.
43334340
uint64_t *globals;
43344341
{
43354342
i = section_starts[Section_global];
4336-
uint32_t globals_len = read32_uleb128(mod.ptr, &i);
4343+
uint32_t globals_len = read32_uleb128(mod_ptr, &i);
43374344
globals = arena_alloc(sizeof(uint64_t) * globals_len);
43384345
for (size_t glob_i = 0; glob_i < globals_len; glob_i += 1) {
43394346
uint64_t *global = &globals[glob_i];
4340-
uint32_t content_type = read32_uleb128(mod.ptr, &i);
4341-
uint32_t mutability = read32_uleb128(mod.ptr, &i);
4347+
uint32_t content_type = read32_uleb128(mod_ptr, &i);
4348+
uint32_t mutability = read32_uleb128(mod_ptr, &i);
43424349
if (mutability != 1) panic("expected mutable global");
43434350
if (content_type != 0x7f) panic("unexpected content type");
4344-
uint8_t opcode = mod.ptr[i];
4351+
uint8_t opcode = mod_ptr[i];
43454352
i += 1;
43464353
if (opcode != WasmOp_i32_const) panic("expected i32_const op");
4347-
uint32_t init = read32_ileb128(mod.ptr, &i);
4354+
uint32_t init = read32_ileb128(mod_ptr, &i);
43484355
*global = (uint32_t)init;
43494356
}
43504357
}
@@ -4353,64 +4360,64 @@ int main(int argc, char **argv) {
43534360
uint32_t memory_len;
43544361
{
43554362
i = section_starts[Section_memory];
4356-
uint32_t memories_len = read32_uleb128(mod.ptr, &i);
4363+
uint32_t memories_len = read32_uleb128(mod_ptr, &i);
43574364
if (memories_len != 1) panic("unexpected memory count");
4358-
uint32_t flags = read32_uleb128(mod.ptr, &i);
4365+
uint32_t flags = read32_uleb128(mod_ptr, &i);
43594366
(void)flags;
4360-
memory_len = read32_uleb128(mod.ptr, &i) * wasm_page_size;
4367+
memory_len = read32_uleb128(mod_ptr, &i) * wasm_page_size;
43614368

43624369
i = section_starts[Section_data];
4363-
uint32_t datas_count = read32_uleb128(mod.ptr, &i);
4370+
uint32_t datas_count = read32_uleb128(mod_ptr, &i);
43644371
for (; datas_count > 0; datas_count -= 1) {
4365-
uint32_t mode = read32_uleb128(mod.ptr, &i);
4372+
uint32_t mode = read32_uleb128(mod_ptr, &i);
43664373
if (mode != 0) panic("expected mode 0");
4367-
enum WasmOp opcode = mod.ptr[i];
4374+
enum WasmOp opcode = mod_ptr[i];
43684375
i += 1;
43694376
if (opcode != WasmOp_i32_const) panic("expected opcode i32_const");
4370-
uint32_t offset = read32_uleb128(mod.ptr, &i);
4371-
enum WasmOp end = mod.ptr[i];
4377+
uint32_t offset = read32_uleb128(mod_ptr, &i);
4378+
enum WasmOp end = mod_ptr[i];
43724379
if (end != WasmOp_end) panic("expected end opcode");
43734380
i += 1;
4374-
uint32_t bytes_len = read32_uleb128(mod.ptr, &i);
4375-
memcpy(memory + offset, mod.ptr + i, bytes_len);
4381+
uint32_t bytes_len = read32_uleb128(mod_ptr, &i);
4382+
memcpy(memory + offset, mod_ptr + i, bytes_len);
43764383
i += bytes_len;
43774384
}
43784385
}
43794386

43804387
uint32_t *table = NULL;
43814388
{
43824389
i = section_starts[Section_table];
4383-
uint32_t table_count = read32_uleb128(mod.ptr, &i);
4390+
uint32_t table_count = read32_uleb128(mod_ptr, &i);
43844391
if (table_count > 1) {
43854392
panic("expected only one table section");
43864393
} else if (table_count == 1) {
4387-
uint32_t element_type = read32_uleb128(mod.ptr, &i);
4394+
uint32_t element_type = read32_uleb128(mod_ptr, &i);
43884395
(void)element_type;
4389-
uint32_t has_max = read32_uleb128(mod.ptr, &i);
4396+
uint32_t has_max = read32_uleb128(mod_ptr, &i);
43904397
if (has_max != 1) panic("expected has_max==1");
4391-
uint32_t initial = read32_uleb128(mod.ptr, &i);
4398+
uint32_t initial = read32_uleb128(mod_ptr, &i);
43924399
(void)initial;
4393-
uint32_t maximum = read32_uleb128(mod.ptr, &i);
4400+
uint32_t maximum = read32_uleb128(mod_ptr, &i);
43944401

43954402
i = section_starts[Section_element];
4396-
uint32_t element_section_count = read32_uleb128(mod.ptr, &i);
4403+
uint32_t element_section_count = read32_uleb128(mod_ptr, &i);
43974404
if (element_section_count != 1) panic("expected one element section");
4398-
uint32_t flags = read32_uleb128(mod.ptr, &i);
4405+
uint32_t flags = read32_uleb128(mod_ptr, &i);
43994406
(void)flags;
4400-
enum WasmOp opcode = mod.ptr[i];
4407+
enum WasmOp opcode = mod_ptr[i];
44014408
i += 1;
44024409
if (opcode != WasmOp_i32_const) panic("expected op i32_const");
4403-
uint32_t offset = read32_uleb128(mod.ptr, &i);
4404-
enum WasmOp end = mod.ptr[i];
4410+
uint32_t offset = read32_uleb128(mod_ptr, &i);
4411+
enum WasmOp end = mod_ptr[i];
44054412
if (end != WasmOp_end) panic("expected op end");
44064413
i += 1;
4407-
uint32_t elem_count = read32_uleb128(mod.ptr, &i);
4414+
uint32_t elem_count = read32_uleb128(mod_ptr, &i);
44084415

44094416
table = arena_alloc(sizeof(uint32_t) * maximum);
44104417
memset(table, 0, sizeof(uint32_t) * maximum);
44114418

44124419
for (uint32_t elem_i = 0; elem_i < elem_count; elem_i += 1) {
4413-
table[elem_i + offset] = read32_uleb128(mod.ptr, &i);
4420+
table[elem_i + offset] = read32_uleb128(mod_ptr, &i);
44144421
}
44154422
}
44164423
}
@@ -4420,7 +4427,7 @@ int main(int argc, char **argv) {
44204427
memset(&vm, 0xaa, sizeof(struct VirtualMachine)); // to match the zig version
44214428
#endif
44224429
vm.stack = arena_alloc(sizeof(uint64_t) * 10000000),
4423-
vm.mod_ptr = mod.ptr;
4430+
vm.mod_ptr = mod_ptr;
44244431
vm.opcodes = arena_alloc(2000000);
44254432
vm.operands = arena_alloc(sizeof(uint32_t) * 2000000);
44264433
vm.stack_top = 0;
@@ -4436,26 +4443,26 @@ int main(int argc, char **argv) {
44364443

44374444
{
44384445
uint32_t code_i = section_starts[Section_code];
4439-
uint32_t codes_len = read32_uleb128(mod.ptr, &code_i);
4446+
uint32_t codes_len = read32_uleb128(mod_ptr, &code_i);
44404447
if (codes_len != functions_len) panic("code/function length mismatch");
44414448
struct ProgramCounter pc;
44424449
pc.opcode = 0;
44434450
pc.operand = 0;
44444451
for (uint32_t func_i = 0; func_i < functions_len; func_i += 1) {
44454452
struct Function *func = &functions[func_i];
4446-
uint32_t size = read32_uleb128(mod.ptr, &code_i);
4453+
uint32_t size = read32_uleb128(mod_ptr, &code_i);
44474454
uint32_t code_begin = code_i;
44484455

44494456
struct TypeInfo *type_info = &vm.types[func->type_idx];
44504457
func->locals_count = 0;
44514458
func->local_types = malloc(sizeof(uint32_t) * ((type_info->param_count + func->locals_count + 31) / 32));
44524459
func->local_types[0] = type_info->param_types;
44534460

4454-
for (uint32_t local_sets_count = read32_uleb128(mod.ptr, &code_i);
4461+
for (uint32_t local_sets_count = read32_uleb128(mod_ptr, &code_i);
44554462
local_sets_count > 0; local_sets_count -= 1)
44564463
{
4457-
uint32_t set_count = read32_uleb128(mod.ptr, &code_i);
4458-
int64_t local_type = read64_ileb128(mod.ptr, &code_i);
4464+
uint32_t set_count = read32_uleb128(mod_ptr, &code_i);
4465+
int64_t local_type = read64_ileb128(mod_ptr, &code_i);
44594466

44604467
uint32_t i = type_info->param_count + func->locals_count;
44614468
func->locals_count += set_count;

0 commit comments

Comments
 (0)