diff --git a/src-self-hosted/clang.zig b/src-self-hosted/clang.zig index 87970168e8f8..ba2883dbbc8e 100644 --- a/src-self-hosted/clang.zig +++ b/src-self-hosted/clang.zig @@ -43,6 +43,7 @@ pub const struct_ZigClangImplicitCastExpr = @OpaqueType(); pub const struct_ZigClangIncompleteArrayType = @OpaqueType(); pub const struct_ZigClangIntegerLiteral = @OpaqueType(); pub const struct_ZigClangMacroDefinitionRecord = @OpaqueType(); +pub const struct_ZigClangMacroExpansion = @OpaqueType(); pub const struct_ZigClangMacroQualifiedType = @OpaqueType(); pub const struct_ZigClangMemberExpr = @OpaqueType(); pub const struct_ZigClangNamedDecl = @OpaqueType(); @@ -889,6 +890,7 @@ pub const ZigClangImplicitCastExpr = struct_ZigClangImplicitCastExpr; pub const ZigClangIncompleteArrayType = struct_ZigClangIncompleteArrayType; pub const ZigClangIntegerLiteral = struct_ZigClangIntegerLiteral; pub const ZigClangMacroDefinitionRecord = struct_ZigClangMacroDefinitionRecord; +pub const ZigClangMacroExpansion = struct_ZigClangMacroExpansion; pub const ZigClangMacroQualifiedType = struct_ZigClangMacroQualifiedType; pub const ZigClangMemberExpr = struct_ZigClangMemberExpr; pub const ZigClangNamedDecl = struct_ZigClangNamedDecl; @@ -1128,3 +1130,5 @@ pub extern fn ZigClangCompoundAssignOperator_getBeginLoc(*const ZigClangCompound pub extern fn ZigClangCompoundAssignOperator_getOpcode(*const ZigClangCompoundAssignOperator) ZigClangBO; pub extern fn ZigClangCompoundAssignOperator_getLHS(*const ZigClangCompoundAssignOperator) *const ZigClangExpr; pub extern fn ZigClangCompoundAssignOperator_getRHS(*const ZigClangCompoundAssignOperator) *const ZigClangExpr; + +pub extern fn ZigClangMacroExpansion_getDefinition(*const ZigClangMacroExpansion) *const ZigClangMacroDefinitionRecord; diff --git a/src/all_types.hpp b/src/all_types.hpp index ea46ab81a6dd..31682df32814 100644 --- a/src/all_types.hpp +++ b/src/all_types.hpp @@ -1036,8 +1036,15 @@ struct AstNodeFloatLiteral { bool overflow; }; +enum IntLiteralFormat { + IntLiteralFormatNone, + IntLiteralFormatHex, + IntLiteralFormatOctal, +}; + struct AstNodeIntLiteral { BigInt *bigint; + IntLiteralFormat format = IntLiteralFormatNone; }; struct AstNodeStructValueField { @@ -2129,6 +2136,7 @@ struct CodeGen { bool verbose_llvm_ir; bool verbose_cimport; bool verbose_cc; + bool quiet_translate_c; bool error_during_imports; bool generate_error_name_table; bool enable_cache; // mutually exclusive with output_dir diff --git a/src/ast_render.cpp b/src/ast_render.cpp index 2be932ac8e39..b4b6d682893f 100644 --- a/src/ast_render.cpp +++ b/src/ast_render.cpp @@ -615,8 +615,21 @@ static void render_node_extra(AstRender *ar, AstNode *node, bool grouped) { { Buf rendered_buf = BUF_INIT; buf_resize(&rendered_buf, 0); - bigint_append_buf(&rendered_buf, node->data.int_literal.bigint, 10); - fprintf(ar->f, "%s", buf_ptr(&rendered_buf)); + + switch (node->data.int_literal.format) { + case IntLiteralFormatHex: + bigint_append_buf(&rendered_buf, node->data.int_literal.bigint, 16); + fprintf(ar->f, "0x%s", buf_ptr(&rendered_buf)); + break; + case IntLiteralFormatOctal: + bigint_append_buf(&rendered_buf, node->data.int_literal.bigint, 8); + fprintf(ar->f, "0o%s", buf_ptr(&rendered_buf)); + break; + case IntLiteralFormatNone: + bigint_append_buf(&rendered_buf, node->data.int_literal.bigint, 10); + fprintf(ar->f, "%s", buf_ptr(&rendered_buf)); + break; + } } break; case NodeTypeStringLiteral: diff --git a/src/c_tokenizer.cpp b/src/c_tokenizer.cpp index 55fde190031c..42b958b8f490 100644 --- a/src/c_tokenizer.cpp +++ b/src/c_tokenizer.cpp @@ -116,6 +116,7 @@ static void begin_token(CTokenize *ctok, CTokId id) { break; case CTokIdCharLit: case CTokIdNumLitFloat: + case CTokIdPlus: case CTokIdMinus: case CTokIdLParen: case CTokIdRParen: @@ -125,7 +126,11 @@ static void begin_token(CTokenize *ctok, CTokId id) { case CTokIdBang: case CTokIdTilde: case CTokIdShl: + case CTokIdShr: case CTokIdLt: + case CTokIdGt: + case CTokIdInc: + case CTokIdDec: break; } } @@ -229,6 +234,10 @@ void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) { begin_token(ctok, CTokIdLt); ctok->state = CTokStateGotLt; break; + case '>': + begin_token(ctok, CTokIdGt); + ctok->state = CTokStateGotGt; + break; case '(': begin_token(ctok, CTokIdLParen); end_token(ctok); @@ -241,8 +250,13 @@ void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) { begin_token(ctok, CTokIdAsterisk); end_token(ctok); break; + case '+': + begin_token(ctok, CTokIdPlus); + ctok->state = CTokStateGotPlus; + break; case '-': begin_token(ctok, CTokIdMinus); + ctok->state = CTokStateGotMinus; end_token(ctok); break; case '!': @@ -270,6 +284,45 @@ void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) { continue; } break; + case CTokStateGotGt: + switch (*c) { + case '>': + ctok->cur_tok->id = CTokIdShr; + end_token(ctok); + ctok->state = CTokStateStart; + break; + default: + end_token(ctok); + ctok->state = CTokStateStart; + continue; + } + break; + case CTokStateGotPlus: + switch (*c) { + case '+': + ctok->cur_tok->id = CTokIdInc; + end_token(ctok); + ctok->state = CTokStateStart; + break; + default: + end_token(ctok); + ctok->state = CTokStateStart; + continue; + } + break; + case CTokStateGotMinus: + switch (*c) { + case '-': + ctok->cur_tok->id = CTokIdDec; + end_token(ctok); + ctok->state = CTokStateStart; + break; + default: + end_token(ctok); + ctok->state = CTokStateStart; + continue; + } + break; case CTokStateFloat: switch (*c) { case '.': @@ -375,6 +428,7 @@ void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) { case 'x': case 'X': ctok->state = CTokStateHex; + ctok->cur_tok->data.num_lit_int.format = CNumLitFormatHex; break; case '.': ctok->state = CTokStateFloat; @@ -391,6 +445,7 @@ void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) { default: c -= 1; ctok->state = CTokStateOctal; + ctok->cur_tok->data.num_lit_int.format = CNumLitFormatOctal; continue; } break; @@ -811,6 +866,9 @@ void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) { case CTokStateNumLitIntSuffixUL: case CTokStateNumLitIntSuffixLL: case CTokStateGotLt: + case CTokStateGotGt: + case CTokStateGotPlus: + case CTokStateGotMinus: end_token(ctok); break; case CTokStateFloat: diff --git a/src/c_tokenizer.hpp b/src/c_tokenizer.hpp index eaca09098f8a..a55ac9026e37 100644 --- a/src/c_tokenizer.hpp +++ b/src/c_tokenizer.hpp @@ -17,6 +17,7 @@ enum CTokId { CTokIdNumLitInt, CTokIdNumLitFloat, CTokIdSymbol, + CTokIdPlus, CTokIdMinus, CTokIdLParen, CTokIdRParen, @@ -26,7 +27,11 @@ enum CTokId { CTokIdBang, CTokIdTilde, CTokIdShl, + CTokIdShr, CTokIdLt, + CTokIdGt, + CTokIdInc, + CTokIdDec, }; enum CNumLitSuffix { @@ -38,9 +43,16 @@ enum CNumLitSuffix { CNumLitSuffixLLU, }; +enum CNumLitFormat { + CNumLitFormatNone, + CNumLitFormatHex, + CNumLitFormatOctal, +}; + struct CNumLitInt { uint64_t x; CNumLitSuffix suffix; + CNumLitFormat format; }; struct CTok { @@ -81,6 +93,9 @@ enum CTokState { CTokStateNumLitIntSuffixLL, CTokStateNumLitIntSuffixUL, CTokStateGotLt, + CTokStateGotGt, + CTokStateGotPlus, + CTokStateGotMinus, }; struct CTokenize { diff --git a/src/main.cpp b/src/main.cpp index 2cda18f7a0d9..a02a862d4b5d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -93,6 +93,7 @@ static int print_full_usage(const char *arg0, FILE *file, int return_code) { " --verbose-llvm-ir enable compiler debug output for LLVM IR\n" " --verbose-cimport enable compiler debug output for C imports\n" " --verbose-cc enable compiler debug output for C compilation\n" + " --quiet-translate-c disable translate C warnings\n" " -dirafter [dir] add directory to AFTER include search path\n" " -isystem [dir] add directory to SYSTEM include search path\n" " -I[dir] add directory to include search path\n" @@ -479,6 +480,7 @@ int main(int argc, char **argv) { bool verbose_llvm_ir = false; bool verbose_cimport = false; bool verbose_cc = false; + bool quiet_translate_c = false; ErrColor color = ErrColorAuto; CacheOpt enable_cache = CacheOptAuto; Buf *dynamic_linker = nullptr; @@ -692,6 +694,8 @@ int main(int argc, char **argv) { verbose_cimport = true; } else if (strcmp(arg, "--verbose-cc") == 0) { verbose_cc = true; + } else if (strcmp(arg, "--quiet-translate-c") == 0) { + quiet_translate_c = true; } else if (strcmp(arg, "-rdynamic") == 0) { rdynamic = true; } else if (strcmp(arg, "--each-lib-rpath") == 0) { @@ -886,7 +890,7 @@ int main(int argc, char **argv) { } else if (strcmp(arg, "--linker-script") == 0) { linker_script = argv[i]; } else if (strcmp(arg, "--version-script") == 0) { - version_script = buf_create_from_str(argv[i]); + version_script = buf_create_from_str(argv[i]); } else if (strcmp(arg, "-target-glibc") == 0) { target_glibc = argv[i]; } else if (strcmp(arg, "-rpath") == 0) { @@ -1212,7 +1216,7 @@ int main(int argc, char **argv) { codegen_set_lib_version(g, ver_major, ver_minor, ver_patch); g->want_single_threaded = want_single_threaded; codegen_set_linker_script(g, linker_script); - g->version_script_path = version_script; + g->version_script_path = version_script; if (each_lib_rpath) codegen_set_each_lib_rpath(g, each_lib_rpath); @@ -1228,6 +1232,7 @@ int main(int argc, char **argv) { g->verbose_llvm_ir = verbose_llvm_ir; g->verbose_cimport = verbose_cimport; g->verbose_cc = verbose_cc; + g->quiet_translate_c = quiet_translate_c; g->output_dir = output_dir; g->disable_gen_h = disable_gen_h; g->bundle_compiler_rt = bundle_compiler_rt; diff --git a/src/translate_c.cpp b/src/translate_c.cpp index 5b79bfe989d0..8d48a6b97b59 100644 --- a/src/translate_c.cpp +++ b/src/translate_c.cpp @@ -326,21 +326,26 @@ static AstNode *trans_create_node_str_lit(Context *c, Buf *buf) { return node; } -static AstNode *trans_create_node_unsigned_negative(Context *c, uint64_t x, bool is_negative) { +static AstNode *trans_create_node_unsigned_negative_format(Context *c, uint64_t x, bool is_negative, IntLiteralFormat format) { AstNode *node = trans_create_node(c, NodeTypeIntLiteral); node->data.int_literal.bigint = allocate(1); + node->data.int_literal.format = format; bigint_init_data(node->data.int_literal.bigint, &x, 1, is_negative); return node; } +static AstNode *trans_create_node_unsigned_negative(Context *c, uint64_t x, bool is_negative) { + return trans_create_node_unsigned_negative_format(c, x, is_negative, IntLiteralFormatNone); +} + static AstNode *trans_create_node_unsigned(Context *c, uint64_t x) { return trans_create_node_unsigned_negative(c, x, false); } static AstNode *trans_create_node_unsigned_negative_type(Context *c, uint64_t x, bool is_negative, - const char *type_name) + const char *type_name, IntLiteralFormat format) { - AstNode *lit_node = trans_create_node_unsigned_negative(c, x, is_negative); + AstNode *lit_node = trans_create_node_unsigned_negative_format(c, x, is_negative, format); return trans_create_node_cast(c, trans_create_node_symbol_str(c, type_name), lit_node); } @@ -4848,19 +4853,31 @@ static AstNode *parse_ctok_num_lit(Context *c, CTokenize *ctok, size_t *tok_i, b CTok *tok = &ctok->tokens.at(*tok_i); if (tok->id == CTokIdNumLitInt) { *tok_i += 1; + IntLiteralFormat format; + switch (tok->data.num_lit_int.format) { + case CNumLitFormatHex: + format = IntLiteralFormatHex; + break; + case CNumLitFormatOctal: + format = IntLiteralFormatOctal; + break; + case CNumLitFormatNone: + format = IntLiteralFormatNone; + break; + }; switch (tok->data.num_lit_int.suffix) { case CNumLitSuffixNone: - return trans_create_node_unsigned_negative(c, tok->data.num_lit_int.x, negate); + return trans_create_node_unsigned_negative_format(c, tok->data.num_lit_int.x, negate, format); case CNumLitSuffixL: - return trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, "c_long"); + return trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, "c_long", format); case CNumLitSuffixU: - return trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, "c_uint"); + return trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, "c_uint", format); case CNumLitSuffixLU: - return trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, "c_ulong"); + return trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, "c_ulong", format); case CNumLitSuffixLL: - return trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, "c_longlong"); + return trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, "c_longlong", format); case CNumLitSuffixLLU: - return trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, "c_ulonglong"); + return trans_create_node_unsigned_negative_type(c, tok->data.num_lit_int.x, negate, "c_ulonglong", format); } zig_unreachable(); } else if (tok->id == CTokIdNumLitFloat) { @@ -4947,6 +4964,7 @@ static AstNode *parse_ctok_primary_expr(Context *c, CTokenize *ctok, size_t *tok outer_if_then->data.fn_call_expr.params.append(node_to_cast); return trans_create_node_if(c, outer_if_cond, outer_if_then, inner_if); } + case CTokIdPlus: case CTokIdDot: case CTokIdEOF: case CTokIdRParen: @@ -4954,7 +4972,11 @@ static AstNode *parse_ctok_primary_expr(Context *c, CTokenize *ctok, size_t *tok case CTokIdBang: case CTokIdTilde: case CTokIdShl: + case CTokIdShr: case CTokIdLt: + case CTokIdGt: + case CTokIdInc: + case CTokIdDec: // not able to make sense of this return nullptr; } @@ -4993,6 +5015,27 @@ static AstNode *parse_ctok_suffix_op_expr(Context *c, CTokenize *ctok, size_t *t if (rhs_node == nullptr) return nullptr; node = trans_create_node_bin_op(c, node, BinOpTypeBitShiftLeft, rhs_node); + } else if (first_tok->id == CTokIdShr) { + *tok_i += 1; + + AstNode *rhs_node = parse_ctok_expr(c, ctok, tok_i); + if (rhs_node == nullptr) + return nullptr; + node = trans_create_node_bin_op(c, node, BinOpTypeBitShiftRight, rhs_node); + } else if (first_tok->id == CTokIdPlus) { + *tok_i += 1; + + AstNode *rhs_node = parse_ctok_expr(c, ctok, tok_i); + if (rhs_node == nullptr) + return nullptr; + node = trans_create_node_bin_op(c, node, BinOpTypeAdd, rhs_node); + } else if (first_tok->id == CTokIdMinus) { + *tok_i += 1; + + AstNode *rhs_node = parse_ctok_expr(c, ctok, tok_i); + if (rhs_node == nullptr) + return nullptr; + node = trans_create_node_bin_op(c, node, BinOpTypeSub, rhs_node); } else { return node; } @@ -5040,11 +5083,11 @@ static AstNode *parse_ctok_prefix_op_expr(Context *c, CTokenize *ctok, size_t *t } } -static void process_macro(Context *c, CTokenize *ctok, Buf *name, const char *char_ptr) { +static const char *process_macro(Context *c, CTokenize *ctok, Buf *name, const char *char_ptr) { tokenize_c_macro(ctok, (const uint8_t *)char_ptr); if (ctok->error) { - return; + return "tokenize error"; } size_t tok_i = 0; @@ -5054,21 +5097,27 @@ static void process_macro(Context *c, CTokenize *ctok, Buf *name, const char *ch AstNode *result_node = parse_ctok_suffix_op_expr(c, ctok, &tok_i); if (result_node == nullptr) { - return; + return "unsupported expression"; } CTok *eof_tok = &ctok->tokens.at(tok_i); if (eof_tok->id != CTokIdEOF) { - return; + return "missing end token"; } if (result_node->type == NodeTypeSymbol) { // if it equals itself, ignore. for example, from stdio.h: // #define stdin stdin Buf *symbol_name = result_node->data.symbol_expr.symbol; if (buf_eql_buf(name, symbol_name)) { - return; + return "equals itself"; } } c->macro_table.put(name, result_node); + return nullptr; +} + +static const char *process_macro_expansion(Context *c, CTokenize *ctok, Buf *name, const char *char_ptr) { + // At least log an error + return "not implemented"; // TODO: This } static void process_preprocessor_entities(Context *c, ZigClangASTUnit *unit) { @@ -5083,8 +5132,28 @@ static void process_preprocessor_entities(Context *c, ZigClangASTUnit *unit) { switch (ZigClangPreprocessedEntity_getKind(entity)) { case ZigClangPreprocessedEntity_InvalidKind: case ZigClangPreprocessedEntity_InclusionDirectiveKind: - case ZigClangPreprocessedEntity_MacroExpansionKind: continue; + case ZigClangPreprocessedEntity_MacroExpansionKind: + { + ZigClangMacroExpansion *macro_expansion = reinterpret_cast(entity); + const ZigClangMacroDefinitionRecord *macro = ZigClangMacroExpansion_getDefinition(macro_expansion); + if (macro == nullptr) { + continue; // It happens + } + const char *raw_name = ZigClangMacroDefinitionRecord_getName_getNameStart(macro); + ZigClangSourceLocation begin_loc = ZigClangMacroDefinitionRecord_getSourceRange_getBegin(macro); + Buf *name = buf_create_from_str(raw_name); + if (name_exists_global(c, name)) { + continue; + } + + const char *begin_c = ZigClangSourceManager_getCharacterData(c->source_manager, begin_loc); + const char *err = process_macro_expansion(c, &ctok, name, begin_c); + if (err != nullptr) { + emit_warning(c, begin_loc, "ignored macro expansion '%s': %s", buf_ptr(name), err); + } + continue; + } case ZigClangPreprocessedEntity_MacroDefinitionKind: { ZigClangMacroDefinitionRecord *macro = reinterpret_cast(entity); @@ -5101,9 +5170,11 @@ static void process_preprocessor_entities(Context *c, ZigClangASTUnit *unit) { if (name_exists_global(c, name)) { continue; } - const char *begin_c = ZigClangSourceManager_getCharacterData(c->source_manager, begin_loc); - process_macro(c, &ctok, name, begin_c); + const char* err = process_macro(c, &ctok, name, begin_c); + if (err != nullptr) { + emit_warning(c, begin_loc, "ignored macro definition '%s': %s", buf_ptr(name), err); + } } } } @@ -5116,7 +5187,11 @@ Error parse_h_file(CodeGen *codegen, AstNode **out_root_node, { Context context = {0}; Context *c = &context; - c->warnings_on = codegen->verbose_cimport; + + // This should be on by default otherwise users assume it correctly + // translated everything even if it doesn't + c->warnings_on = !codegen->quiet_translate_c; + if (mode == TranslateModeImport) { c->want_export = false; } else { diff --git a/src/zig_clang.cpp b/src/zig_clang.cpp index 93646de97b7d..2a0aadc0ca48 100644 --- a/src/zig_clang.cpp +++ b/src/zig_clang.cpp @@ -2605,6 +2605,11 @@ struct ZigClangSourceLocation ZigClangMacroDefinitionRecord_getSourceRange_getEn return bitcast(casted->getSourceRange().getEnd()); } +const struct ZigClangMacroDefinitionRecord *ZigClangMacroExpansion_getDefinition(const struct ZigClangMacroExpansion *self) { + auto casted = reinterpret_cast(self); + return reinterpret_cast(casted->getDefinition()); +} + ZigClangRecordDecl_field_iterator ZigClangRecordDecl_field_begin(const struct ZigClangRecordDecl *self) { auto casted = reinterpret_cast(self); return bitcast(casted->field_begin()); diff --git a/src/zig_clang.h b/src/zig_clang.h index ce71612468ad..c7c23c2ec57c 100644 --- a/src/zig_clang.h +++ b/src/zig_clang.h @@ -112,6 +112,7 @@ struct ZigClangImplicitCastExpr; struct ZigClangIncompleteArrayType; struct ZigClangIntegerLiteral; struct ZigClangMacroDefinitionRecord; +struct ZigClangMacroExpansion; struct ZigClangMacroQualifiedType; struct ZigClangMemberExpr; struct ZigClangNamedDecl; @@ -1106,6 +1107,8 @@ ZIG_EXTERN_C const char *ZigClangMacroDefinitionRecord_getName_getNameStart(cons ZIG_EXTERN_C struct ZigClangSourceLocation ZigClangMacroDefinitionRecord_getSourceRange_getBegin(const struct ZigClangMacroDefinitionRecord *); ZIG_EXTERN_C struct ZigClangSourceLocation ZigClangMacroDefinitionRecord_getSourceRange_getEnd(const struct ZigClangMacroDefinitionRecord *); +ZIG_EXTERN_C const struct ZigClangMacroDefinitionRecord *ZigClangMacroExpansion_getDefinition(const struct ZigClangMacroExpansion *); + ZIG_EXTERN_C bool ZigClangFieldDecl_isBitField(const struct ZigClangFieldDecl *); ZIG_EXTERN_C struct ZigClangQualType ZigClangFieldDecl_getType(const struct ZigClangFieldDecl *); ZIG_EXTERN_C struct ZigClangSourceLocation ZigClangFieldDecl_getLocation(const struct ZigClangFieldDecl *);