Skip to content

Add flag for the SoftFail case of the LLVM disassembler. #2707

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions MCInst.c
Original file line number Diff line number Diff line change
Expand Up @@ -319,3 +319,13 @@ void MCInst_updateWithTmpMI(MCInst *MI, MCInst *TmpMI) {
assert(MI->size < MAX_MC_OPS);
memcpy(MI->Operands, TmpMI->Operands, sizeof(MI->Operands[0]) * MI->size);
}

/// @brief Marks the cs_insn attached to @p MI as soft-failed (illegal).
/// The flag signals that the bytes decode to an instruction, but that the
/// instruction is invalid because of disallowed operands or an illegal context.
///
/// @param MI The MCInst holding the cs_insn currently being decoded.
/// Both MI and MI->flat_insn must be non-NULL; this is checked with assert.
void MCInst_setSoftFail(MCInst *MI) {
	// Preconditions: a valid MCInst with an attached cs_insn.
	assert(MI);
	assert(MI->flat_insn);

	MI->flat_insn->illegal = true;
}
2 changes: 2 additions & 0 deletions MCInst.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,4 +179,6 @@ static inline bool MCInst_isAlias(const MCInst *MI) {

void MCInst_updateWithTmpMI(MCInst *MI, MCInst *TmpMI);

void MCInst_setSoftFail(MCInst *MI);

#endif
9 changes: 6 additions & 3 deletions arch/AArch64/AArch64Mapping.c
Original file line number Diff line number Diff line change
Expand Up @@ -821,11 +821,14 @@ bool AArch64_getInstruction(csh handle, const uint8_t *code, size_t code_len,
void *info)
{
AArch64_init_cs_detail(MI);
bool Result = AArch64_LLVM_getInstruction(handle, code, code_len, MI,
DecodeStatus Result = AArch64_LLVM_getInstruction(handle, code, code_len, MI,
size, address,
info) != MCDisassembler_Fail;
info);
AArch64_set_instr_map_data(MI);
return Result;
if (Result == MCDisassembler_SoftFail) {
MCInst_setSoftFail(MI);
}
return Result != MCDisassembler_Fail;
}

/// Patches the register names with Capstone specific alias.
Expand Down
12 changes: 7 additions & 5 deletions arch/ARC/ARCMapping.c
Original file line number Diff line number Diff line change
Expand Up @@ -142,12 +142,14 @@ bool ARC_getInstruction(csh handle, const uint8_t *code, size_t code_len,
{
uint64_t temp_size;
ARC_init_cs_detail(instr);
bool Result = ARC_LLVM_getInstruction(instr, &temp_size, code,
code_len, address, info) !=
MCDisassembler_Fail;
DecodeStatus Result = ARC_LLVM_getInstruction(instr, &temp_size, code,
code_len, address, info);
ARC_set_instr_map_data(instr);
*size = temp_size;
return Result;
if (Result == MCDisassembler_SoftFail) {
MCInst_setSoftFail(instr);
}
return Result != MCDisassembler_Fail;
}

void ARC_printer(MCInst *MI, SStream *O,
Expand Down Expand Up @@ -285,4 +287,4 @@ void ARC_add_cs_detail(MCInst *MI, int op_group,
}
}

#endif
#endif
9 changes: 6 additions & 3 deletions arch/ARM/ARMMapping.c
Original file line number Diff line number Diff line change
Expand Up @@ -816,11 +816,14 @@ bool ARM_getInstruction(csh handle, const uint8_t *code, size_t code_len,
void *info)
{
ARM_init_cs_detail(instr);
bool Result = ARM_LLVM_getInstruction(handle, code, code_len, instr,
DecodeStatus Result = ARM_LLVM_getInstruction(handle, code, code_len, instr,
size, address,
info) != MCDisassembler_Fail;
info);
ARM_set_instr_map_data(instr);
return Result;
if (Result == MCDisassembler_SoftFail) {
MCInst_setSoftFail(instr);
}
return Result != MCDisassembler_Fail;
}

#define GET_REGINFO_MC_DESC
Expand Down
7 changes: 5 additions & 2 deletions arch/Alpha/AlphaMapping.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,13 @@ bool Alpha_getInstruction(csh handle, const uint8_t *code,
uint16_t *size, uint64_t address, void *info)
{
Alpha_init_cs_detail(instr);
bool Result = Alpha_LLVM_getInstruction(handle, code, code_len, instr, size,
DecodeStatus Result = Alpha_LLVM_getInstruction(handle, code, code_len, instr, size,
address, info);
Alpha_set_instr_map_data(instr);
return Result;
if (Result == MCDisassembler_SoftFail) {
MCInst_setSoftFail(instr);
}
return Result != MCDisassembler_Fail;
}

#endif
10 changes: 6 additions & 4 deletions arch/LoongArch/LoongArchMapping.c
Original file line number Diff line number Diff line change
Expand Up @@ -386,12 +386,14 @@ bool LoongArch_getInstruction(csh handle, const uint8_t *code, size_t code_len,
{
uint64_t temp_size;
LoongArch_init_cs_detail(instr);
bool Result = LoongArch_LLVM_getInstruction(instr, &temp_size, code,
code_len, address, info) !=
MCDisassembler_Fail;
DecodeStatus Result = LoongArch_LLVM_getInstruction(instr, &temp_size, code,
code_len, address, info);
LoongArch_set_instr_map_data(instr);
*size = temp_size;
return Result;
if (Result == MCDisassembler_SoftFail) {
MCInst_setSoftFail(instr);
}
return Result != MCDisassembler_Fail;
}

/// Adds group to the instruction which are not defined in LLVM.
Expand Down
13 changes: 8 additions & 5 deletions arch/Mips/MipsMapping.c
Original file line number Diff line number Diff line change
Expand Up @@ -197,14 +197,17 @@ bool Mips_getInstruction(csh handle, const uint8_t *code, size_t code_len,
instr->MRI = (MCRegisterInfo *)info;
map_set_fill_detail_ops(instr, true);

bool result = Mips_LLVM_getInstruction(instr, &size64, code, code_len,
DecodeStatus Result = Mips_LLVM_getInstruction(instr, &size64, code, code_len,
address,
info) != MCDisassembler_Fail;
if (result) {
info);
*size = size64;
if (Result != MCDisassembler_Fail) {
Mips_set_instr_map_data(instr);
}
*size = size64;
return result;
if (Result == MCDisassembler_SoftFail) {
MCInst_setSoftFail(instr);
}
return Result != MCDisassembler_Fail;
}

void Mips_printer(MCInst *MI, SStream *O, void * /* MCRegisterInfo* */ info)
Expand Down
6 changes: 5 additions & 1 deletion arch/RISCV/RISCVDisassembler.c
Original file line number Diff line number Diff line change
Expand Up @@ -421,11 +421,15 @@ bool RISCV_getInstruction(csh ud, const uint8_t *code, size_t code_len,
{
cs_struct *handle = (cs_struct *)(uintptr_t)ud;

return MCDisassembler_Success ==
DecodeStatus Result =
RISCVDisassembler_getInstruction(handle->mode, instr,
code, code_len,
size, address,
(MCRegisterInfo *)info);
if (Result == MCDisassembler_SoftFail) {
MCInst_setSoftFail(instr);
}
return Result != MCDisassembler_Fail;

}

Expand Down
7 changes: 5 additions & 2 deletions arch/SystemZ/SystemZMapping.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,13 @@ bool SystemZ_getInstruction(csh handle, const uint8_t *bytes, size_t bytes_len,
{
SystemZ_init_cs_detail(MI);
MI->MRI = (MCRegisterInfo *)info;
DecodeStatus result = SystemZ_LLVM_getInstruction(
DecodeStatus Result = SystemZ_LLVM_getInstruction(
handle, bytes, bytes_len, MI, size, address, info);
SystemZ_set_instr_map_data(MI, bytes, bytes_len);
return result != MCDisassembler_Fail;
if (Result == MCDisassembler_SoftFail) {
MCInst_setSoftFail(MI);
}
return Result != MCDisassembler_Fail;
}

// given internal insn id, return public instruction info
Expand Down
6 changes: 6 additions & 0 deletions arch/XCore/XCoreDisassembler.c
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,9 @@ bool XCore_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *
// Calling the auto-generated decoder function.
Result = decodeInstruction_2(DecoderTable16, MI, insn16, address, info, 0);
if (Result != MCDisassembler_Fail) {
if (Result == MCDisassembler_SoftFail) {
MCInst_setSoftFail(MI);
}
*size = 2;
return true;
}
Expand All @@ -758,6 +761,9 @@ bool XCore_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *
// Calling the auto-generated decoder function.
Result = decodeInstruction_4(DecoderTable32, MI, insn32, address, info, 0);
if (Result != MCDisassembler_Fail) {
if (Result == MCDisassembler_SoftFail) {
MCInst_setSoftFail(MI);
}
*size = 4;
return true;
}
Expand Down
7 changes: 5 additions & 2 deletions arch/Xtensa/XtensaMapping.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,13 @@ bool Xtensa_disasm(csh handle, const uint8_t *code, size_t code_len,
{
DecodeStatus res = Xtensa_LLVM_getInstruction(instr, size, code,
code_len, address);
if (res == MCDisassembler_Success) {
if (res != MCDisassembler_Fail) {
set_instr_map_data(instr);
}
return res == MCDisassembler_Success;
if (res == MCDisassembler_SoftFail) {
MCInst_setSoftFail(instr);
}
return res != MCDisassembler_Fail;
}

const char *Xtensa_reg_name(csh handle, unsigned int id)
Expand Down
8 changes: 8 additions & 0 deletions bindings/python/capstone/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,7 @@ class _cs_insn(ctypes.Structure):
('op_str', ctypes.c_char * 160),
('is_alias', ctypes.c_bool),
('usesAliasDetails', ctypes.c_bool),
('illegal', ctypes.c_bool),
('detail', ctypes.POINTER(_cs_detail)),
)

Expand Down Expand Up @@ -833,6 +834,13 @@ def size(self):
def is_alias(self):
return self._raw.is_alias

# Expose the illegal flag of the underlying raw instruction.
# The flag is set when the bytes decode to an instruction that is
# nevertheless invalid, due to its context or illegal operands.
@property
def illegal(self):
    raw_insn = self._raw
    return raw_insn.illegal

# return instruction's alias_id
@property
def alias_id(self):
Expand Down
3 changes: 3 additions & 0 deletions bindings/python/cstest_py/src/cstest_py/cstest.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,9 @@ def compare(self, actual_insns: list[CsInsn], bits: int) -> TestResult:
if not compare_tbool(a_insn.is_alias, e_insn.get("is_alias"), "is_alias"):
return TestResult.FAILED

if not compare_tbool(a_insn.illegal, e_insn.get("illegal"), "illegal"):
return TestResult.FAILED

if not compare_enum(a_insn.alias_id, e_insn.get("alias_id"), "alias_id"):
return TestResult.FAILED

Expand Down
3 changes: 2 additions & 1 deletion cstool/cstool.c
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,8 @@ int main(int argc, char **argv)
}
}

printf(" %s\t%s\n", insn[i].mnemonic, insn[i].op_str);
printf(" %s\t%s%s\n", insn[i].mnemonic, insn[i].op_str,
insn[i].illegal ? "\t; Illegal instruction" : "");

if (detail_flag) {
print_details(handle, arch, mode, &insn[i]);
Expand Down
7 changes: 6 additions & 1 deletion docs/cs_v6_release_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,12 @@ For `v7` we can then focus on other big features, like [SAIL](https://github.com

## New features

These features are only supported by `auto-sync`-enabled architectures.
**LLVM disassembler based modules**

- The `cs_insn.illegal` flag was added. If it is set, the instruction was decoded successfully but is considered illegal.
This happens for instructions that use invalid operands or appear in an illegal context.

**Auto-Sync-enabled modules**

**More code quality checks**

Expand Down
7 changes: 7 additions & 0 deletions include/capstone/capstone.h
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,13 @@ typedef struct cs_insn {
/// False: The detail operands are from the real instruction.
bool usesAliasDetails;

/// True: The bytes disassemble to a valid instruction, but it is illegal by ISA definitions.
/// For example the instruction uses a register which is not allowed or it appears in
/// an invalid context.
///
/// False: The instruction decoded correctly and is valid.
bool illegal;

/// Pointer to cs_detail.
/// NOTE: detail pointer is only valid when both requirements below are met:
/// (1) CS_OP_DETAIL = CS_OPT_ON
Expand Down
3 changes: 3 additions & 0 deletions suite/cstest/include/test_case.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ typedef struct {
char *asm_text; // mandatory
char *op_str;
int32_t is_alias; ///< 0 == not given, >0 == true, <0 == false
int32_t illegal; ///< 0 == not given, >0 == true, <0 == false
char *alias_id;
char *mnemonic;
TestDetail *details;
Expand All @@ -81,6 +82,8 @@ static const cyaml_schema_field_t test_insn_data_mapping_schema[] = {
TestInsnData, op_str, 0, CYAML_UNLIMITED),
CYAML_FIELD_INT("is_alias", CYAML_FLAG_OPTIONAL, TestInsnData,
is_alias),
CYAML_FIELD_INT("illegal", CYAML_FLAG_OPTIONAL, TestInsnData,
illegal),
CYAML_FIELD_STRING_PTR("alias_id",
CYAML_FLAG_POINTER | CYAML_FLAG_OPTIONAL,
TestInsnData, alias_id, 0, CYAML_UNLIMITED),
Expand Down
8 changes: 8 additions & 0 deletions suite/cstest/src/test_case.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ TestInsnData *test_insn_data_clone(TestInsnData *test_insn_data)
cs_strdup(test_insn_data->id) :
NULL;
tid->is_alias = test_insn_data->is_alias;
tid->illegal = test_insn_data->illegal;
tid->mnemonic = test_insn_data->mnemonic ?
cs_strdup(test_insn_data->mnemonic) :
NULL;
Expand Down Expand Up @@ -248,6 +249,13 @@ void test_expected_compare(csh *handle, TestExpected *expected, cs_insn *insns,
assert_false(insns[i].is_alias);
}
}
if (expec_data->illegal != 0) {
if (expec_data->illegal > 0) {
assert_true(insns[i].illegal);
} else {
assert_false(insns[i].illegal);
}
}
if (expec_data->alias_id) {
assert_true(ids_match((uint32_t)insns[i].alias_id,
expec_data->alias_id));
Expand Down
Loading
Loading