Skip to content

Commit 3d7305a

Browse files
committed
Add flag for the SoftFail case of the LLVM disassembler.
The LLVM disassembler returns SoftFail if the instruction can be decoded but is illegal for other reasons, e.g. because it uses operands it is not allowed to use or because the instruction is invalid in the given context.
1 parent f2f0a3c commit 3d7305a

File tree

19 files changed

+217
-28
lines changed

19 files changed

+217
-28
lines changed

MCInst.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,3 +319,13 @@ void MCInst_updateWithTmpMI(MCInst *MI, MCInst *TmpMI) {
319319
assert(MI->size < MAX_MC_OPS);
320320
memcpy(MI->Operands, TmpMI->Operands, sizeof(MI->Operands[0]) * MI->size);
321321
}
322+
323+
/// @brief Sets the softfail/illegal flag in the cs_insn.
324+
/// Setting it indicates the instruction can be decoded, but is invalid
325+
/// due to not allowed operands or an illegal context.
326+
///
327+
/// @param MI The MCInst holding the cs_insn currently decoded.
328+
void MCInst_setSoftFail(MCInst *MI) {
329+
assert(MI && MI->flat_insn);
330+
MI->flat_insn->illegal = true;
331+
}

MCInst.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,4 +179,6 @@ static inline bool MCInst_isAlias(const MCInst *MI) {
179179

180180
void MCInst_updateWithTmpMI(MCInst *MI, MCInst *TmpMI);
181181

182+
void MCInst_setSoftFail(MCInst *MI);
183+
182184
#endif

arch/AArch64/AArch64Mapping.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -821,11 +821,14 @@ bool AArch64_getInstruction(csh handle, const uint8_t *code, size_t code_len,
821821
void *info)
822822
{
823823
AArch64_init_cs_detail(MI);
824-
bool Result = AArch64_LLVM_getInstruction(handle, code, code_len, MI,
824+
DecodeStatus Result = AArch64_LLVM_getInstruction(handle, code, code_len, MI,
825825
size, address,
826-
info) != MCDisassembler_Fail;
826+
info);
827827
AArch64_set_instr_map_data(MI);
828-
return Result;
828+
if (Result == MCDisassembler_SoftFail) {
829+
MCInst_setSoftFail(MI);
830+
}
831+
return Result != MCDisassembler_Fail;
829832
}
830833

831834
/// Patches the register names with Capstone specific alias.

arch/ARC/ARCMapping.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -142,12 +142,14 @@ bool ARC_getInstruction(csh handle, const uint8_t *code, size_t code_len,
142142
{
143143
uint64_t temp_size;
144144
ARC_init_cs_detail(instr);
145-
bool Result = ARC_LLVM_getInstruction(instr, &temp_size, code,
146-
code_len, address, info) !=
147-
MCDisassembler_Fail;
145+
DecodeStatus Result = ARC_LLVM_getInstruction(instr, &temp_size, code,
146+
code_len, address, info);
148147
ARC_set_instr_map_data(instr);
149148
*size = temp_size;
150-
return Result;
149+
if (Result == MCDisassembler_SoftFail) {
150+
MCInst_setSoftFail(instr);
151+
}
152+
return Result != MCDisassembler_Fail;
151153
}
152154

153155
void ARC_printer(MCInst *MI, SStream *O,
@@ -285,4 +287,4 @@ void ARC_add_cs_detail(MCInst *MI, int op_group,
285287
}
286288
}
287289

288-
#endif
290+
#endif

arch/ARM/ARMMapping.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -816,11 +816,14 @@ bool ARM_getInstruction(csh handle, const uint8_t *code, size_t code_len,
816816
void *info)
817817
{
818818
ARM_init_cs_detail(instr);
819-
bool Result = ARM_LLVM_getInstruction(handle, code, code_len, instr,
819+
DecodeStatus Result = ARM_LLVM_getInstruction(handle, code, code_len, instr,
820820
size, address,
821-
info) != MCDisassembler_Fail;
821+
info);
822822
ARM_set_instr_map_data(instr);
823-
return Result;
823+
if (Result == MCDisassembler_SoftFail) {
824+
MCInst_setSoftFail(instr);
825+
}
826+
return Result != MCDisassembler_Fail;
824827
}
825828

826829
#define GET_REGINFO_MC_DESC

arch/Alpha/AlphaMapping.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,10 +173,13 @@ bool Alpha_getInstruction(csh handle, const uint8_t *code,
173173
uint16_t *size, uint64_t address, void *info)
174174
{
175175
Alpha_init_cs_detail(instr);
176-
bool Result = Alpha_LLVM_getInstruction(handle, code, code_len, instr, size,
176+
DecodeStatus Result = Alpha_LLVM_getInstruction(handle, code, code_len, instr, size,
177177
address, info);
178178
Alpha_set_instr_map_data(instr);
179-
return Result;
179+
if (Result == MCDisassembler_SoftFail) {
180+
MCInst_setSoftFail(instr);
181+
}
182+
return Result != MCDisassembler_Fail;
180183
}
181184

182185
#endif

arch/LoongArch/LoongArchMapping.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -386,12 +386,14 @@ bool LoongArch_getInstruction(csh handle, const uint8_t *code, size_t code_len,
386386
{
387387
uint64_t temp_size;
388388
LoongArch_init_cs_detail(instr);
389-
bool Result = LoongArch_LLVM_getInstruction(instr, &temp_size, code,
390-
code_len, address, info) !=
391-
MCDisassembler_Fail;
389+
DecodeStatus Result = LoongArch_LLVM_getInstruction(instr, &temp_size, code,
390+
code_len, address, info);
392391
LoongArch_set_instr_map_data(instr);
393392
*size = temp_size;
394-
return Result;
393+
if (Result == MCDisassembler_SoftFail) {
394+
MCInst_setSoftFail(instr);
395+
}
396+
return Result != MCDisassembler_Fail;
395397
}
396398

397399
/// Adds group to the instruction which are not defined in LLVM.

arch/Mips/MipsMapping.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -197,14 +197,17 @@ bool Mips_getInstruction(csh handle, const uint8_t *code, size_t code_len,
197197
instr->MRI = (MCRegisterInfo *)info;
198198
map_set_fill_detail_ops(instr, true);
199199

200-
bool result = Mips_LLVM_getInstruction(instr, &size64, code, code_len,
200+
DecodeStatus Result = Mips_LLVM_getInstruction(instr, &size64, code, code_len,
201201
address,
202-
info) != MCDisassembler_Fail;
203-
if (result) {
202+
info);
203+
*size = size64;
204+
if (Result != MCDisassembler_Fail) {
204205
Mips_set_instr_map_data(instr);
205206
}
206-
*size = size64;
207-
return result;
207+
if (Result == MCDisassembler_SoftFail) {
208+
MCInst_setSoftFail(instr);
209+
}
210+
return Result != MCDisassembler_Fail;
208211
}
209212

210213
void Mips_printer(MCInst *MI, SStream *O, void * /* MCRegisterInfo* */ info)

arch/RISCV/RISCVDisassembler.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -421,11 +421,15 @@ bool RISCV_getInstruction(csh ud, const uint8_t *code, size_t code_len,
421421
{
422422
cs_struct *handle = (cs_struct *)(uintptr_t)ud;
423423

424-
return MCDisassembler_Success ==
424+
DecodeStatus Result =
425425
RISCVDisassembler_getInstruction(handle->mode, instr,
426426
code, code_len,
427427
size, address,
428428
(MCRegisterInfo *)info);
429+
if (Result == MCDisassembler_SoftFail) {
430+
MCInst_setSoftFail(instr);
431+
}
432+
return Result != MCDisassembler_Fail;
429433

430434
}
431435

arch/SystemZ/SystemZMapping.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,13 @@ bool SystemZ_getInstruction(csh handle, const uint8_t *bytes, size_t bytes_len,
9494
{
9595
SystemZ_init_cs_detail(MI);
9696
MI->MRI = (MCRegisterInfo *)info;
97-
DecodeStatus result = SystemZ_LLVM_getInstruction(
97+
DecodeStatus Result = SystemZ_LLVM_getInstruction(
9898
handle, bytes, bytes_len, MI, size, address, info);
9999
SystemZ_set_instr_map_data(MI, bytes, bytes_len);
100-
return result != MCDisassembler_Fail;
100+
if (Result == MCDisassembler_SoftFail) {
101+
MCInst_setSoftFail(MI);
102+
}
103+
return Result != MCDisassembler_Fail;
101104
}
102105

103106
// given internal insn id, return public instruction info

arch/XCore/XCoreDisassembler.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -747,6 +747,9 @@ bool XCore_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *
747747
// Calling the auto-generated decoder function.
748748
Result = decodeInstruction_2(DecoderTable16, MI, insn16, address, info, 0);
749749
if (Result != MCDisassembler_Fail) {
750+
if (Result == MCDisassembler_SoftFail) {
751+
MCInst_setSoftFail(MI);
752+
}
750753
*size = 2;
751754
return true;
752755
}
@@ -758,6 +761,9 @@ bool XCore_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *
758761
// Calling the auto-generated decoder function.
759762
Result = decodeInstruction_4(DecoderTable32, MI, insn32, address, info, 0);
760763
if (Result != MCDisassembler_Fail) {
764+
if (Result == MCDisassembler_SoftFail) {
765+
MCInst_setSoftFail(MI);
766+
}
761767
*size = 4;
762768
return true;
763769
}

arch/Xtensa/XtensaMapping.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,13 @@ bool Xtensa_disasm(csh handle, const uint8_t *code, size_t code_len,
7373
{
7474
DecodeStatus res = Xtensa_LLVM_getInstruction(instr, size, code,
7575
code_len, address);
76-
if (res == MCDisassembler_Success) {
76+
if (res != MCDisassembler_Fail) {
7777
set_instr_map_data(instr);
7878
}
79-
return res == MCDisassembler_Success;
79+
if (res == MCDisassembler_SoftFail) {
80+
MCInst_setSoftFail(instr);
81+
}
82+
return res != MCDisassembler_Fail;
8083
}
8184

8285
const char *Xtensa_reg_name(csh handle, unsigned int id)

bindings/python/capstone/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,7 @@ class _cs_insn(ctypes.Structure):
624624
('mnemonic', ctypes.c_char * 32),
625625
('op_str', ctypes.c_char * 160),
626626
('is_alias', ctypes.c_bool),
627+
('illegal', ctypes.c_bool),
627628
('usesAliasDetails', ctypes.c_bool),
628629
('detail', ctypes.POINTER(_cs_detail)),
629630
)
@@ -833,6 +834,13 @@ def size(self):
833834
def is_alias(self):
834835
return self._raw.is_alias
835836

837+
# return instruction's illegal flag
838+
# Set if instruction can be decoded but is invalid
839+
# due to context or illegal operands.
840+
@property
841+
def illegal(self):
842+
return self._raw.illegal
843+
836844
# return instruction's alias_id
837845
@property
838846
def alias_id(self):

bindings/python/cstest_py/src/cstest_py/cstest.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,9 @@ def compare(self, actual_insns: list[CsInsn], bits: int) -> TestResult:
253253
if not compare_tbool(a_insn.is_alias, e_insn.get("is_alias"), "is_alias"):
254254
return TestResult.FAILED
255255

256+
if not compare_tbool(a_insn.illegal, e_insn.get("illegal"), "illegal"):
257+
return TestResult.FAILED
258+
256259
if not compare_enum(a_insn.alias_id, e_insn.get("alias_id"), "alias_id"):
257260
return TestResult.FAILED
258261

cstool/cstool.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -768,7 +768,8 @@ int main(int argc, char **argv)
768768
}
769769
}
770770

771-
printf(" %s\t%s\n", insn[i].mnemonic, insn[i].op_str);
771+
printf(" %s\t%s%s\n", insn[i].mnemonic, insn[i].op_str,
772+
insn[i].illegal ? "\t; Illegal instruction" : "");
772773

773774
if (detail_flag) {
774775
print_details(handle, arch, mode, &insn[i]);

include/capstone/capstone.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,13 @@ typedef struct cs_insn {
510510
/// False: The detail operands are from the real instruction.
511511
bool usesAliasDetails;
512512

513+
/// True: The bytes disassemble to a valid instruction, but it is illegal by ISA definitions.
514+
/// For example the instruction uses a register which is not allowed or it appears in
515+
/// an invalid context.
516+
///
517+
/// False: The instruction decoded correctly and is valid.
518+
bool illegal;
519+
513520
/// Pointer to cs_detail.
514521
/// NOTE: detail pointer is only valid when both requirements below are met:
515522
/// (1) CS_OP_DETAIL = CS_OPT_ON

suite/cstest/include/test_case.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ typedef struct {
6262
char *asm_text; // mandatory
6363
char *op_str;
6464
int32_t is_alias; ///< 0 == not given, >0 == true, <0 == false
65+
int32_t illegal; ///< 0 == not given, >0 == true, <0 == false
6566
char *alias_id;
6667
char *mnemonic;
6768
TestDetail *details;
@@ -81,6 +82,8 @@ static const cyaml_schema_field_t test_insn_data_mapping_schema[] = {
8182
TestInsnData, op_str, 0, CYAML_UNLIMITED),
8283
CYAML_FIELD_INT("is_alias", CYAML_FLAG_OPTIONAL, TestInsnData,
8384
is_alias),
85+
CYAML_FIELD_INT("illegal", CYAML_FLAG_OPTIONAL, TestInsnData,
86+
illegal),
8487
CYAML_FIELD_STRING_PTR("alias_id",
8588
CYAML_FLAG_POINTER | CYAML_FLAG_OPTIONAL,
8689
TestInsnData, alias_id, 0, CYAML_UNLIMITED),

suite/cstest/src/test_case.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ TestInsnData *test_insn_data_clone(TestInsnData *test_insn_data)
115115
cs_strdup(test_insn_data->id) :
116116
NULL;
117117
tid->is_alias = test_insn_data->is_alias;
118+
tid->illegal = test_insn_data->illegal;
118119
tid->mnemonic = test_insn_data->mnemonic ?
119120
cs_strdup(test_insn_data->mnemonic) :
120121
NULL;
@@ -248,6 +249,13 @@ void test_expected_compare(csh *handle, TestExpected *expected, cs_insn *insns,
248249
assert_false(insns[i].is_alias);
249250
}
250251
}
252+
if (expec_data->illegal != 0) {
253+
if (expec_data->illegal > 0) {
254+
assert_true(insns[i].illegal);
255+
} else {
256+
assert_false(insns[i].illegal);
257+
}
258+
}
251259
if (expec_data->alias_id) {
252260
assert_true(ids_match((uint32_t)insns[i].alias_id,
253261
expec_data->alias_id));

0 commit comments

Comments
 (0)