From ada0ed82b33650ec2f3a009c5f114f855d33475d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladislav=20Hr=C4=8Dka?= Date: Mon, 22 Jul 2019 11:56:33 +0200 Subject: [PATCH] Improved basic block manipulation --- miasm/arch/aarch64/arch.py | 8 ++ miasm/arch/arm/arch.py | 16 ++++ miasm/arch/arm/sem.py | 58 -------------- miasm/arch/mep/arch.py | 8 ++ miasm/arch/mips32/arch.py | 8 ++ miasm/arch/msp430/arch.py | 8 ++ miasm/arch/ppc/arch.py | 8 ++ miasm/arch/sh4/arch.py | 8 ++ miasm/arch/x86/arch.py | 29 +++++++ miasm/core/asmblock.py | 136 +++++++++++++++++++++++++++++++++ miasm/core/graph.py | 17 +++++ miasm/expression/expression.py | 58 ++++++++++++++ 12 files changed, 304 insertions(+), 58 deletions(-) diff --git a/miasm/arch/aarch64/arch.py b/miasm/arch/aarch64/arch.py index d12fbe724..4c60b99b5 100644 --- a/miasm/arch/aarch64/arch.py +++ b/miasm/arch/aarch64/arch.py @@ -512,6 +512,14 @@ def reset_class(self): if hasattr(self, "sf"): self.sf.value = None + def cond_to_branch(self, cond_name): + """ + returns branch which is alternative to IR @cond_name + """ + raise RuntimeError("Not implemented") + assert cond_name in cond_dct_inv, "Unknown cond name" + cond_num = cond_dct_inv[cond_name] + return cond_to_br[cond_num] def aarch64op(name, fields, args=None, alias=False): dct = {"fields": fields, "alias":alias} diff --git a/miasm/arch/arm/arch.py b/miasm/arch/arm/arch.py index 497d6d689..decc4f076 100644 --- a/miasm/arch/arm/arch.py +++ b/miasm/arch/arm/arch.py @@ -677,6 +677,14 @@ def value(self, mode): def get_symbol_size(self, symbol, loc_db, mode): return 32 + def cond_to_branch(self, cond_name): + """ + returns branch which is alternative to IR @cond_name + """ + raise RuntimeError("Not implemented") + assert cond_name in cond_dct_inv, "Unknown cond name" + cond_num = cond_dct_inv[cond_name] + return cond_to_br[cond_num] class mn_armt(cls_mn): name = "armt" @@ -784,6 +792,14 @@ def get_args_expr(self): def get_symbol_size(self, symbol, loc_db, mode): return 32 + def 
cond_to_branch(self, cond_name): + """ + returns branch which is alternative to IR @cond_name + """ + raise RuntimeError("Not implemented") + assert cond_name in cond_dct_inv, "Unknown cond name" + cond_num = cond_dct_inv[cond_name] + return cond_to_br[cond_num] class arm_arg(m_arg): def asm_ast_to_expr(self, arg, loc_db): diff --git a/miasm/arch/arm/sem.py b/miasm/arch/arm/sem.py index 981a50605..7221a7387 100644 --- a/miasm/arch/arm/sem.py +++ b/miasm/arch/arm/sem.py @@ -1378,64 +1378,6 @@ def pkhtb(ir, instr, arg1, arg2, arg3): return e, [] -COND_EQ = 0 -COND_NE = 1 -COND_CS = 2 -COND_CC = 3 -COND_MI = 4 -COND_PL = 5 -COND_VS = 6 -COND_VC = 7 -COND_HI = 8 -COND_LS = 9 -COND_GE = 10 -COND_LT = 11 -COND_GT = 12 -COND_LE = 13 -COND_AL = 14 -COND_NV = 15 - -cond_dct = { - COND_EQ: "EQ", - COND_NE: "NE", - COND_CS: "CS", - COND_CC: "CC", - COND_MI: "MI", - COND_PL: "PL", - COND_VS: "VS", - COND_VC: "VC", - COND_HI: "HI", - COND_LS: "LS", - COND_GE: "GE", - COND_LT: "LT", - COND_GT: "GT", - COND_LE: "LE", - COND_AL: "AL", - # COND_NV: "NV", -} - -cond_dct_inv = dict((name, num) for num, name in viewitems(cond_dct)) - - -""" -Code Meaning (for cmp or subs) Flags Tested -eq Equal. Z==1 -ne Not equal. Z==0 -cs or hs Unsigned higher or same (or carry set). C==1 -cc or lo Unsigned lower (or carry clear). C==0 -mi Negative. The mnemonic stands for "minus". N==1 -pl Positive or zero. The mnemonic stands for "plus". N==0 -vs Signed overflow. The mnemonic stands for "V set". V==1 -vc No signed overflow. The mnemonic stands for "V clear". V==0 -hi Unsigned higher. (C==1) && (Z==0) -ls Unsigned lower or same. (C==0) || (Z==1) -ge Signed greater than or equal. N==V -lt Signed less than. N!=V -gt Signed greater than. (Z==0) && (N==V) -le Signed less than or equal. (Z==1) || (N!=V) -al (or omitted) Always executed. None tested. 
-""" - tab_cond = {COND_EQ: ExprOp("CC_EQ", zf), COND_NE: ExprOp("CC_NE", zf), COND_CS: ExprOp("CC_U>=", cf ^ ExprInt(1, 1)), # inv cf diff --git a/miasm/arch/mep/arch.py b/miasm/arch/mep/arch.py index 8a9f60fd0..d909745f4 100644 --- a/miasm/arch/mep/arch.py +++ b/miasm/arch/mep/arch.py @@ -492,6 +492,14 @@ def value(self, mode): else: raise NotImplementedError("Bad MeP endianness (%s)" % mode) + def cond_to_branch(self, cond_name): + """ + returns branch which is alternative to IR @cond_name + """ + raise RuntimeError("Not implemented") + assert cond_name in cond_dct_inv, "Unknown cond name" + cond_num = cond_dct_inv[cond_name] + return cond_to_br[cond_num] def addop(name, fields, args=None, alias=False): """Dynamically create the "name" object diff --git a/miasm/arch/mips32/arch.py b/miasm/arch/mips32/arch.py index 68841e1ea..07a5e151e 100644 --- a/miasm/arch/mips32/arch.py +++ b/miasm/arch/mips32/arch.py @@ -248,6 +248,14 @@ def value(self, mode): else: raise NotImplementedError('bad attrib') + def cond_to_branch(self, cond_name): + """ + returns branch which is alternative to IR @cond_name + """ + raise RuntimeError("Not implemented") + assert cond_name in cond_dct_inv, "Unknown cond name" + cond_num = cond_dct_inv[cond_name] + return cond_to_br[cond_num] def mips32op(name, fields, args=None, alias=False): diff --git a/miasm/arch/msp430/arch.py b/miasm/arch/msp430/arch.py index 65dd435e8..9517033cb 100644 --- a/miasm/arch/msp430/arch.py +++ b/miasm/arch/msp430/arch.py @@ -294,6 +294,14 @@ def reset_class(self): def getnextflow(self, loc_db): raise NotImplementedError('not fully functional') + def cond_to_branch(self, cond_name): + """ + returns branch which is alternative to IR @cond_name + """ + raise RuntimeError("Not implemented") + assert cond_name in cond_dct_inv, "Unknown cond name" + cond_num = cond_dct_inv[cond_name] + return cond_to_br[cond_num] def addop(name, fields, args=None, alias=False): dct = {"fields": fields} diff --git 
a/miasm/arch/ppc/arch.py b/miasm/arch/ppc/arch.py index 8f700bffc..c91b928d0 100644 --- a/miasm/arch/ppc/arch.py +++ b/miasm/arch/ppc/arch.py @@ -283,6 +283,14 @@ def value(self, mode): def get_symbol_size(self, symbol, loc_db, mode): return 32 + def cond_to_branch(self, cond_name): + """ + returns branch which is alternative to IR @cond_name + """ + raise RuntimeError("Not implemented") + assert cond_name in cond_dct_inv, "Unknown cond name" + cond_num = cond_dct_inv[cond_name] + return cond_to_br[cond_num] class ppc_reg(reg_noarg, ppc_arg): pass diff --git a/miasm/arch/sh4/arch.py b/miasm/arch/sh4/arch.py index 9310d3c3b..a6a379b95 100644 --- a/miasm/arch/sh4/arch.py +++ b/miasm/arch/sh4/arch.py @@ -557,6 +557,14 @@ def value(self, mode): v = super(mn_sh4, self).value(mode) return [x[::-1] for x in v] + def cond_to_branch(self, cond_name): + """ + returns branch which is alternative to IR @cond_name + """ + raise RuntimeError("Not implemented") + assert cond_name in cond_dct_inv, "Unknown cond name" + cond_num = cond_dct_inv[cond_name] + return cond_to_br[cond_num] class bs_dr0gbr(sh4_dgpreg): parser = dgbr_reg diff --git a/miasm/arch/x86/arch.py b/miasm/arch/x86/arch.py index 3053301ad..470b89352 100644 --- a/miasm/arch/x86/arch.py +++ b/miasm/arch/x86/arch.py @@ -31,6 +31,22 @@ unconditional_branch = ['JMP', 'JMPF'] +cond_to_br = { COND_EQ: "jz", + COND_NE: "jnz", + COND_CS: "jae", + COND_CC: "jb", + COND_MI: "js", + COND_PL: "jns", + COND_VS: "jo", + COND_VC: "jno", + COND_HI: "ja", + COND_LS: "jbe", + COND_GE: "jge", + COND_LT: "jl", + COND_GT: "jg", + COND_LE: "jle" +} + f_isad = "AD" f_s08 = "S08" f_u08 = "U08" @@ -461,6 +477,9 @@ class instruction_x86(instruction): def __init__(self, *args, **kargs): super(instruction_x86, self).__init__(*args, **kargs) + if self.additional_info == None: + self.additional_info = additional_info() + self.additional_info.g1.value = 0 def v_opmode(self): return self.additional_info.v_opmode @@ -622,6 +641,8 @@ class 
mn_x86(cls_mn): sp = {16: SP, 32: ESP, 64: RSP} instruction = instruction_x86 max_instruction_len = 15 + cond_branch = conditional_branch + uncond_branch = unconditional_branch @classmethod def getpc(cls, attrib): @@ -937,6 +958,14 @@ def filter_asm_candidates(cls, instr, candidates): cand_diff_mode.sort(key=len) return cand_same_mode + cand_diff_mode + def cond_to_branch(self, cond_name): + """ + returns branch which is alternative to IR @cond_name + """ + assert cond_name in cond_dct_inv, "Unknown cond name" + cond_num = cond_dct_inv[cond_name] + return cond_to_br[cond_num] + class bs_modname_size(bs_divert): prio = 1 diff --git a/miasm/core/asmblock.py b/miasm/core/asmblock.py index 0e715f410..a45eec7e6 100644 --- a/miasm/core/asmblock.py +++ b/miasm/core/asmblock.py @@ -852,6 +852,142 @@ def apply_splitting(self, loc_db, dis_block_callback=None, **kwargs): if rebuild_needed: self.rebuild_edges() + def ChangeSuccessor(self, block, old_successor, new_successor, mnemo, attrib, **args): + """ + changes relation between a node and its successor + perhaps changes c_next constraint to c_to and updates + @mnemo is cls_mn instance of particular architecture + @attrib is bit number(e.g. 
16, 32, 64 bit) + @args: @uncond_jmp if set creates an unconditional branch instead of c_next or conditional branch + @cond_name if set changes previous branch instruction to the specific architecture alternative of its IR conditional representation + """ + from miasm.expression.expression import ExprLoc + successors = self.successors(block.loc_key) + csts = self.edges2constraint.get((block.loc_key, old_successor), None) + assert csts != None, 'Couldn\'t find edge: %s -> %s'%(old_successor, new_successor) + assert len(successors) < 3, 'Jump tables are currently not supported' + assert csts == 'c_next' or csts == 'c_to' + assert block.lines[-1].splitflow() + self.del_edge(block.loc_key, old_successor) + uncond_jmp = args.get("uncond_jmp", None) + cond_name = args.get("cond_name", None) + if csts == 'c_next': + if uncond_jmp is not None: + lockey = self.loc_db.add_location() + rs_block = AsmBlock(lockey) + inst = mnemo.instruction(mnemo.uncond_branch[0], attrib, [ExprLoc(new_successor, attrib)]) + rs_block.addline(inst) + self.add_block(rs_block) + self.add_edge(block.loc_key, lockey, "c_next") + self.add_edge(rs_block.loc_key, new_successor, "c_to") + else: + self.add_edge(block.loc_key, new_successor, "c_next") + else: + last_ln = block.lines[-1] + if last_ln.name not in mnemo.uncond_branch + mnemo.cond_branch: + #perhaps loop in x86 + block.lines[-1].args[0] = ExprLoc(new_successor, attrib) + else: + if cond_name is not None: + block.lines[-1] = mnemo.instruction(mnemo().cond_to_branch(cond_name), attrib, [ExprLoc(new_successor, attrib)]) + elif uncond_jmp is not None: + block.lines[-1] = mnemo.instruction(mnemo.uncond_branch[0], attrib, [ExprLoc(new_successor, attrib)]) + else: + block.lines[-1].args[0] = ExprLoc(new_successor, attrib) + self.add_edge(block.loc_key, new_successor, "c_to") + + def FixMultipleNextConstraints(self, mnemo, attrib): + """ + fixes multiple next constraints error by changing c_next constraints to c_to with unconditional branches + """ + 
for loc_key in set(self.nodes()): + next_edges = {edge: constraint for edge, constraint in self.edges2constraint.iteritems() if constraint == AsmConstraint.c_next} + pred_next = list(ploc_key for (ploc_key, dloc_key) in next_edges if dloc_key == loc_key) + if len(pred_next) > 1: + for i in range(1, len(pred_next)): + self.ChangeSuccessor(self.loc_key_to_block(pred_next[i]), loc_key, loc_key, mnemo, attrib, uncond_jmp = 1) + def PasteBlockTo(self, block, mnemo, attrib, c_next_as_jmp = None, **args): + """ + Puts a node before, behind or between particular nodes + @block AsmBlock to be pasted + @c_next_as_jmp is not read by this method; the uncond_jmp entry of @args is what gets forwarded to ChangeSuccessor + @args: @loc_key_before is loc_key of AsmBlock which should be @block's predecessor + @head is loc_key of @loc_key_before's successor which should be replaced by @block + @loc_key_after is loc_key of AsmBlock which should be @block's successor + @tail is loc_key of @block's successor which should be replaced by @loc_key_after + """ + if args.get('loc_key_before', None) != None and args.get('head', None) != None: + self.ChangeSuccessor(self.loc_key_to_block(args['loc_key_before']), args['head'], block.loc_key, mnemo, attrib, uncond_jmp=args.get("uncond_jmp", None)) + if args.get('loc_key_after', None) != None and args.get('tail', None) != None: + self.ChangeSuccessor(block, args['tail'], args['loc_key_after'], mnemo, attrib, uncond_jmp=args.get("uncond_jmp", None)) + def PasteSubAsmcfgTo(self, sub_asmcfg_head, mnemo, attrib, **args): + """ + Puts a series of basic blocks before, behind or between particular nodes + @sub_asmcfg_head is loc_key of the first AsmBlock of the series + @args: @loc_key_before is loc_key of AsmBlock which should be @sub_asmcfg_head's predecessor + @head is loc_key of @loc_key_before's successor which should be replaced by @sub_asmcfg_head + @loc_key_after is loc_key of AsmBlock which should be @sub_asmcfg_head's successor + @tail is 
loc_key of @sub_asmcfg_heads successor which should be replaced by @loc_key_after + """ + if args.get('loc_key_before', None) != None and args.get('head', None) != None: + self.PasteBlockTo(self.loc_key_to_block(sub_asmcfg_head), mnemo, attrib, loc_key_before=args['loc_key_before'], head=args['head']) + if args.get('tail', None) != None and args.get('loc_key_after', None) != None: + nodes = set(self.reachable_sons_stop_node(sub_asmcfg_head, args['tail'])) + for pred in self.predecessors(args['tail']): + if pred in nodes: + self.PasteBlockTo(self.loc_key_to_block(pred), mnemo, attrib, loc_key_after=args['loc_key_after'], tail=args['tail']) + + def CopyBlock(self, block, mnemo, sv = 1): + """ + creates and returns a copy of @block + if flag @sv is set, adds it to current AsmCfg + """ + lockey = self.loc_db.add_location() + rs_block = AsmBlock(lockey) + for ln in block.lines: + tmp_ln = mnemo.instruction(ln.name, ln.mode, ln.args, ln.additional_info) + rs_block.addline(tmp_ln) + rs_block.bto = set(block.bto) + rs_block.alignment = block.alignment + if sv: + self.add_block(rs_block) + return rs_block + + def CopySubAsmcfg(self, head, tails, mnemo, attrib, prev = None, done = None): + """ + copies all basic blocks from @head to @tails + @prev is loc_key of block to have changed successor from @head loc_key to copy of @head + @done is set of already processed blocks + returns the first new basic block + """ + if done == None: + done = {} + if head in tails: + return prev + if head not in done: + done[head] = 1 + new_block = self.CopyBlock(self.loc_key_to_block(head), mnemo, 1) + if prev != None: + self.PasteBlockTo(new_block, mnemo, attrib, loc_key_before=prev, head=head) + for succ in self.successors(head): + self.CopySubAsmcfg(succ, tails, mnemo, attrib, new_block.loc_key, done) + else: + return None + return new_block + + def RemoveRedundantBlocks(self, src): + """ + deletes all blocks which are not reachable from @src + @src is loc_key of beginning of the function + 
""" + reachable_nodes = set(self.reachable_sons(src)) + all_nodes = set(self.nodes()) + for loc_key in all_nodes: + if loc_key not in reachable_nodes: + self.del_block(self.loc_key_to_block(loc_key)) + def __str__(self): out = [] for block in self.blocks: diff --git a/miasm/core/graph.py b/miasm/core/graph.py index 01f580a35..daf220f2f 100644 --- a/miasm/core/graph.py +++ b/miasm/core/graph.py @@ -315,11 +315,28 @@ def predecessors_stop_node_iter(self, node, head): for next_node in self.predecessors_iter(node): yield next_node + def successors_stop_node_iter(self, node, tail): + if node == tail: + return + for next_node in self.successors_iter(node): + yield next_node + def reachable_sons(self, head): """Compute all nodes reachable from node @head. Each son is an immediate successor of an arbitrary, already yielded son of @head""" return self._reachable_nodes(head, self.successors_iter) + def reachable_sons_stop_node(self, leaf, tail): + """Compute all sons of node @leaf. Each son is an immediate + successor of an arbitrary, already yielded son of @leaf. + Do not compute reachables past @tail node""" + return self._reachable_nodes( + leaf, + lambda node_cur: self.successors_stop_node_iter( + node_cur, tail + ) + ) + def reachable_parents(self, leaf): """Compute all parents of node @leaf. 
Each parent is an immediate predecessor of an arbitrary, already yielded parent of @leaf""" diff --git a/miasm/expression/expression.py b/miasm/expression/expression.py index 6f171d93f..925d83463 100644 --- a/miasm/expression/expression.py +++ b/miasm/expression/expression.py @@ -73,6 +73,64 @@ [ '-' ], # Unary '-', associativity with + not handled ] +COND_EQ = 0 +COND_NE = 1 +COND_CS = 2 +COND_CC = 3 +COND_MI = 4 +COND_PL = 5 +COND_VS = 6 +COND_VC = 7 +COND_HI = 8 +COND_LS = 9 +COND_GE = 10 +COND_LT = 11 +COND_GT = 12 +COND_LE = 13 +COND_AL = 14 +COND_NV = 15 + +cond_dct = { + COND_EQ: "EQ", + COND_NE: "NE", + COND_CS: "CS", + COND_CC: "CC", + COND_MI: "MI", + COND_PL: "PL", + COND_VS: "VS", + COND_VC: "VC", + COND_HI: "HI", + COND_LS: "LS", + COND_GE: "GE", + COND_LT: "LT", + COND_GT: "GT", + COND_LE: "LE", + COND_AL: "AL", + # COND_NV: "NV", +} + +cond_dct_inv = dict((name, num) for num, name in viewitems(cond_dct)) + + +""" +Code Meaning (for cmp or subs) Flags Tested +eq Equal. Z==1 +ne Not equal. Z==0 +cs or hs Unsigned higher or same (or carry set). C==1 +cc or lo Unsigned lower (or carry clear). C==0 +mi Negative. The mnemonic stands for "minus". N==1 +pl Positive or zero. The mnemonic stands for "plus". N==0 +vs Signed overflow. The mnemonic stands for "V set". V==1 +vc No signed overflow. The mnemonic stands for "V clear". V==0 +hi Unsigned higher. (C==1) && (Z==0) +ls Unsigned lower or same. (C==0) || (Z==1) +ge Signed greater than or equal. N==V +lt Signed less than. N!=V +gt Signed greater than. (Z==0) && (N==V) +le Signed less than or equal. (Z==1) || (N!=V) +al (or omitted) Always executed. None tested. +""" + # dictionary from 'op' to priority, derived from above priorities = dict((op, prio) for prio, l in enumerate(priorities_list)