From 60ddf0400e69f2e22c12c2a0a64913bbfe21e7ba Mon Sep 17 00:00:00 2001 From: "Daniel Plohmann (jupiter)" Date: Thu, 29 Oct 2020 17:47:10 +0100 Subject: [PATCH] addressing review --- capa/features/extractors/smda/basicblock.py | 30 ++++++++++--------- capa/features/extractors/smda/insn.py | 32 +++++++++------------ 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/capa/features/extractors/smda/basicblock.py b/capa/features/extractors/smda/basicblock.py index 432f5dcf6..4d130505b 100644 --- a/capa/features/extractors/smda/basicblock.py +++ b/capa/features/extractors/smda/basicblock.py @@ -68,6 +68,22 @@ def is_mov_imm_to_stack(smda_ins): return True +def is_printable_ascii(chars): + if sys.version_info[0] >= 3: + return all(c < 127 and chr(c) in string.printable for c in chars) + else: + return all(ord(c) < 127 and c in string.printable for c in chars) + + +def is_printable_utf16le(chars): + if sys.version_info[0] >= 3: + if all(c == 0x00 for c in chars[1::2]): + return is_printable_ascii(chars[::2]) + else: + if all(c == "\x00" for c in chars[1::2]): + return is_printable_ascii(chars[::2]) + + def get_printable_len(instr): """ Return string length if all operand bytes are ascii or utf16-le printable @@ -91,20 +107,6 @@ def get_printable_len(instr): else: raise ValueError("Unhandled operand data type 0x%x." 
% instr.imm_size) - def is_printable_ascii(chars): - if sys.version_info[0] >= 3: - return all(c < 127 and chr(c) in string.printable for c in chars) - else: - return all(ord(c) < 127 and c in string.printable for c in chars) - - def is_printable_utf16le(chars): - if sys.version_info[0] >= 3: - if all(c == 0x00 for c in chars[1::2]): - return is_printable_ascii(chars[::2]) - else: - if all(c == "\x00" for c in chars[1::2]): - return is_printable_ascii(chars[::2]) - if is_printable_ascii(chars): return instr.imm_size if is_printable_utf16le(chars): diff --git a/capa/features/extractors/smda/insn.py b/capa/features/extractors/smda/insn.py index 0b2b4b3ea..e33d86c49 100644 --- a/capa/features/extractors/smda/insn.py +++ b/capa/features/extractors/smda/insn.py @@ -18,9 +18,11 @@ # security cookie checks may perform non-zeroing XORs, these are expected within a certain # byte range within the first and returning basic blocks, this helps to reduce FP features SECURITY_COOKIE_BYTES_DELTA = 0x40 +PATTERN_HEXNUM = re.compile(r"[+\-] (?P<num>0x[a-fA-F0-9]+)") +PATTERN_SINGLENUM = re.compile(r"[+\-] (?P<num>[0-9])") -def get_arch(smda_report: SmdaReport): +def get_arch(smda_report): if smda_report.architecture == "intel": if smda_report.bitness == 32: return ARCH_X32 @@ -52,8 +54,8 @@ def extract_insn_api_features(f, bb, insn): # reformat dll_name, api_name = api_entry.split("!") dll_name = dll_name.split(".")[0] - name = dll_name + "."
+ api_name - yield API(name), insn.offset + for name in capa.features.extractors.helpers.generate_symbols(dll_name, api_name): + yield API(name), insn.offset def extract_insn_number_features(f, bb, insn): @@ -106,7 +108,7 @@ def extract_insn_bytes_features(f, bb, insn): yield Bytes(bytes_read), insn.offset -def detectAsciiLen(smda_report, offset): +def detect_ascii_len(smda_report, offset): if smda_report.buffer is None: return 0 ascii_len = 0 @@ -121,7 +123,7 @@ def detectAsciiLen(smda_report, offset): return 0 -def detectUnicodeLen(smda_report, offset): +def detect_unicode_len(smda_report, offset): if smda_report.buffer is None: return 0 unicode_len = 0 @@ -139,10 +141,10 @@ def detectUnicodeLen(smda_report, offset): def read_string(smda_report, offset): - alen = detectAsciiLen(smda_report, offset) + alen = detect_ascii_len(smda_report, offset) if alen > 1: return read_bytes(smda_report, offset, alen).decode("utf-8") - ulen = detectUnicodeLen(smda_report, offset) + ulen = detect_unicode_len(smda_report, offset) if ulen > 2: return read_bytes(smda_report, offset, ulen).decode("utf-16") @@ -167,8 +169,8 @@ def extract_insn_offset_features(f, bb, insn): operands = [o.strip() for o in insn.operands.split(",")] for operand in operands: number = None - number_hex = re.search(r"[+\-] (?P<num>0x[a-fA-F0-9]+)", operand) - number_int = re.search(r"[+\-] (?P<num>[0-9])", operand) + number_hex = re.search(PATTERN_HEXNUM, operand) + number_int = re.search(PATTERN_SINGLENUM, operand) if number_hex: number = int(number_hex.group("num"), 16) number = -1 * number if number_hex.group().startswith("-") else number @@ -241,20 +243,12 @@ def extract_insn_segment_access_features(f, bb, insn): """ parse the instruction for access to fs or gs """ operands = [o.strip() for o in insn.operands.split(",")] for operand in operands: - if "fs:" in operand and "0x30" in operand: + if "fs:" in operand: yield Characteristic("fs access"), insn.offset - elif "gs:" in operand and "0x60" in operand: + elif
"gs:" in operand: yield Characteristic("gs access"), insn.offset -def get_section(vw, va): - for start, length, _, __ in vw.getMemoryMaps(): - if start <= va < start + length: - return start - - raise KeyError(va) - - def extract_insn_cross_section_cflow(f, bb, insn): """ inspect the instruction for a CALL or JMP that crosses section boundaries.