Skip to content

Commit

Permalink
addressing review
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Plohmann (jupiter) committed Oct 29, 2020
1 parent 669d348 commit 60ddf04
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 33 deletions.
30 changes: 16 additions & 14 deletions capa/features/extractors/smda/basicblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,22 @@ def is_mov_imm_to_stack(smda_ins):
return True


def is_printable_ascii(chars):
    """
    Return True if every element of `chars` is a printable ASCII character.

    An element qualifies when its code point is below 127 and the character
    is a member of string.printable. An empty sequence yields True.

    Args:
      chars: sequence to test; iterated as ints on Python 3 and as
        1-character strings on Python 2.

    Returns:
      bool: True when all elements qualify, otherwise False.
    """
    if sys.version_info[0] >= 3:
        for code in chars:
            # Range check comes first so chr() is only applied to small values.
            if code >= 127 or chr(code) not in string.printable:
                return False
        return True

    for char in chars:
        if ord(char) >= 127 or char not in string.printable:
            return False
    return True


def is_printable_utf16le(chars):
    """
    Return True if `chars` looks like printable ASCII text stored as UTF-16LE.

    Every element at an odd index (the high byte of each little-endian 16-bit
    unit) must be zero, and the elements at even indices must satisfy
    is_printable_ascii().

    Args:
      chars: sequence to test; presumably bytes on Python 3 (elements are
        compared against the int 0) and str on Python 2 (elements are
        compared against "\\x00").

    Returns:
      bool: True when both conditions hold, otherwise False.
    """
    # Slicing/iterating yields ints on Python 3 but 1-character strings on Python 2.
    zero = 0x00 if sys.version_info[0] >= 3 else "\x00"
    if not all(c == zero for c in chars[1::2]):
        # Be explicit: previously this path fell off the end and returned None.
        return False
    return is_printable_ascii(chars[::2])


def get_printable_len(instr):
"""
Return string length if all operand bytes are ascii or utf16-le printable
Expand All @@ -91,20 +107,6 @@ def get_printable_len(instr):
else:
raise ValueError("Unhandled operand data type 0x%x." % instr.imm_size)

def is_printable_ascii(chars):
if sys.version_info[0] >= 3:
return all(c < 127 and chr(c) in string.printable for c in chars)
else:
return all(ord(c) < 127 and c in string.printable for c in chars)

def is_printable_utf16le(chars):
if sys.version_info[0] >= 3:
if all(c == 0x00 for c in chars[1::2]):
return is_printable_ascii(chars[::2])
else:
if all(c == "\x00" for c in chars[1::2]):
return is_printable_ascii(chars[::2])

if is_printable_ascii(chars):
return instr.imm_size
if is_printable_utf16le(chars):
Expand Down
32 changes: 13 additions & 19 deletions capa/features/extractors/smda/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
# Security cookie checks may perform non-zeroing XORs. These are expected within a certain
# byte range of the first and returning basic blocks; this helps to reduce false-positive (FP) features.
SECURITY_COOKIE_BYTES_DELTA = 0x40
PATTERN_HEXNUM = re.compile(r"[+\-] (?P<num>0x[a-fA-F0-9]+)")
PATTERN_SINGLENUM = re.compile(r"[+\-] (?P<num>[0-9])")


def get_arch(smda_report: SmdaReport):
def get_arch(smda_report):
if smda_report.architecture == "intel":
if smda_report.bitness == 32:
return ARCH_X32
Expand Down Expand Up @@ -52,8 +54,8 @@ def extract_insn_api_features(f, bb, insn):
# reformat
dll_name, api_name = api_entry.split("!")
dll_name = dll_name.split(".")[0]
name = dll_name + "." + api_name
yield API(name), insn.offset
for name in capa.features.extractors.helpers.generate_symbols(dll_name, api_name):
yield API(name), insn.offset


def extract_insn_number_features(f, bb, insn):
Expand Down Expand Up @@ -106,7 +108,7 @@ def extract_insn_bytes_features(f, bb, insn):
yield Bytes(bytes_read), insn.offset


def detectAsciiLen(smda_report, offset):
def detect_ascii_len(smda_report, offset):
if smda_report.buffer is None:
return 0
ascii_len = 0
Expand All @@ -121,7 +123,7 @@ def detectAsciiLen(smda_report, offset):
return 0


def detectUnicodeLen(smda_report, offset):
def detect_unicode_len(smda_report, offset):
if smda_report.buffer is None:
return 0
unicode_len = 0
Expand All @@ -139,10 +141,10 @@ def detectUnicodeLen(smda_report, offset):


def read_string(smda_report, offset):
alen = detectAsciiLen(smda_report, offset)
alen = detect_ascii_len(smda_report, offset)
if alen > 1:
return read_bytes(smda_report, offset, alen).decode("utf-8")
ulen = detectUnicodeLen(smda_report, offset)
ulen = detect_unicode_len(smda_report, offset)
if ulen > 2:
return read_bytes(smda_report, offset, ulen).decode("utf-16")

Expand All @@ -167,8 +169,8 @@ def extract_insn_offset_features(f, bb, insn):
operands = [o.strip() for o in insn.operands.split(",")]
for operand in operands:
number = None
number_hex = re.search(r"[+\-] (?P<num>0x[a-fA-F0-9]+)", operand)
number_int = re.search(r"[+\-] (?P<num>[0-9])", operand)
number_hex = re.search(PATTERN_HEXNUM, operand)
number_int = re.search(PATTERN_SINGLENUM, operand)
if number_hex:
number = int(number_hex.group("num"), 16)
number = -1 * number if number_hex.group().startswith("-") else number
Expand Down Expand Up @@ -241,20 +243,12 @@ def extract_insn_segment_access_features(f, bb, insn):
""" parse the instruction for access to fs or gs """
operands = [o.strip() for o in insn.operands.split(",")]
for operand in operands:
if "fs:" in operand and "0x30" in operand:
if "fs:" in operand:
yield Characteristic("fs access"), insn.offset
elif "gs:" in operand and "0x60" in operand:
elif "gs:" in operand:
yield Characteristic("gs access"), insn.offset


def get_section(vw, va):
    """
    Return the start address of the memory map that contains `va`.

    Args:
      vw: object exposing getMemoryMaps(), which yields 4-element entries
        whose first two items are the map start and its length.
      va: address to look up.

    Returns:
      The start of the first map for which start <= va < start + length.

    Raises:
      KeyError: if no memory map contains `va`.
    """
    for base, size, _, __ in vw.getMemoryMaps():
        # Skip maps whose half-open range [base, base + size) excludes va.
        if va < base or va >= base + size:
            continue
        return base

    raise KeyError(va)


def extract_insn_cross_section_cflow(f, bb, insn):
"""
inspect the instruction for a CALL or JMP that crosses section boundaries.
Expand Down

0 comments on commit 60ddf04

Please sign in to comment.