From 09fdc258baf244b74bb72fd585a187b0450a29f1 Mon Sep 17 00:00:00 2001 From: cooki35 <33752572+cooki35@users.noreply.github.com> Date: Tue, 29 Nov 2022 14:39:46 +0100 Subject: [PATCH 1/4] Add support for CSV like input and do some minor refactoring. Add the input format InputFormat.CSV in parse_lines. In parse_line call the function parse_line_csv if the input format is InputFormat.CSV. The parse_line_csv function extracts the instruction from the line and generates an Instruction instance. Creat an abstract base class TargetInfo for the X86 and ARM target infos. Make the class InputFormat inherit from Enum. Move the classes to the top of the file. Add some comments to document the code. --- src/asm2cfg/asm2cfg.py | 302 +++++++++++++++++++++++++---------------- 1 file changed, 188 insertions(+), 114 deletions(-) diff --git a/src/asm2cfg/asm2cfg.py b/src/asm2cfg/asm2cfg.py index 2c426ae..87db83e 100644 --- a/src/asm2cfg/asm2cfg.py +++ b/src/asm2cfg/asm2cfg.py @@ -2,29 +2,70 @@ Module containing main building blocks to parse assembly and draw CFGs. """ +from abc import ABC, abstractmethod import re import sys import tempfile - +from enum import Enum from graphviz import Digraph # TODO: make this a command-line flag VERBOSE = 0 +# Common regexes +HEX_PATTERN = r'[0-9a-fA-F]+' +HEX_LONG_PATTERN = r'(?:0x0*)?' + HEX_PATTERN -def escape(instruction): + +class InputFormat(Enum): """ - Escape used dot graph characters in given instruction so they will be - displayed correctly. 
+ An enum which represents various supported input formats """ - instruction = instruction.replace('<', r'\<') - instruction = instruction.replace('>', r'\>') - instruction = instruction.replace('|', r'\|') - instruction = instruction.replace('{', r'\{') - instruction = instruction.replace('}', r'\}') - instruction = instruction.replace(' ', ' ') - return instruction + GDB = 'GDB' + OBJDUMP = 'OBJDUMP' + CSV = 'CSV' + + +class JumpTable: + """ + Holds info about branch sources and destinations in asm function. + """ + + def __init__(self, instructions): + # Address where the jump begins and value which address + # to jump to. This also includes calls. + self.abs_sources = {} + self.rel_sources = {} + + # Addresses where jumps end inside the current function. + self.abs_destinations = set() + self.rel_destinations = set() + + # Iterate over the lines and collect jump targets and branching points. + for inst in instructions: + if inst is None or not inst.is_direct_jump(): + continue + + self.abs_sources[inst.address.abs] = inst.target + self.abs_destinations.add(inst.target.abs) + + self.rel_sources[inst.address.offset] = inst.target + self.rel_destinations.add(inst.target.offset) + + def is_destination(self, address): + if address.abs is not None: + return address.abs in self.abs_destinations + if address.offset is not None: + return address.offset in self.rel_destinations + return False + + def get_target(self, address): + if address.abs is not None: + return self.abs_sources.get(address.abs) + if address.offset is not None: + return self.rel_sources.get(address.offset) + return None class BasicBlock: @@ -85,69 +126,11 @@ def __repr__(self): return '\n'.join([i.text for i in self.instructions]) -def print_assembly(basic_blocks): - """ - Debug function to print the assembly. - """ - for basic_block in basic_blocks.values(): - print(basic_block) - - -def read_lines(file_path): - """ Read lines from the file and return then as a list. 
""" - lines = [] - with open(file_path, 'r', encoding='utf8') as asm_file: - lines = asm_file.readlines() - return lines - - -# Common regexes -HEX_PATTERN = r'[0-9a-fA-F]+' -HEX_LONG_PATTERN = r'(?:0x0*)?' + HEX_PATTERN - - -class InputFormat: # pylint: disable=too-few-public-methods - """ - An enum which represents various supported input formats - """ - GDB = 'GDB' - OBJDUMP = 'OBJDUMP' - - -def parse_function_header(line): - """ - Return function name of memory range from the given string line. - - Match lines for non-stripped binaries: - 'Dump of assembler code for function test_function:' - lines for stripped binaries: - 'Dump of assembler code from 0x555555555faf to 0x555555557008:' - and lines for obdjdump disassembly: - '0000000000016bb0 <_obstack_allocated_p@@Base>:' - """ - - objdump_name_pattern = re.compile(fr'{HEX_PATTERN} <([a-zA-Z_0-9@.]+)>:') - function_name = objdump_name_pattern.search(line) - if function_name is not None: - return InputFormat.OBJDUMP, function_name[1] - - function_name_pattern = re.compile(r'function (\w+):$') - function_name = function_name_pattern.search(line) - if function_name is not None: - return InputFormat.GDB, function_name[1] - - memory_range_pattern = re.compile(fr'(?:Address range|from) ({HEX_LONG_PATTERN}) to ({HEX_LONG_PATTERN}):$') - memory_range = memory_range_pattern.search(line) - if memory_range is not None: - return InputFormat.GDB, f'{memory_range[1]}-{memory_range[2]}' - - return None, None - - class Address: """ Represents location in program which may be absolute or relative """ + def __init__(self, abs_addr, base=None, offset=None): self.abs = abs_addr self.base = base @@ -182,6 +165,7 @@ class Encoding: e.g. 
the '31 c0' in '16bd3: 31 c0 xor %eax,%eax' """ + def __init__(self, bites): self.bites = bites @@ -192,7 +176,46 @@ def __str__(self): return ' '.join(map(lambda b: f'{b:#x}', self.bites)) -class X86TargetInfo: +class TargetInfo(ABC): + """ + Abstract class, contains instruction info for the targets. + """ + + def __init__(self): + pass + + @abstractmethod + def comment(self): + """ + Returns the comment symbol for the target. + """ + + @abstractmethod + def is_call(self, instruction): + """ + Returns True if the instruction is of type call. + """ + + @abstractmethod + def is_jump(self, instruction): + """ + Returns True if the instruction is of type jump. + """ + + @abstractmethod + def is_unconditional_jump(self, instruction): + """ + Returns True if the instruction is an is_unconditional jump. + """ + + @abstractmethod + def is_sink(self, instruction): + """ + Is this an instruction which terminates function execution e.g. return? + """ + + +class X86TargetInfo(TargetInfo): """ Contains instruction info for X86-compatible targets. """ @@ -223,7 +246,7 @@ def is_sink(self, instruction): return instruction.opcode.startswith('ret') -class ARMTargetInfo: +class ARMTargetInfo(TargetInfo): """ Contains instruction info for ARM-compatible targets. """ @@ -266,6 +289,7 @@ class Instruction: Represents a single assembly instruction with it operands, location and optional branch target """ + def __init__(self, body, text, lineno, address, opcode, ops, target, imm, target_info): # noqa self.body = body self.text = text @@ -303,6 +327,69 @@ def __str__(self): return result +def escape(instruction): + """ + Escape used dot graph characters in given instruction so they will be + displayed correctly. 
+ """ + instruction = instruction.replace('<', r'\<') + instruction = instruction.replace('>', r'\>') + instruction = instruction.replace('|', r'\|') + instruction = instruction.replace('{', r'\{') + instruction = instruction.replace('}', r'\}') + instruction = instruction.replace(' ', ' ') + return instruction + + +def print_assembly(basic_blocks): + """ + Debug function to print the assembly. + """ + for basic_block in basic_blocks.values(): + print(basic_block) + + +def read_lines(file_path): + """ Read lines from the file and return then as a list. """ + lines = [] + with open(file_path, 'r', encoding='utf8') as asm_file: + lines = asm_file.readlines() + return lines + + +def parse_function_header(line): + """ + Return function name of memory range from the given string line. + + Match lines for non-stripped binaries: + 'Dump of assembler code for function test_function:' + lines for stripped binaries: + 'Dump of assembler code from 0x555555555faf to 0x555555557008:' + and lines for obdjdump disassembly: + '0000000000016bb0 <_obstack_allocated_p@@Base>:' + """ + + objdump_name_pattern = re.compile(fr'{HEX_PATTERN} <([a-zA-Z_0-9@.]+)>:') + function_name = objdump_name_pattern.search(line) + if function_name is not None: + return InputFormat.OBJDUMP, function_name[1] + + function_name_pattern = re.compile(r'function (\w+):$') + function_name = function_name_pattern.search(line) + if function_name is not None: + return InputFormat.GDB, function_name[1] + + memory_range_pattern = re.compile(fr'(?:Address range|from) ({HEX_LONG_PATTERN}) to ({HEX_LONG_PATTERN}):$') + memory_range = memory_range_pattern.search(line) + if memory_range is not None: + return InputFormat.GDB, f'{memory_range[1]}-{memory_range[2]}' + + if line.strip() == 'address;bytes;operator;operand': + return InputFormat.CSV, None + + return None, None + + def parse_address(line): """ Parses leading address of instruction @@ -397,10 +484,30 @@ def parse_comment(line, target_info): return target, 
imm_match[3] +def parse_line_csv(line: str, lineno, target_info): + """ + Parse a single line of assembly to create an Instruction instance. + """ + original_line = line + elements: list[str] = line.split(";") + addr: Address = Address(int(elements[0])) + operands: str = elements[3] + target: Address | None = None + match = re.match(r"^[\d]+$", operands) + if match: + target = Address(int(operands)) + txt = original_line.strip() + return Instruction(body=None, text=txt, lineno=lineno, address=addr, opcode=elements[2], ops=operands, target=target, imm=None, target_info=target_info) + + def parse_line(line, lineno, function_name, fmt, target_info): """ Parses a single line of assembly to create Instruction instance """ + original_line = line + + if fmt == InputFormat.CSV: + return parse_line_csv(line, lineno, target_info) # Strip GDB prefix and leading whites if line.startswith('=> '): @@ -417,7 +524,6 @@ def parse_line(line, lineno, function_name, fmt, target_info): if not line: return encoding - original_line = line body, opcode, ops, line = parse_body(line, target_info) if opcode is None: return None @@ -438,47 +544,6 @@ def parse_line(line, lineno, function_name, fmt, target_info): return Instruction(body, original_line.strip(), lineno, address, opcode, ops, target, imm, target_info) -class JumpTable: - """ - Holds info about branch sources and destinations in asm function. - """ - - def __init__(self, instructions): - # Address where the jump begins and value which address - # to jump to. This also includes calls. - self.abs_sources = {} - self.rel_sources = {} - - # Addresses where jumps end inside the current function. - self.abs_destinations = set() - self.rel_destinations = set() - - # Iterate over the lines and collect jump targets and branching points. 
- for inst in instructions: - if inst is None or not inst.is_direct_jump(): - continue - - self.abs_sources[inst.address.abs] = inst.target - self.abs_destinations.add(inst.target.abs) - - self.rel_sources[inst.address.offset] = inst.target - self.rel_destinations.add(inst.target.offset) - - def is_destination(self, address): - if address.abs is not None: - return address.abs in self.abs_destinations - if address.offset is not None: - return address.offset in self.rel_destinations - return False - - def get_target(self, address): - if address.abs is not None: - return self.abs_sources.get(address.abs) - if address.offset is not None: - return self.rel_sources.get(address.offset) - return None - - def parse_lines(lines, skip_calls, target_name): # noqa pylint: disable=unused-argument if target_name == 'x86': target_info = X86TargetInfo() @@ -492,6 +557,8 @@ def parse_lines(lines, skip_calls, target_name): # noqa pylint: disable=unused- current_function_name = current_format = None for num, line in enumerate(lines, 1): fmt, function_name = parse_function_header(line) + if fmt == InputFormat.CSV: + function_name = "CSV" if function_name is not None: assert current_function_name is None, 'we handle only one function for now' if VERBOSE: @@ -501,6 +568,7 @@ def parse_lines(lines, skip_calls, target_name): # noqa pylint: disable=unused- continue instruction_or_encoding = parse_line(line, num, current_function_name, current_format, target_info) + # print(instruction_or_encoding) if isinstance(instruction_or_encoding, Encoding): # Partial encoding for previous instruction, skip it continue @@ -508,12 +576,18 @@ def parse_lines(lines, skip_calls, target_name): # noqa pylint: disable=unused- instructions.append(instruction_or_encoding) continue + # Ignore the last line of gdb informing about the end of the dump if line.startswith('End of assembler dump') or not line: continue + # Ignore empty lines if line.strip() == '': continue + # Ignore the header of the CSV file + if 
line.strip() == 'address;bytes;operator;operand': + continue + print(f'Unexpected assembly at line {num}:\n {line}') sys.exit(1) From fe4aad5616e8e514c8ef718b60e599df40dd6d98 Mon Sep 17 00:00:00 2001 From: cooki35 <33752572+cooki35@users.noreply.github.com> Date: Tue, 29 Nov 2022 15:36:45 +0100 Subject: [PATCH 2/4] Fix some linting problems. --- src/asm2cfg/asm2cfg.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/asm2cfg/asm2cfg.py b/src/asm2cfg/asm2cfg.py index 87db83e..fbcae1e 100644 --- a/src/asm2cfg/asm2cfg.py +++ b/src/asm2cfg/asm2cfg.py @@ -22,6 +22,7 @@ class InputFormat(Enum): """ An enum which represents various supported input formats """ + GDB = 'GDB' OBJDUMP = 'OBJDUMP' CSV = 'CSV' @@ -489,15 +490,25 @@ def parse_line_csv(line: str, lineno, target_info): Parse a single line of assembly to create an Instruction instance. """ original_line = line - elements: list[str] = line.split(";") + elements: list[str] = line.split(';') addr: Address = Address(int(elements[0])) operands: str = elements[3] target: Address | None = None - match = re.match(r"^[\d]+$", operands) + match = re.match(r'^[\d]+$', operands) if match: target = Address(int(operands)) txt = original_line.strip() - return Instruction(body=None, text=txt, lineno=lineno, address=addr, opcode=elements[2], ops=operands, target=target, imm=None, target_info=target_info) + return Instruction( + body=None, + text=txt, + lineno=lineno, + address=addr, + opcode=elements[2], + ops=operands, + target=target, + imm=None, + target_info=target_info, + ) def parse_line(line, lineno, function_name, fmt, target_info): @@ -558,7 +569,7 @@ def parse_lines(lines, skip_calls, target_name): # noqa pylint: disable=unused- for num, line in enumerate(lines, 1): fmt, function_name = parse_function_header(line) if fmt == InputFormat.CSV: - function_name = "CSV" + function_name = 'CSV' if function_name is not None: assert current_function_name is None, 'we handle only one 
function for now' if VERBOSE: From 3186bcb54795f9953eddcfbc72f79798e82bd293 Mon Sep 17 00:00:00 2001 From: cooki35 <33752572+cooki35@users.noreply.github.com> Date: Tue, 29 Nov 2022 15:39:20 +0100 Subject: [PATCH 3/4] Update the README. Add that asm2cfg also supports CSV input. Rewrite some text. --- README.md | 234 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 120 insertions(+), 114 deletions(-) diff --git a/README.md b/README.md index c52e514..13bb21d 100644 --- a/README.md +++ b/README.md @@ -5,19 +5,20 @@ [![Total alerts](https://img.shields.io/lgtm/alerts/g/Kazhuu/asm2cfg.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/Kazhuu/asm2cfg/alerts/) [![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/Kazhuu/asm2cfg.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/Kazhuu/asm2cfg/context:python) -Python command-line tool and GDB extension to view and save x86, ARM and objdump -assembly files as control-flow graph (CFG) pdf files. From GDB debugging session -use `viewcfg` command to view CFG and use `savecfg` command to save it to the -pdf file. +Asm2cfg is a python command-line tool and GDB extension to view and save x86 and +ARM assembly files from GDB, objdump or CSV files as control-flow graph (CFG) +pdf files. +From a GDB debugging session use the `viewcfg` command to view CFG and use +the `savecfg` command to save it to a pdf file.

-Program has been developed to support X86, ARM and objdump assembly outputs. -Program is mostly tested with x86 assembly. ARM and objdump formats might not be -fully supported. If you have any suggestions or find bugs, please open an issue -or create a pull request. If you want to contribute, check +Asm2cfg has been developed to support X86, ARM, objdump, GDB and CSV assembly +outputs. The program is mostly tested with x86 assembly. ARM, objdump and CSV +formats might not be fully supported. If you have any suggestions or find bugs, +please open an issue or create a pull request. If you want to contribute, check [Development](#development) how to get started. ## Table of Content @@ -26,10 +27,10 @@ or create a pull request. If you want to contribute, check * [Install](#install) * [Usage From GDB](#usage-from-gdb) -* [Usage as Standalone](#usage-as-standalone) - * [Knowing Function Name](#knowing-function-name) +* [Standalone Usage](#standalone-usage) + * [Get Function Names](#get-function-names) * [Disassemble Function](#disassemble-function) - * [Draw CFG](#draw-cfg) + * [Draw CFG](#draw-cfgs) * [Examples](#examples) * [Development](#development) * [Python Environment](#python-environment) @@ -43,102 +44,107 @@ or create a pull request. If you want to contribute, check ## Install -Project can be installed with pip +The project can be installed using pip: ``` pip install asm2cfg ``` -To be able to view the dot files from GDB. External dot viewer is required. For -this purpose [xdot](https://pypi.org/project/xdot/) can be used for example. Any -other dot viewer will also do. To install this on Debian based distro run +To be able to view the dot files from GDB an external dot viewer is required. +For this purpose use e.g., [xdot](https://pypi.org/project/xdot/), but any +other dot viewer will also do. 
+To install xdot on Debian based distros run: ``` sudo apt install xdot ``` -Or Arch based +On Arch based systems run: ``` sudo pacman -S xdot ``` -To add extension to GDB you need to source the pip installed plugin to it. To -find where pip placed GDB extension run `which gdb_asm2cfg` or in case if you +To add the extension to GDB you need to source the pip installed plugin. To +find where pip placed the GDB extension run `which gdb_asm2cfg` or if you use pyenv use `pyenv which gdb_asm2cfg`. Copy the path to the clipboard. -Then in you home directory if not already add `.gdbinit` file -and place following line in it and replace path from the earlier step. +Then in your home directory, if not already there, add the `.gdbinit` file +and place the following line in it, replacing the path with the result from the +previous step. ``` source ``` -For example in my Linux machine line end up to be +For example on my Linux machine I end up with the following line: ``` source ~/.local/bin/gdb_asm2cfg.py ``` -Now when you start GDB no errors should be displayed and you are ready to go. +Now when you start GDB, there should be no errors and you are ready to go. ## Usage From GDB -In GDB session this extension provides command `viewcfg` to view CFG with -external dot viewer. Command `savecfg` saves the CFG to pdf file to current -working directory with same name as the function being dumped. Both commands -disassemble the current execution frame/function when the command is issued. To -see help for these commands use `help` command like `help viewcfg`. +In the GDB session this extension provides the command `viewcfg` to view the CFG +with an external dot viewer. The command `savecfg` saves the CFG to a pdf file +located in the current working directory with the same name as the function being +dumped. Both commands disassemble the current execution frame/function when the +command is issued. To see the help for these commands use the `help` command +e.g., `help viewcfg`.
-For example let's view main function from you favorite non-stripped executable. -First run GDB until main function +For example let's view the main function of your favorite non-stripped +executable. First run GDB until the main function: ``` gdb -ex 'b main' -ex 'run' ``` -Now run `viewcfg` to view CFG as a dot graph with external editor. Or run `savecfg` -to save CFG to pdf file named `main.pdf` to current working directory. If -function is stripped then memory address of the function will used as a name -instead. For example `0x555555555faf-0x555555557008.pdf`. +Now run `viewcfg` to view the CFG as a dot graph with an external editor. +Or run `savecfg` to save the CFG as a pdf file named `main.pdf` in the current +working directory. If the function is stripped the memory address of the +function will be used as the name instead. +For example `0x555555555faf-0x555555557008.pdf`. -If assembly function is very large with a lot of jumps and calls to other +If the assembly function is very large with a lot of jumps and calls to other functions. Then rendering the CFG can take a long time. So be patient or cancel -rendering with Ctrl-C. To make the rendering faster you can skip function calls -instructions from splitting the code to more blocks. To set this run `set -skipcalls on` and then run earlier command again. Note that if function is long -and has a lot of jumps inside itself, then rendering is still gonna take a long -time. To have normal behavior again run `set skipcalls off`. +rendering with Ctrl-C. To make the rendering faster you can skip function call +instructions from splitting the code into more blocks. To enable this option +run `set skipcalls on` and then run the previous command again. Note that if +the function is long and has a lot of jumps inside itself, then rendering will +still take a long time. To reset to the normal behavior again, run +`set skipcalls off`.
-## Usage as Standalone +## Standalone Usage -This method can be used with assembly files saved from ouput of objdump and GDB -disassembly. Pip installation will come with `asm2cfg` command-line tool for -this purpose. +Asm2cfg can also be used on (dis)-assembly files saved from the output of +objdump and GDB. The pip installation comes with the `asm2cfg` command-line +tool for this purpose. To use as standalone script you first need to dump assembly from GDB or objdump to the file which is explained below. -### Knowing Function Name +### Get Function Names -If you don't know the name of function you're looking for then you can also list -all function names using GDB: +If you don't know the name of the function you're looking for then you can also +list all function names using GDB: ``` gdb -batch -ex 'b main' -ex r -ex 'info functions' ./test_executable ``` -This will set breakpoint at function `main`, then +This will set a breakpoint at the function `main`, then run the program and print symbols from all loaded libraries. -For functions which come from main executable you can avoid running the program -and simply do +For functions which come from the main executable you can avoid running the +program and simply do: ``` gdb -batch -ex 'info functions' ./test_executable ``` -If you want to narrow the search down you can also use regexp +If you want to narrow the search down you can also use regexp: ``` gdb ... -ex 'info functions ' ... @@ -146,22 +152,22 @@ gdb ... -ex 'info functions ' ... 
### Disassemble Function -Once you have the function name, you can produce its disassembly via +Once you have the function name, you can produce its disassembly via: ``` gdb -batch -ex 'b main' -ex r -ex 'pipe disassemble test_function | tee test_function.asm' ./test_executable ``` -or +Or: ``` gdb -batch -ex 'set breakpoints pending on' -ex 'b test_function' -ex r -ex 'pipe disassemble | tee test_function.asm' ./test_executable ``` -(the `set breakpoint pending on` command enables pending breakpoints and -could be added to your `.gdbinit` instead) +(The `set breakpoint pending on` command enables pending breakpoints and +could be added to your `.gdbinit` instead.) -For functions from main executable it's enough to do +For functions from the main executable it's enough to do: ``` gdb -batch -ex 'pipe disassemble test_function | tee test_function.asm' ./test_executable @@ -173,57 +179,56 @@ You can also extract function's disassembly from `objdump` output: objdump -d ./test_executable | sed -ne '/ test_executable.asm ``` -(this may be useful for specific non-native targets which lack GDB support). +(This may be useful for specific non-native targets which lack GDB support.) -### Draw CFG +### Draw CFGs -Now you have the assembly file. Time to turn that to CFG pdf file. Do that by giving it -to `asm2cfg` command-line tool like so +Now, with the assembly file, it is time to use asm2cfg to generate a pdf file +containing the CFG. Just give the assembly file to the `asm2cfg` +command-line tool: ``` asm2cfg test_function.asm ``` -Asm2cfg by default expects x86 assembly files. If you want to use ARM assembly files, -then provide `--target arm` command-line flag. +Asm2cfg by default expects x86 assembly files. If you want to use ARM assembly +files, then provide the `--target arm` command-line flag. -Above command should output `test_function.pdf` file in the same directory where -the executable was ran. 
If the assembly file is stripped then the function -memory range is used as a name instead. For example -`0x555555555faf-0x555555557008.pdf`. +The above command outputs the `test_function.pdf` pdf file in the same +directory. If the assembly file is stripped then the function memory range is +used as a name instead. For example `0x555555555faf-0x555555557008.pdf`. -To view CFG instead of saving provide `-v` flag. And to skip function calls from -splitting the code to further blocks provide `-c` flag. To show the help use -`-h`. +To view the CFG instead of saving it, provide the `-v` flag. And to prohibit +function calls from splitting the code into further blocks provide the `-c` +flag. To show the help use `-h`. ### Examples -Repository includes examples which can be used to test the standalone +The repository includes examples which can be used to test the standalone functionality for x86, ARM and objdump. -File `test_function.asm` is non-stripped assembly file and its -corresponding output `test_function.pdf`. +The file `test_function.asm` is a non-stripped assembly file and its +corresponding output file is `test_function.pdf`. -File `stripped_function.asm` contains -stripped function and its corresponding output -`stripped_function.pdf`. +The file `stripped_function.asm` contains a stripped function and its +corresponding output file is `stripped_function.pdf`. -File `att_syntax.asm` is an example of non-stripped AT&T assembly. +The file `att_syntax.asm` is an example of a non-stripped AT&T assembly file. -File `huge.asm` is a large stripped -assembly function and its corresponding output `huge.pdf`. This can be used to -test processing time of big functions. +The file `huge.asm` is a large stripped assembly function and its +corresponding output file is `huge.pdf`. This file can be used to +test the processing time of big functions. 
-Files `objdump.asm` and `stripped_objdump.asm` are the regular and stripped +The files `objdump.asm` and `stripped_objdump.asm` are the regular and stripped objdump-based disassemblies of short functions. -File `arm.asm` is ARM based assembly file and its corresponding pdf file is +The file `arm.asm` is an ARM based assembly file and its corresponding pdf file is `arm.pdf`. ## Development You want to contribute? You're very welcome to do so! This section will give you -guidance how to setup development environment and test things locally. +guidance on how to set up the development environment and test things locally. ### Python Environment @@ -234,23 +239,23 @@ normal pip and virtualenv usage. Install pipenv for your system following the guide [here](https://pipenv.pypa.io/en/latest/). -After installing pipenv. Create virtual environment and install all required -packages to it. Run following at project root +After installing pipenv, create a virtual environment and install all required +packages. Run the following at the project root: ``` pipenv install -d ``` -Now you can activate the virtual environment with +Now you can activate the virtual environment with: ``` pipenv shell ``` -Now your `python` and `pip` commands will correspond to created virtual environment -instead of your system's Python installation. +Now your `python` and `pip` commands will correspond to the created virtual +environment instead of your system's Python installation. -To deactivate the environment, use +To deactivate the environment, use: ``` exit @@ -260,31 +265,32 @@ exit This project uses [pytest](https://pypi.org/project/pytest/) for testing. Some test are written using Python's own unittest testing framework, but they work -with pytest out of the box. Pytest style is preferred way to write tests. +with pytest out of the box. The pytest style is the preferred way to write +tests.
-To run tests from project root, use `pytest` or +To run tests from project root, use `pytest` or: ``` pipenv run pytest ``` -During testing dot viewer might be opened if you have it installed. This is -because GDB integration command `viewcfg` is tested, which will open external -dot viewer. Just close it after it's opened. It should not affect the test run -itself. +During testing the dot viewer might be opened if you have it installed. This is +because the GDB integration command `viewcfg` is tested, which will open +the external dot viewer. Just close it after it's opened. It should not affect +the test itself. ### Code Linting Project uses [flake8](https://flake8.pycqa.org/en/latest/) and [pylint](https://pylint.org/) for code linting. -To run flake8, use +To run flake8, use: ``` flake8 ``` -And to run pylint use +And to run pylint use: ``` pylint src test @@ -294,16 +300,16 @@ Both commands should not print any errors. ### Command-Line Interface -To test command-line interface of asm2cfg wihtout installing the package. You -can execute module directly. For example to print help +To test the command-line interface of asm2cfg without installing the package +you can execute the module directly. For example to print the help message: ``` python -m src.asm2cfg -h ``` -Standalone method can be used to try out the examples under `examples` folder as -well. For example following command should generate `main.pdf` file to current -working directory. +The standalone method can be used to try out the examples in the `examples` +folder as well. For example the following command should generate the +`main.pdf` file in the current working directory: ``` python -m src.asm2cfg -c examples/huge.asm @@ -311,37 +317,37 @@ python -m src.asm2cfg -c examples/huge.asm ### GDB Integration -Before testing GDB functionality, make sure asm2cfg is not installed with pip! -This can lead to GDB using code from pip installed asm2cfg package instead of -code from this repository! 
+Before testing the GDB functionality, make sure asm2cfg is not installed with +pip! This can lead to GDB using code from the asm2cfg package installed by pip +instead of the code from this repository! -Also pipenv cannot be used with GDB. You need to install required packages to -your system's Python pip. This is because your installed GDB is linked against -system's Python interpreter and will use it, instead of active virtual -environment. If packages are not installed to your system's pip. You are likely -to receive following error messages when trying to use asm2cfg with GDB +Also pipenv cannot be used with GDB. You need to install the required packages to +your system's Python pip. This is because your installed GDB is linked against +your system's Python interpreter and will use it, instead of the active virtual +environment. If the packages are not installed to your system's pip you are likely +to receive the following error message when trying to use asm2cfg with GDB: ``` ModuleNotFoundError: No module named 'graphviz' ``` To fix this, install required packages to your system's pip without active -virtual environment. Currently GDB integration only requires graphviz. +virtual environment. Currently the GDB integration only requires graphviz. ``` pip install graphviz ``` -To use asm2cfg GDB related functionality. Use following line from -project root. +To use the GDB related functionality of asm2cfg, use the following line from the +project root: ``` PYTHONPATH=${PWD}/src gdb -ex 'source src/gdb_asm2cfg.py' ``` -This will set Python import path so that GDB can import code from this +This will set the Python import path so that GDB can import code from this repository without installing the package. After this you should be able to use -commands `viewcfg` and `savecfg`. +the commands `viewcfg` and `savecfg`. ### Current Development Goals @@ -351,5 +357,5 @@ lines. If you encounter such problems please open an issue.
Current developed goals are best described in issues section. Please open a new one if existing one does not exist. -If you want to talk to me, you can contact me at Discord with name +If you want to talk to me, you can contact me on Discord with name `Kazhuu#3121`. From ec71a72f46c1bfc509bf12c3e08658a595bc0f1c Mon Sep 17 00:00:00 2001 From: cooki35 <33752572+cooki35@users.noreply.github.com> Date: Tue, 29 Nov 2022 15:40:19 +0100 Subject: [PATCH 4/4] Add a CSV file to the examples. --- examples/dataframe.csv | 60 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 examples/dataframe.csv diff --git a/examples/dataframe.csv b/examples/dataframe.csv new file mode 100644 index 0000000..252ca4a --- /dev/null +++ b/examples/dataframe.csv @@ -0,0 +1,60 @@ +address;bytes;operator;operand +4608;55;pushq;%rbp +4609;53;pushq;%rbx +4610;50;pushq;%rax +4611;488b5e08;movq;8(%rsi), %rbx +4615;4889df;movq;%rbx, %rdi +4618;e827410000;callq;4176 +4623;85c0;testl;%eax, %eax +4625;7437;je;4642 +4627;8d48ff;leal;-1(%rax), %ecx +4630;83f903;cmpl;$3, %ecx +4633;732e;jae;4649 +4635;31ed;xorl;%ebp, %ebp +4637;e9fa000000;jmp;4718 +4642;31ed;xorl;%ebp, %ebp +4644;e9a1010000;jmp;4777 +4649;89c1;movl;%eax, %ecx +4651;83e1fc;andl;$-4, %ecx +4654;31ed;xorl;%ebp, %ebp +4656;0f1f840000000000;imull;$131, %ebp, %edx +4664;69d531010000;movsbl;(%rbx), %esi +4670;0fbe33;addl;%edx, %esi +4673;01d6;imull;$131, %esi, %edx +4675;69d631010000;movsbl;1(%rbx), %esi +4681;0fbe7301;addl;%edx, %esi +4685;01d6;imull;$131, %esi, %edx +4687;69d631010000;movsbl;2(%rbx), %esi +4693;0fbe7302;addl;%edx, %esi +4697;01d6;imull;$131, %esi, %edx +4699;69d631010000;movsbl;3(%rbx), %ebp +4705;0fbe6b03;addl;%edx, %ebp +4709;01d5;addq;$4, %rbx +4711;4883c304;addl;$-4, %ecx +4715;83c1fc;jne;4656 +4718;0f854cffffff;testb;$3, %al +4724;a803;je;4760 +4726;746c;andl;$3, %eax +4728;83e003;xorl;%ecx, %ecx +4731;31c9;imull;$131, %ebp, %edx +4733;66662e0f1f840000000000;movsbl;(%rbx,%rcx), %ebp 
+4744;69d531010000;addl;%edx, %ebp +4750;0fbe2c0b;addq;$1, %rcx +4754;01d5;cmpl;%ecx, %eax +4756;4883c101;jne;4731 +4760;39c8;cmpl;$-1114471758, %ebp +4762;75bc;jne;4777 +4764;81fda8e8b8eb;movl;$-1114471758, %ebp +4770;7526;leaq;.Lstr.1(%rip), %rdi +4772;bda8e8b8eb;jmp;4784 +4777;488d3dae000000;leaq;.Lstr(%rip), %rdi +4784;eb0f;callq;4144 +4786;488d3d82000000;leaq;.L.str.2(%rip), %rdi +4793;e8d63e0000;movl;%ebp, %esi +4798;488d3d52000000;xorl;%eax, %eax +4805;89ee;callq;4128 +4807;31c0;xorl;%eax, %eax +4809;e88a3e0000;addq;$8, %rsp +4814;31c0;popq;%rbx +4816;4883c408;popq;%rbp +4820;5b;retq;None