Skip to content

Commit

Permalink
Merge pull request mandiant#347 from fireeye/fix/non-ascii-char-filename
Browse files Browse the repository at this point in the history
get decoded sample path
  • Loading branch information
mr-tz authored Oct 23, 2020
2 parents 425613e + 6793169 commit 0e009c7
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 2 deletions.
13 changes: 11 additions & 2 deletions capa/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,14 @@ def main(argv=None):
parser = argparse.ArgumentParser(
description=desc, epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument("sample", type=str, help="path to sample to analyze")
parser.add_argument(
# in #328 we noticed that the sample path is not handled correctly if it contains non-ASCII characters
# https://stackoverflow.com/a/22947334/ offers a solution and decoding using getfilesystemencoding works
# in our testing, however other sources suggest `sys.stdin.encoding` (https://stackoverflow.com/q/4012571/)
"sample",
type=lambda s: s.decode(sys.getfilesystemencoding()),
help="path to sample to analyze",
)
parser.add_argument("--version", action="version", version="%(prog)s {:s}".format(capa.version.__version__))
parser.add_argument(
"-r",
Expand Down Expand Up @@ -493,7 +500,9 @@ def main(argv=None):
try:
taste = get_file_taste(args.sample)
except IOError as e:
logger.error("%s", str(e))
# per our research there's not a programmatic way to render the IOError with non-ASCII filename unless we
# handle the IOError separately and reach into the args
logger.error("%s", e.args[0])
return -1

# py2 doesn't know about cp65001, which is a variant of utf-8 on windows
Expand Down
8 changes: 8 additions & 0 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -146,6 +147,8 @@ def get_data_path_by_name(name):
return os.path.join(CD, "data", "64d9f7d96b99467f36e22fada623c3bb.dll_")
elif name.startswith("82bf6"):
return os.path.join(CD, "data", "82BF6347ACF15E5D883715DC289D8A2B.exe_")
elif name.startswith("pingtaest"):
return os.path.join(CD, "data", "ping_täst.exe_")
else:
raise ValueError("unexpected sample fixture")

Expand Down Expand Up @@ -542,3 +545,8 @@ def z499c2_extractor():
@pytest.fixture
def al_khaser_x86_extractor():
return get_extractor(get_data_path_by_name("al-khaser x86"))


@pytest.fixture
def pingtaest_extractor():
return get_extractor(get_data_path_by_name("pingtaest"))
25 changes: 25 additions & 0 deletions tests/test_main.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -57,6 +58,30 @@ def test_main_single_rule(z9324d_extractor, tmpdir):
)


@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
def test_main_non_ascii_filename(pingtaest_extractor, tmpdir, capsys):
# on py2.7, need to be careful about str (which can hold bytes)
# vs unicode (which is only unicode characters).
# on py3, this should not be needed.
#
# here we print a string with unicode characters in it
# (specifically, a byte string with utf-8 bytes in it, see file encoding)
assert capa.main.main(["-q", pingtaest_extractor.path]) == 0

std = capsys.readouterr()
# but here, we have to use a unicode instance,
# because capsys has decoded the output for us.
assert pingtaest_extractor.path.decode("utf-8") in std.out


@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
def test_main_non_ascii_filename_nonexistent(tmpdir, caplog):
NON_ASCII_FILENAME = "täst_not_there.exe"
assert capa.main.main(["-q", NON_ASCII_FILENAME]) == -1

assert NON_ASCII_FILENAME.decode("utf-8") in caplog.text


@pytest.mark.xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2")
def test_main_shellcode(z499c2_extractor):
path = z499c2_extractor.path
Expand Down

0 comments on commit 0e009c7

Please sign in to comment.