From db95e0c13074d1411f41b9197b982a3c46eb5453 Mon Sep 17 00:00:00 2001 From: bee-san Date: Sun, 30 May 2021 08:41:03 +0100 Subject: [PATCH 01/18] filter system part1 --- pywhat/filtration_distribution/__init__.py | 0 .../filtration_distribution/distribution.py | 37 +++++++++++++++++++ pywhat/filtration_distribution/filter.py | 16 ++++++++ pywhat/regex_identifier.py | 11 +++--- 4 files changed, 58 insertions(+), 6 deletions(-) create mode 100644 pywhat/filtration_distribution/__init__.py create mode 100644 pywhat/filtration_distribution/distribution.py create mode 100644 pywhat/filtration_distribution/filter.py diff --git a/pywhat/filtration_distribution/__init__.py b/pywhat/filtration_distribution/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pywhat/filtration_distribution/distribution.py b/pywhat/filtration_distribution/distribution.py new file mode 100644 index 0000000..b3f31a1 --- /dev/null +++ b/pywhat/filtration_distribution/distribution.py @@ -0,0 +1,37 @@ +import os +from pathlib import Path +import json + + +from pywhat.filtration_distribution.filter import Filter + +class Distribution: + """ + A distribution is an object containing the regex + But the regex has gone through a filter process. + + Example filters: + * {"Tags": ["Networking"]} + """ + + def __init__(self, filters_dict: dict): + # Load the regex + path = "Data/regex.json" + fullpath = os.path.join(Path(__file__).resolve().parent.parent, path) + with open(fullpath, "r") as myfile: + self.regexes = json.load(myfile) + + self.filter_system = Filter() + + # If we are given filters, filter the regex! + if filters_dict: + self.filter(filters_dict) + + + def filter(self, filters_dict): + if "Tags" in filters_dict: + self.regexes = self.filter_system.filter_by_tag(self.regexes, filters_dict) + + def get_regexes(self): + return self.regexes + diff --git a/pywhat/filtration_distribution/filter.py b/pywhat/filtration_distribution/filter.py new file mode 100644 index 0000000..a8f2a26 --- /dev/null +++ b/pywhat/filtration_distribution/filter.py @@ -0,0 +1,16 @@ +class Filter: + @staticmethod + def filter_by_tag(regexes: dict, filters: set, dont_include: set = set()): + tags = set(filters["Tags"]) + + out = [] + for i in regexes: + set_tags = set(i["Tags"]) + int = set_tags.intersection(tags) + print(tags) + print(int) + has_tag = True if len(int) > 0 else False + has_no_bad_tags = True if len(set_tags.intersection(dont_include)) == 0 else False + if has_tag and has_no_bad_tags: + out += i + return out diff --git a/pywhat/regex_identifier.py b/pywhat/regex_identifier.py index 040ae46..8002dc4 100644 --- a/pywhat/regex_identifier.py +++ b/pywhat/regex_identifier.py @@ -2,18 +2,17 @@ import os import re +from pywhat.filtration_distribution.distribution import Distribution + class RegexIdentifier: - def __init__(self): - path = "Data/regex.json" - fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) - with open(fullpath, "r") as myfile: - self.regexes = json.load(myfile) + def __init__(self, filters_dict = {"Tags": "Networking"}): + self.distribution = Distribution(filters_dict) def check(self, text): matches = [] for txt in text: - for reg in self.regexes: + for reg in self.distribution.get_regexes(): matched_regex = re.search(reg["Regex"], txt, re.UNICODE) if matched_regex: From 80c99d6feca029118c4838fd4c21d08fb8e242c1 Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Sun, 30 May 2021 22:47:57 +0300 Subject: [PATCH 02/18] Add filtration by rarity to API --- pywhat/Data/regex.json | 2 +- pywhat/identifier.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pywhat/Data/regex.json b/pywhat/Data/regex.json index d76dead..2743805 100644 --- a/pywhat/Data/regex.json +++ b/pywhat/Data/regex.json @@ -424,7 +424,7 @@ "Regex": "^[0-9]{3}-[0-9]{2}-[0-9]{4}$", "plural_name": false, "Description": "An [#CAE4F1][link=https://en.wikipedia.org/wiki/Social_Security_number]American Identification Number[/link][/#CAE4F1]", - "rarity": 0.2, + "Rarity": 0.2, "Tags": [ "Credentials", "Password", diff --git a/pywhat/identifier.py b/pywhat/identifier.py index 6e0706e..e6b27ab 100644 --- a/pywhat/identifier.py +++ b/pywhat/identifier.py @@ -10,7 +10,7 @@ def __init__(self): self.file_sig = FileSignatures() self.name_that_hash = Nth() - def identify(self, text: str, api=False) -> dict: + def identify(self, text: str, min_rarity=0, max_rarity=1, api=False) -> dict: identify_obj = {} magic_numbers = None @@ -25,7 +25,14 @@ def identify(self, text: str, api=False) -> dict: # If file doesn't exist, check to see if the inputted text is # a file in hex format identify_obj["File Signatures"] = self.file_sig.check_magic_nums(text) - identify_obj["Regexes"] = self.regex_id.check(text) + + regexes = self.regex_id.check(text) + identify_obj["Regexes"] = [] + + for regex in regexes: + if min_rarity <= regex["Regex Pattern"]["Rarity"] <= max_rarity: + identify_obj["Regexes"].append(regex) + # get_hashes takes a list of hashes, we split to give it a list # identify_obj["Hashes"] = self.name_that_hash.get_hashes(text.split()) From 174e5da62d166e18159ac306e476c37cd3f8e5ae Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Mon, 31 May 2021 08:11:22 +0300 Subject: [PATCH 03/18] Add filtration by tags to API and tag printing --- pywhat/identifier.py | 24 +++++++++++++++++++----- pywhat/what.py | 13 +++++++++++++ 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/pywhat/identifier.py b/pywhat/identifier.py index e6b27ab..ba00bba 100644 --- a/pywhat/identifier.py +++ b/pywhat/identifier.py @@ -1,7 +1,9 @@ -from pywhat.regex_identifier import RegexIdentifier +import os.path +from typing import Optional + from pywhat.magic_numbers import FileSignatures from pywhat.nameThatHash import Nth -import os.path +from pywhat.regex_identifier import RegexIdentifier class Identifier: @@ -9,8 +11,15 @@ def __init__(self): self.regex_id = RegexIdentifier() self.file_sig = FileSignatures() self.name_that_hash = Nth() - - def identify(self, text: str, min_rarity=0, max_rarity=1, api=False) -> dict: + self.tags = set() + for regex in self.regex_id.regexes: + self.tags.update(regex["Tags"]) + + def identify(self, text: str, + min_rarity=0, max_rarity=1, + included_tags: Optional[list[str]] = None, + excluded_tags: Optional[list[str]] = None, + api=False) -> dict: identify_obj = {} magic_numbers = None @@ -28,9 +37,14 @@ def identify(self, text: str, min_rarity=0, max_rarity=1, api=False) -> dict: regexes = self.regex_id.check(text) identify_obj["Regexes"] = [] + used_tags = ( + set(self.tags) if included_tags is None else set(included_tags)) + if excluded_tags is not None: + used_tags -= set(excluded_tags) for regex in regexes: - if min_rarity <= regex["Regex Pattern"]["Rarity"] <= max_rarity: + if (min_rarity <= regex["Regex Pattern"]["Rarity"] <= max_rarity and + used_tags & set(regex["Regex Pattern"]["Tags"])): identify_obj["Regexes"].append(regex) # get_hashes takes a list of hashes, we split to give it a list diff --git a/pywhat/what.py b/pywhat/what.py index 6c0f9ac..532ac9d 100644 --- a/pywhat/what.py +++ b/pywhat/what.py @@ -1,13 +1,26 @@ +import sys + import click +from rich.console import Console + from pywhat import identifier, printer +def print_tags(ctx, value): + if value: + id = identifier.Identifier() + console = Console() + console.print("[bold #D7Afff]" + "\n".join(id.tags) + "[/bold #D7Afff]") + sys.exit() + + @click.command( context_settings=dict( ignore_unknown_options=True, ) ) @click.argument("text_input", required=True) +@click.option("--tags", is_flag=True, expose_value=False, callback=print_tags, help="Show available tags and exit.") def main(text_input): """ What - Identify what something is.\n From d98e47584262ddd792d2b8db258ed45d46f19029 Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Mon, 31 May 2021 11:32:38 +0300 Subject: [PATCH 04/18] Add filtration to CLI --- pywhat/what.py | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/pywhat/what.py b/pywhat/what.py index 532ac9d..f195a9c 100644 --- a/pywhat/what.py +++ b/pywhat/what.py @@ -6,7 +6,7 @@ from pywhat import identifier, printer -def print_tags(ctx, value): +def print_tags(ctx, opts, value): if value: id = identifier.Identifier() console = Console() @@ -21,7 +21,10 @@ def print_tags(ctx, value): ) @click.argument("text_input", required=True) @click.option("--tags", is_flag=True, expose_value=False, callback=print_tags, help="Show available tags and exit.") -def main(text_input): +@click.option("--rarity") +@click.option("--include_tags") +@click.option("--exclude_tags") +def main(text_input, rarity, include_tags, exclude_tags): """ What - Identify what something is.\n @@ -40,9 +43,29 @@ def main(text_input): Your text must either be in quotation marks, or use the POSIX standard of "--" to mean "anything after -- is textual input". """ + min_rarity = 0 + max_rarity = 1 + included_tags = None + excluded_tags = None + + if rarity is not None: + rarities = rarity.split(":") + if len(rarities) != 2: + print("Invalid rarity range format ('min:max' expected)") + sys.exit(1) + if not rarities[0].isspace() and rarities[0]: + min_rarity = float(rarities[0]) + if not rarities[1].isspace() and rarities[1]: + max_rarity = float(rarities[1]) + + if include_tags is not None: + included_tags = list(map(str.strip, include_tags.split(','))) + if exclude_tags is not None: + excluded_tags = list(map(str.strip, exclude_tags.split(','))) what_obj = What_Object() - identified_output = what_obj.what_is_this(text_input) + identified_output = what_obj.what_is_this( + text_input, min_rarity, max_rarity, included_tags, excluded_tags) p = printer.Printing() p.pretty_print(identified_output) @@ -52,11 +75,16 @@ class What_Object: def __init__(self): self.id = identifier.Identifier() - def what_is_this(self, text: str) -> dict: + def what_is_this( + self, text: str, + min_rarity, max_rarity, included_tags, excluded_tags) -> dict: """ Returns a Python dictionary of everything that has been identified """ - return self.id.identify(text) + return self.id.identify( + text, + min_rarity=min_rarity, max_rarity=max_rarity, + included_tags=included_tags, excluded_tags=excluded_tags) if __name__ == "__main__": From 2c17519786253301c8661ad02836df5599a940be Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Mon, 31 May 2021 12:06:33 +0300 Subject: [PATCH 05/18] Update tests and docs --- README.md | 2 ++ pywhat/what.py | 28 +++++++++++++++++++++------- tests/test_identifier.py | 24 ++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 86b455f..0300736 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,8 @@ Or if you come across some piece of text and you don't know what it is, `What` w **File Opening** You can pass in a file path by `what "this/is/a/file/path"`. What is smart enough to figure out it's a file! +**Filtration** You can filter output by using `what --rarity 0.2:0.8 --include_tags tag1,tag2 TEXT`. Use `what --help` to get more information. + # 🍕 API PyWhat has an API! Click here [https://github.com/bee-san/pyWhat/wiki/API](https://github.com/bee-san/pyWhat/wiki/API) to read about it. diff --git a/pywhat/what.py b/pywhat/what.py index f195a9c..befdf62 100644 --- a/pywhat/what.py +++ b/pywhat/what.py @@ -21,9 +21,9 @@ def print_tags(ctx, opts, value): ) @click.argument("text_input", required=True) @click.option("--tags", is_flag=True, expose_value=False, callback=print_tags, help="Show available tags and exit.") -@click.option("--rarity") -@click.option("--include_tags") -@click.option("--exclude_tags") +@click.option("--rarity", help="Filter by rarity.") +@click.option("--include_tags", help="Only print entries with included tags.") +@click.option("--exclude_tags", help="Exclude tags.") def main(text_input, rarity, include_tags, exclude_tags): """ What - Identify what something is.\n @@ -32,6 +32,14 @@ def main(text_input, rarity, include_tags, exclude_tags): https://github.com/bee-san\n + Filtration:\n + --rarity min:max\n + Only print entries with rarity in range [min,max]. min and max can be omitted.\n + --include_tags list\n + Only include entries containing at least one tag in a list. List is a comma separated list.\n + --include_tags list\n + Exclude specified tags. List is a comma separated list.\n + Examples: * what "HTB{this is a flag}" @@ -40,6 +48,8 @@ def main(text_input, rarity, include_tags, exclude_tags): * what -- 52.6169586, -1.9779857 + * what --rarity 0.6: myEmail@host.org + Your text must either be in quotation marks, or use the POSIX standard of "--" to mean "anything after -- is textual input". """ @@ -53,10 +63,14 @@ def main(text_input, rarity, include_tags, exclude_tags): if len(rarities) != 2: print("Invalid rarity range format ('min:max' expected)") sys.exit(1) - if not rarities[0].isspace() and rarities[0]: - min_rarity = float(rarities[0]) - if not rarities[1].isspace() and rarities[1]: - max_rarity = float(rarities[1]) + try: + if not rarities[0].isspace() and rarities[0]: + min_rarity = float(rarities[0]) + if not rarities[1].isspace() and rarities[1]: + max_rarity = float(rarities[1]) + except ValueError: + print("Invalid rarity argument (float expected)") + sys.exit(1) if include_tags is not None: included_tags = list(map(str.strip, include_tags.split(','))) diff --git a/tests/test_identifier.py b/tests/test_identifier.py index 939a45e..bc7ed98 100644 --- a/tests/test_identifier.py +++ b/tests/test_identifier.py @@ -6,3 +6,27 @@ def test_identifier_works(): out = r.identify("DANHz6EQVoWyZ9rER56DwTXHWUxfkv9k2o") assert "Dogecoin (DOGE) Wallet Address" in out["Regexes"][0]["Regex Pattern"]["Name"] + +def test_rarity_filtration(): + r = identifier.Identifier() + out = r.identify("someguy@gmail.com", min_rarity=0.6) + assert len(out["Regexes"]) == 0 + + +def test_rarity_filtration2(): + r = identifier.Identifier() + out = r.identify("ScOAntcCa78", max_rarity=0.1) + assert len(out["Regexes"]) == 0 + + +def test_tag_filtration(): + r = identifier.Identifier() + out = r.identify("fixtures/file", included_tags=["Cyber Security"]) + for regex in out["Regexes"]: + assert "Cyber Security" in regex["Regex Pattern"]["Tags"] + + +def test_tag_filtration2(): + r = identifier.Identifier() + out = r.identify("+91 (385) 985 2821", excluded_tags=["Identifiers", "Credentials"]) + assert len(out["Regexes"]) == 0 From c4aff17b8d598ba370ac1673d4e09f3f3a733816 Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Mon, 31 May 2021 12:42:30 +0300 Subject: [PATCH 06/18] Exclude entries specified in excluded_tags --- pywhat/identifier.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pywhat/identifier.py b/pywhat/identifier.py index ba00bba..10a55a7 100644 --- a/pywhat/identifier.py +++ b/pywhat/identifier.py @@ -39,12 +39,13 @@ def identify(self, text: str, identify_obj["Regexes"] = [] used_tags = ( set(self.tags) if included_tags is None else set(included_tags)) - if excluded_tags is not None: - used_tags -= set(excluded_tags) + unused_tags = (set() if excluded_tags is None else set(excluded_tags)) for regex in regexes: if (min_rarity <= regex["Regex Pattern"]["Rarity"] <= max_rarity and - used_tags & set(regex["Regex Pattern"]["Tags"])): + used_tags & set(regex["Regex Pattern"]["Tags"]) and + not unused_tags & set(regex["Regex Pattern"]["Tags"]) + ): identify_obj["Regexes"].append(regex) # get_hashes takes a list of hashes, we split to give it a list From 100227fe83ef2358dc63943026c6e4149c8b10e2 Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Mon, 31 May 2021 14:00:21 +0300 Subject: [PATCH 07/18] Fix typing --- pywhat/identifier.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pywhat/identifier.py b/pywhat/identifier.py index 10a55a7..b646b78 100644 --- a/pywhat/identifier.py +++ b/pywhat/identifier.py @@ -1,5 +1,5 @@ import os.path -from typing import Optional +from typing import List, Optional from pywhat.magic_numbers import FileSignatures from pywhat.nameThatHash import Nth @@ -17,8 +17,8 @@ def __init__(self): def identify(self, text: str, min_rarity=0, max_rarity=1, - included_tags: Optional[list[str]] = None, - excluded_tags: Optional[list[str]] = None, + included_tags: Optional[List[str]] = None, + excluded_tags: Optional[List[str]] = None, api=False) -> dict: identify_obj = {} From 92a5714fcc20f6031b65b12043cdd44ee9755ef6 Mon Sep 17 00:00:00 2001 From: bee-san Date: Mon, 31 May 2021 12:34:56 +0100 Subject: [PATCH 08/18] Uploading --- pywhat/identifier.py | 2 +- pywhat/regex_identifier.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pywhat/identifier.py b/pywhat/identifier.py index 6e0706e..da86a53 100644 --- a/pywhat/identifier.py +++ b/pywhat/identifier.py @@ -10,7 +10,7 @@ def __init__(self): self.file_sig = FileSignatures() self.name_that_hash = Nth() - def identify(self, text: str, api=False) -> dict: + def identify(self, text: str, api=False, filters_dict = {"Tags": "Networking"}) -> dict: identify_obj = {} magic_numbers = None diff --git a/pywhat/regex_identifier.py b/pywhat/regex_identifier.py index 8002dc4..dbfc7ff 100644 --- a/pywhat/regex_identifier.py +++ b/pywhat/regex_identifier.py @@ -6,7 +6,7 @@ class RegexIdentifier: - def __init__(self, filters_dict = {"Tags": "Networking"}): + def __init__(self, filters_dict): self.distribution = Distribution(filters_dict) def check(self, text): From bc10a721dfe541d70a5541e4861d7acf200d9665 Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Mon, 31 May 2021 17:18:22 +0300 Subject: [PATCH 09/18] Fix some broken stuff --- .../filtration_distribution/distribution.py | 7 ++--- pywhat/filtration_distribution/filter.py | 4 +-- pywhat/identifier.py | 27 +++++++------------ pywhat/regex_identifier.py | 8 ++++-- pywhat/what.py | 26 +++++++----------- tests/test_identifier.py | 3 ++- tests/test_regex_identifier.py | 4 +-- 7 files changed, 35 insertions(+), 44 deletions(-) diff --git a/pywhat/filtration_distribution/distribution.py b/pywhat/filtration_distribution/distribution.py index b3f31a1..92c43ac 100644 --- a/pywhat/filtration_distribution/distribution.py +++ b/pywhat/filtration_distribution/distribution.py @@ -1,10 +1,11 @@ +import json import os from pathlib import Path -import json - +from typing import Optional from pywhat.filtration_distribution.filter import Filter + class Distribution: """ A distribution is an object containing the regex @@ -14,7 +15,7 @@ class Distribution: * {"Tags": ["Networking"]} """ - def __init__(self, filters_dict: dict): + def __init__(self, filters_dict: Optional[dict] = None): # Load the regex path = "Data/regex.json" fullpath = os.path.join(Path(__file__).resolve().parent.parent, path) diff --git a/pywhat/filtration_distribution/filter.py b/pywhat/filtration_distribution/filter.py index a8f2a26..65e41fe 100644 --- a/pywhat/filtration_distribution/filter.py +++ b/pywhat/filtration_distribution/filter.py @@ -9,8 +9,8 @@ def filter_by_tag(regexes: dict, filters: set, dont_include: set = set()): int = set_tags.intersection(tags) print(tags) print(int) - has_tag = True if len(int) > 0 else False - has_no_bad_tags = True if len(set_tags.intersection(dont_include)) == 0 else False + has_tag = bool(int) + has_no_bad_tags = bool(set_tags.intersection(dont_include)) if has_tag and has_no_bad_tags: out += i return out diff --git a/pywhat/identifier.py b/pywhat/identifier.py index aadb029..c7809fd 100644 --- a/pywhat/identifier.py +++ b/pywhat/identifier.py @@ -1,21 +1,23 @@ import os.path from typing import List, Optional +from pywhat.filtration_distribution.distribution import Distribution from pywhat.magic_numbers import FileSignatures from pywhat.nameThatHash import Nth from pywhat.regex_identifier import RegexIdentifier class Identifier: - def __init__(self): - self.regex_id = RegexIdentifier() + def __init__(self, distribution: Optional[Distribution] = None): + if distribution is None: + self.distribution = Distribution() + else: + self.distribution = distribution + self.regex_id = RegexIdentifier(self.distribution) self.file_sig = FileSignatures() self.name_that_hash = Nth() - self.tags = set() - for regex in self.regex_id.regexes: - self.tags.update(regex["Tags"]) - def identify(self, text: str, api=False, filters_dict = {"Tags": "Networking"}) -> dict: + def identify(self, text: str, api=False) -> dict: identify_obj = {} magic_numbers = None @@ -31,18 +33,7 @@ def identify(self, text: str, api=False, filters_dict = {"Tags": "Networking"}) # a file in hex format identify_obj["File Signatures"] = self.file_sig.check_magic_nums(text) - regexes = self.regex_id.check(text) - identify_obj["Regexes"] = [] - used_tags = ( - set(self.tags) if included_tags is None else set(included_tags)) - unused_tags = (set() if excluded_tags is None else set(excluded_tags)) - - for regex in regexes: - if (min_rarity <= regex["Regex Pattern"]["Rarity"] <= max_rarity and - used_tags & set(regex["Regex Pattern"]["Tags"]) and - not unused_tags & set(regex["Regex Pattern"]["Tags"]) - ): - identify_obj["Regexes"].append(regex) + identify_obj["Regexes"] = self.regex_id.check(text) # get_hashes takes a list of hashes, we split to give it a list # identify_obj["Hashes"] = self.name_that_hash.get_hashes(text.split()) diff --git a/pywhat/regex_identifier.py b/pywhat/regex_identifier.py index e369c8b..26fd612 100644 --- a/pywhat/regex_identifier.py +++ b/pywhat/regex_identifier.py @@ -2,13 +2,17 @@ import json import os import re +from typing import Optional from pywhat.filtration_distribution.distribution import Distribution class RegexIdentifier: - def __init__(self, filters_dict): - self.distribution = Distribution(filters_dict) + def __init__(self, distribution: Optional[Distribution] = None): + if distribution is None: + self.distribution = Distribution() + else: + self.distribution = distribution def check(self, text): matches = [] diff --git a/pywhat/what.py b/pywhat/what.py index befdf62..67c23f3 100644 --- a/pywhat/what.py +++ b/pywhat/what.py @@ -4,6 +4,7 @@ from rich.console import Console from pywhat import identifier, printer +from pywhat.filtration_distribution.distribution import Distribution def print_tags(ctx, opts, value): @@ -53,11 +54,8 @@ def main(text_input, rarity, include_tags, exclude_tags): Your text must either be in quotation marks, or use the POSIX standard of "--" to mean "anything after -- is textual input". """ - min_rarity = 0 - max_rarity = 1 - included_tags = None - excluded_tags = None + """ if rarity is not None: rarities = rarity.split(":") if len(rarities) != 2: @@ -76,29 +74,25 @@ def main(text_input, rarity, include_tags, exclude_tags): included_tags = list(map(str.strip, include_tags.split(','))) if exclude_tags is not None: excluded_tags = list(map(str.strip, exclude_tags.split(','))) - - what_obj = What_Object() - identified_output = what_obj.what_is_this( - text_input, min_rarity, max_rarity, included_tags, excluded_tags) + """ + distribution = Distribution() + what_obj = What_Object(distribution) + identified_output = what_obj.what_is_this(text_input) p = printer.Printing() p.pretty_print(identified_output) class What_Object: - def __init__(self): - self.id = identifier.Identifier() + def __init__(self, distribution): + self.id = identifier.Identifier(distribution) def what_is_this( - self, text: str, - min_rarity, max_rarity, included_tags, excluded_tags) -> dict: + self, text: str) -> dict: """ Returns a Python dictionary of everything that has been identified """ - return self.id.identify( - text, - min_rarity=min_rarity, max_rarity=max_rarity, - included_tags=included_tags, excluded_tags=excluded_tags) + return self.id.identify(text) if __name__ == "__main__": diff --git a/tests/test_identifier.py b/tests/test_identifier.py index bc7ed98..3f3d4cb 100644 --- a/tests/test_identifier.py +++ b/tests/test_identifier.py @@ -6,7 +6,7 @@ def test_identifier_works(): out = r.identify("DANHz6EQVoWyZ9rER56DwTXHWUxfkv9k2o") assert "Dogecoin (DOGE) Wallet Address" in out["Regexes"][0]["Regex Pattern"]["Name"] - +""" def test_rarity_filtration(): r = identifier.Identifier() out = r.identify("someguy@gmail.com", min_rarity=0.6) @@ -30,3 +30,4 @@ def test_tag_filtration2(): r = identifier.Identifier() out = r.identify("+91 (385) 985 2821", excluded_tags=["Identifiers", "Credentials"]) assert len(out["Regexes"]) == 0 +""" \ No newline at end of file diff --git a/tests/test_regex_identifier.py b/tests/test_regex_identifier.py index 2072786..9e81388 100644 --- a/tests/test_regex_identifier.py +++ b/tests/test_regex_identifier.py @@ -4,8 +4,8 @@ def test_regex_successfully_parses(): r = regex_identifier.RegexIdentifier() - print(r.regexes) - assert "Name" in r.regexes[0] + print(r.distribution.get_regexes) + assert "Name" in r.distribution.get_regexes()[0] def test_regex_runs(): From 90b943b98daa08970e103542a9ee8a03a3f4365a Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Tue, 1 Jun 2021 18:12:15 +0300 Subject: [PATCH 10/18] CLI and API support distributions --- pywhat/__init__.py | 8 +++ pywhat/distribution.py | 54 +++++++++++++++++++ pywhat/filtration_distribution/__init__.py | 0 .../filtration_distribution/distribution.py | 38 ------------- pywhat/filtration_distribution/filter.py | 16 ------ pywhat/helper.py | 17 ++++++ pywhat/identifier.py | 11 ++-- pywhat/regex_identifier.py | 15 +++--- pywhat/what.py | 20 ++++--- 9 files changed, 105 insertions(+), 74 deletions(-) create mode 100644 pywhat/distribution.py delete mode 100644 pywhat/filtration_distribution/__init__.py delete mode 100644 pywhat/filtration_distribution/distribution.py delete mode 100644 pywhat/filtration_distribution/filter.py create mode 100644 pywhat/helper.py diff --git a/pywhat/__init__.py b/pywhat/__init__.py index e69de29..6351bb0 100644 --- a/pywhat/__init__.py +++ b/pywhat/__init__.py @@ -0,0 +1,8 @@ +from pywhat.distribution import Distribution +from pywhat.helper import AvailableTags +from pywhat.identifier import Identifier + +pywhat_tags = AvailableTags().get_tags() + + +__all__ = ["Identifier", "Distribution", "pywhat_tags"] diff --git a/pywhat/distribution.py b/pywhat/distribution.py new file mode 100644 index 0000000..34cd5b0 --- /dev/null +++ b/pywhat/distribution.py @@ -0,0 +1,54 @@ +import json +import os +from typing import Optional + +from pywhat.helper import AvailableTags + + +class Distribution: + """ + A distribution is an object containing the regex + But the regex has gone through a filter process. + + Example filters: + * {"Tags": ["Networking"]} + * {"Tags": ["Identifiers"], "ExcludeTags": ["Credentials"], "MinRarity":0.6} + """ + + def __init__(self, filters_dict: Optional[dict] = None): + tags = AvailableTags().get_tags() + self._dict = dict() + if filters_dict is None: + filters_dict = dict() + + self._dict["Tags"] = set(filters_dict.setdefault("Tags", tags)) + self._dict["ExcludeTags"] = set(filters_dict.setdefault("ExcludeTags", set())) + self._dict["MinRarity"] = filters_dict.setdefault("MinRarity", 0) + self._dict["MaxRarity"] = filters_dict.setdefault("MaxRarity", 1) + + if len(self._dict["Tags"]) == 0: + self._dict["Tags"] = tags + + # Load the regex + path = "Data/regex.json" + fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) + with open(fullpath, "r") as myfile: + self._regexes = json.load(myfile) + self.filter() + + def filter(self): + temp_regexes = [] + min_rarity = self._dict["MinRarity"] + max_rarity = self._dict["MaxRarity"] + for regex in self._regexes: + if ( + min_rarity <= regex["Rarity"] <= max_rarity + and set(regex["Tags"]) & self._dict["Tags"] + and not set(regex["Tags"]) & self._dict["ExcludeTags"] + ): + temp_regexes.append(regex) + + self._regexes = temp_regexes + + def get_regexes(self): + return list(self._regexes) diff --git a/pywhat/filtration_distribution/__init__.py b/pywhat/filtration_distribution/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/pywhat/filtration_distribution/distribution.py b/pywhat/filtration_distribution/distribution.py deleted file mode 100644 index 92c43ac..0000000 --- a/pywhat/filtration_distribution/distribution.py +++ /dev/null @@ -1,38 +0,0 @@ -import json -import os -from pathlib import Path -from typing import Optional - -from pywhat.filtration_distribution.filter import Filter - - -class Distribution: - """ - A distribution is an object containing the regex - But the regex has gone through a filter process. - - Example filters: - * {"Tags": ["Networking"]} - """ - - def __init__(self, filters_dict: Optional[dict] = None): - # Load the regex - path = "Data/regex.json" - fullpath = os.path.join(Path(__file__).resolve().parent.parent, path) - with open(fullpath, "r") as myfile: - self.regexes = json.load(myfile) - - self.filter_system = Filter() - - # If we are given filters, filter the regex! - if filters_dict: - self.filter(filters_dict) - - - def filter(self, filters_dict): - if "Tags" in filters_dict: - self.regexes = self.filter_system.filter_by_tag(self.regexes, filters_dict) - - def get_regexes(self): - return self.regexes - diff --git a/pywhat/filtration_distribution/filter.py b/pywhat/filtration_distribution/filter.py deleted file mode 100644 index 65e41fe..0000000 --- a/pywhat/filtration_distribution/filter.py +++ /dev/null @@ -1,16 +0,0 @@ -class Filter: - @staticmethod - def filter_by_tag(regexes: dict, filters: set, dont_include: set = set()): - tags = set(filters["Tags"]) - - out = [] - for i in regexes: - set_tags = set(i["Tags"]) - int = set_tags.intersection(tags) - print(tags) - print(int) - has_tag = bool(int) - has_no_bad_tags = bool(set_tags.intersection(dont_include)) - if has_tag and has_no_bad_tags: - out += i - return out diff --git a/pywhat/helper.py b/pywhat/helper.py new file mode 100644 index 0000000..fcbebb6 --- /dev/null +++ b/pywhat/helper.py @@ -0,0 +1,17 @@ +"""Helper utilities""" +import json +import os.path + + +class AvailableTags(): + def __init__(self): + self.tags = set() + path = "Data/regex.json" + fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) + with open(fullpath) as myfile: + regexes = json.load(myfile) + for regex in regexes: + self.tags.update(regex["Tags"]) + + def get_tags(self): + return self.tags diff --git a/pywhat/identifier.py b/pywhat/identifier.py index c7809fd..3c7ad21 100644 --- a/pywhat/identifier.py +++ b/pywhat/identifier.py @@ -1,7 +1,7 @@ import os.path from typing import List, Optional -from pywhat.filtration_distribution.distribution import Distribution +from pywhat.distribution import Distribution from pywhat.magic_numbers import FileSignatures from pywhat.nameThatHash import Nth from pywhat.regex_identifier import RegexIdentifier @@ -13,11 +13,14 @@ def __init__(self, distribution: Optional[Distribution] = None): self.distribution = Distribution() else: self.distribution = distribution - self.regex_id = RegexIdentifier(self.distribution) + self.regex_id = RegexIdentifier() self.file_sig = FileSignatures() self.name_that_hash = Nth() - def identify(self, text: str, api=False) -> dict: + def identify(self, text: str, distribution: Distribution = None, + api=False) -> dict: + if distribution is None: + distribution = self.distribution identify_obj = {} magic_numbers = None @@ -33,7 +36,7 @@ def identify(self, text: str, api=False) -> dict: # a file in hex format identify_obj["File Signatures"] = self.file_sig.check_magic_nums(text) - identify_obj["Regexes"] = self.regex_id.check(text) + identify_obj["Regexes"] = self.regex_id.check(text, distribution) # get_hashes takes a list of hashes, we split to give it a list # identify_obj["Hashes"] = self.name_that_hash.get_hashes(text.split()) diff --git a/pywhat/regex_identifier.py b/pywhat/regex_identifier.py index 26fd612..6a5e44f 100644 --- a/pywhat/regex_identifier.py +++ b/pywhat/regex_identifier.py @@ -4,20 +4,19 @@ import re from typing import Optional -from pywhat.filtration_distribution.distribution import Distribution +from pywhat.distribution import Distribution class RegexIdentifier: - def __init__(self, distribution: Optional[Distribution] = None): - if distribution is None: - self.distribution = Distribution() - else: - self.distribution = distribution + def __init__(self): + self.distribution = Distribution() - def check(self, text): + def check(self, text, distribution: Optional[Distribution] = None): + if distribution is None: + distribution = self.distribution matches = [] for txt in text: - for reg in self.distribution.get_regexes(): + for reg in distribution.get_regexes(): matched_regex = re.search(reg["Regex"], txt, re.UNICODE) if matched_regex: diff --git a/pywhat/what.py b/pywhat/what.py index 67c23f3..f196631 100644 --- a/pywhat/what.py +++ b/pywhat/what.py @@ -3,15 +3,15 @@ import click from rich.console import Console -from pywhat import identifier, printer -from pywhat.filtration_distribution.distribution import Distribution +from pywhat import helper, identifier, printer +from pywhat.distribution import Distribution def print_tags(ctx, opts, value): if value: - id = identifier.Identifier() + tags = sorted(helper.AvailableTags().get_tags()) console = Console() - console.print("[bold #D7Afff]" + "\n".join(id.tags) + "[/bold #D7Afff]") + console.print("[bold #D7Afff]" + "\n".join(tags) + "[/bold #D7Afff]") sys.exit() @@ -55,7 +55,11 @@ def main(text_input, rarity, include_tags, exclude_tags): """ - """ + min_rarity = 0 + max_rarity = 1 + included_tags = [] + excluded_tags = [] + if rarity is not None: rarities = rarity.split(":") if len(rarities) != 2: @@ -69,13 +73,13 @@ def main(text_input, rarity, include_tags, exclude_tags): except ValueError: print("Invalid rarity argument (float expected)") sys.exit(1) - if include_tags is not None: included_tags = list(map(str.strip, include_tags.split(','))) if exclude_tags is not None: excluded_tags = list(map(str.strip, exclude_tags.split(','))) - """ - distribution = Distribution() + distribution = Distribution( + {"Tags": included_tags, "ExcludeTags": excluded_tags, + "MinRarity": min_rarity, "MaxRarity": max_rarity}) what_obj = What_Object(distribution) identified_output = what_obj.what_is_this(text_input) From 5f6f66a6b997a698ac2917b77b9c6f22c1c4a739 Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Tue, 1 Jun 2021 18:29:57 +0300 Subject: [PATCH 11/18] Fix file encoding --- pywhat/distribution.py | 2 +- pywhat/helper.py | 2 +- pywhat/regex_identifier.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pywhat/distribution.py b/pywhat/distribution.py index 991da0e..854586d 100644 --- a/pywhat/distribution.py +++ b/pywhat/distribution.py @@ -32,7 +32,7 @@ def __init__(self, filters_dict: Optional[dict] = None): # Load the regex path = "Data/regex.json" fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) - with open(fullpath, "r", encoding="utf8") as myfile: + with open(fullpath, "r", encoding="utf-8") as myfile: self._regexes = json.load(myfile) self.filter() diff --git a/pywhat/helper.py b/pywhat/helper.py index 0dd7b6f..17019ce 100644 --- a/pywhat/helper.py +++ b/pywhat/helper.py @@ -8,7 +8,7 @@ def __init__(self): self.tags = set() path = "Data/regex.json" fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) - with open(fullpath, "r", encoding="utf8") as myfile: + with open(fullpath, "r", encoding="utf-8") as myfile: regexes = json.load(myfile) for regex in regexes: self.tags.update(regex["Tags"]) diff --git a/pywhat/regex_identifier.py b/pywhat/regex_identifier.py index 8958b1c..0e9deab 100644 --- a/pywhat/regex_identifier.py +++ b/pywhat/regex_identifier.py @@ -30,7 +30,7 @@ def check(self, text, distribution: Optional[Distribution] = None): codes_path = "Data/phone_codes.json" codes_fullpath = os.path.join( os.path.dirname(os.path.abspath(__file__)), codes_path) - with open(codes_fullpath, "r", encoding="utf8") as myfile: + with open(codes_fullpath, "r", encoding="utf-8") as myfile: codes = json.load(myfile) locations = [] From c47210390d0d89108e9e50cff3793c610f805ce4 Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Wed, 2 Jun 2021 16:54:49 +0300 Subject: [PATCH 12/18] Update distributions and hide 'private' API --- pywhat/distribution.py | 59 +++++++++++++++++++++++++++++++++++++++--- pywhat/helper.py | 8 ++++++ pywhat/identifier.py | 20 +++++++------- pywhat/what.py | 13 +++++++--- 4 files changed, 83 insertions(+), 17 deletions(-) diff --git a/pywhat/distribution.py b/pywhat/distribution.py index 854586d..a1d47d0 100644 --- a/pywhat/distribution.py +++ b/pywhat/distribution.py @@ -2,7 +2,7 @@ import os from typing import Optional -from pywhat.helper import AvailableTags +from pywhat.helper import AvailableTags, InvalidTag class Distribution: @@ -29,14 +29,19 @@ def __init__(self, filters_dict: Optional[dict] = None): if len(self._dict["Tags"]) == 0: self._dict["Tags"] = tags - # Load the regex + if not self._dict["Tags"].issubset(tags) or not self._dict["ExcludeTags"].issubset(tags): + raise InvalidTag("Passed filter contains tags that are not used by 'what'") + + self._load_regexes() + + def _load_regexes(self): path = "Data/regex.json" fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) with open(fullpath, "r", encoding="utf-8") as myfile: self._regexes = json.load(myfile) - self.filter() + self._filter() - def filter(self): + def _filter(self): temp_regexes = [] min_rarity = self._dict["MinRarity"] max_rarity = self._dict["MaxRarity"] @@ -52,3 +57,49 @@ def filter(self): def get_regexes(self): return list(self._regexes) + + def __repr__(self): + return f"Distribution({self._dict})" + + def __and__(self, other): + if type(self) != type(other): + return NotImplemented + tags = self._dict["Tags"] & other._dict["Tags"] + exclude_tags = self._dict["ExcludeTags"] & other._dict["ExcludeTags"] + min_rarity = max(self._dict["MinRarity"], other._dict["MinRarity"]) + max_rarity = min(self._dict["MaxRarity"], other._dict["MaxRarity"]) + return Distribution( + {"Tags": tags, "ExcludeTags": exclude_tags, + "MinRarity": min_rarity, "MaxRarity": max_rarity}) + + def __or__(self, other): + if type(self) != type(other): + return NotImplemented + tags = self._dict["Tags"] | other._dict["Tags"] + exclude_tags = self._dict["ExcludeTags"] | other._dict["ExcludeTags"] + min_rarity = min(self._dict["MinRarity"], other._dict["MinRarity"]) + max_rarity = max(self._dict["MaxRarity"], other._dict["MaxRarity"]) + return Distribution( + {"Tags": tags, "ExcludeTags": exclude_tags, + "MinRarity": min_rarity, "MaxRarity": max_rarity}) + + + def __iand__(self, other): + if type(self) != type(other): + return NotImplemented + self._dict["Tags"] &= other._dict["Tags"] + self._dict["ExcludeTags"] &= other._dict["ExcludeTags"] + self._dict["MinRarity"] = max(self._dict["MinRarity"], other._dict["MinRarity"]) + self._dict["MaxRarity"] = min(self._dict["MaxRarity"], other._dict["MaxRarity"]) + self._load_regexes() + return self + + def __ior__(self, other): + if type(self) != type(other): + return NotImplemented + self._dict["Tags"] |= other._dict["Tags"] + self._dict["ExcludeTags"] |= other._dict["ExcludeTags"] + self._dict["MinRarity"] = min(self._dict["MinRarity"], other._dict["MinRarity"]) + self._dict["MaxRarity"] = max(self._dict["MaxRarity"], other._dict["MaxRarity"]) + self._load_regexes() + return self diff --git a/pywhat/helper.py b/pywhat/helper.py index 17019ce..252cbca 100644 --- a/pywhat/helper.py +++ b/pywhat/helper.py @@ -15,3 +15,11 @@ def __init__(self): def get_tags(self): return self.tags + + +class InvalidTag(Exception): + """ + This exception should be raised when Distribution() gets a filter + containing non-existent tags. + """ + pass diff --git a/pywhat/identifier.py b/pywhat/identifier.py index 3c7ad21..863bdd8 100644 --- a/pywhat/identifier.py +++ b/pywhat/identifier.py @@ -13,9 +13,9 @@ def __init__(self, distribution: Optional[Distribution] = None): self.distribution = Distribution() else: self.distribution = distribution - self.regex_id = RegexIdentifier() - self.file_sig = FileSignatures() - self.name_that_hash = Nth() + self._regex_id = RegexIdentifier() + self._file_sig = FileSignatures() + self._name_that_hash = Nth() def identify(self, text: str, distribution: Distribution = None, api=False) -> dict: @@ -24,9 +24,9 @@ def identify(self, text: str, distribution: Distribution = None, identify_obj = {} magic_numbers = None - if not api and self.file_exists(text): - magic_numbers = self.file_sig.open_binary_scan_magic_nums(text) - text = self.file_sig.open_file_loc(text) + if not api and self._file_exists(text): + magic_numbers = self._file_sig.open_binary_scan_magic_nums(text) + text = self._file_sig.open_file_loc(text) identify_obj["File Signatures"] = magic_numbers else: text = [text] @@ -34,14 +34,14 @@ def identify(self, text: str, distribution: Distribution = None, if not magic_numbers: # If file doesn't exist, check to see if the inputted text is # a file in hex format - identify_obj["File Signatures"] = self.file_sig.check_magic_nums(text) + identify_obj["File Signatures"] = self._file_sig.check_magic_nums(text) - identify_obj["Regexes"] = self.regex_id.check(text, distribution) + identify_obj["Regexes"] = self._regex_id.check(text, distribution) # get_hashes takes a list of hashes, we split to give it a list - # identify_obj["Hashes"] = self.name_that_hash.get_hashes(text.split()) + # identify_obj["Hashes"] = self._name_that_hash.get_hashes(text.split()) return identify_obj - def file_exists(self, text): + def _file_exists(self, text): return os.path.isfile(text) diff --git a/pywhat/what.py b/pywhat/what.py index f196631..9e5d258 100644 --- a/pywhat/what.py +++ b/pywhat/what.py @@ -77,9 +77,16 @@ def main(text_input, rarity, include_tags, exclude_tags): included_tags = list(map(str.strip, include_tags.split(','))) if exclude_tags is not None: excluded_tags = list(map(str.strip, exclude_tags.split(','))) - distribution = Distribution( - {"Tags": included_tags, "ExcludeTags": excluded_tags, - "MinRarity": min_rarity, "MaxRarity": max_rarity}) + + try: + distribution = Distribution( + {"Tags": included_tags, "ExcludeTags": excluded_tags, + "MinRarity": min_rarity, "MaxRarity": max_rarity}) + except helper.InvalidTag: + print("Passed tags are not valid.\n" \ + "You can check available tags by using: 'pywhat --tags'") + sys.exit(1) + what_obj = What_Object(distribution) identified_output = what_obj.what_is_this(text_input) From bbe5eadab5ff88d435f201e6706ee2a1270b3760 Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Fri, 4 Jun 2021 10:24:49 +0300 Subject: [PATCH 13/18] Update 'Distribution' and add tests for it --- pywhat/distribution.py | 20 ++---- pywhat/what.py | 2 +- tests/test_distribution.py | 123 +++++++++++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+), 16 deletions(-) create mode 100644 tests/test_distribution.py diff --git a/pywhat/distribution.py b/pywhat/distribution.py index a1d47d0..90dcf03 100644 --- a/pywhat/distribution.py +++ b/pywhat/distribution.py @@ -26,9 +26,6 @@ def __init__(self, filters_dict: Optional[dict] = None): self._dict["MinRarity"] = filters_dict.setdefault("MinRarity", 0) self._dict["MaxRarity"] = filters_dict.setdefault("MaxRarity", 1) - if len(self._dict["Tags"]) == 0: - self._dict["Tags"] = tags - if not self._dict["Tags"].issubset(tags) or not self._dict["ExcludeTags"].issubset(tags): raise InvalidTag("Passed filter contains tags that are not used by 'what'") @@ -58,6 +55,9 @@ def _filter(self): def get_regexes(self): return list(self._regexes) + def get_filter(self): + return dict(self._dict) + def __repr__(self): return f"Distribution({self._dict})" @@ -87,19 +87,9 @@ def __or__(self, other): def __iand__(self, other): if type(self) != type(other): return NotImplemented - self._dict["Tags"] &= other._dict["Tags"] - self._dict["ExcludeTags"] &= other._dict["ExcludeTags"] - self._dict["MinRarity"] = max(self._dict["MinRarity"], other._dict["MinRarity"]) - self._dict["MaxRarity"] = min(self._dict["MaxRarity"], other._dict["MaxRarity"]) - self._load_regexes() - return self + return self & other def __ior__(self, other): if type(self) != type(other): return NotImplemented - self._dict["Tags"] |= other._dict["Tags"] - self._dict["ExcludeTags"] |= other._dict["ExcludeTags"] - self._dict["MinRarity"] = min(self._dict["MinRarity"], other._dict["MinRarity"]) - self._dict["MaxRarity"] = max(self._dict["MaxRarity"], other._dict["MaxRarity"]) - self._load_regexes() - return self + return self | other diff --git a/pywhat/what.py b/pywhat/what.py index 9e5d258..51203f1 100644 --- a/pywhat/what.py +++ b/pywhat/what.py @@ -57,7 +57,7 @@ def main(text_input, rarity, include_tags, exclude_tags): min_rarity = 0 max_rarity = 1 - included_tags = [] + included_tags = list(helper.AvailableTags().get_tags()) excluded_tags = [] if rarity is not None: diff --git a/tests/test_distribution.py b/tests/test_distribution.py new file mode 100644 index 0000000..d791417 --- /dev/null +++ b/tests/test_distribution.py @@ -0,0 +1,123 @@ +import json +import os + +import pytest +from pywhat import AvailableTags, Distribution +from pywhat.helper import InvalidTag + + +def test_distribution(): + dist = Distribution() + path = "../pywhat/Data/regex.json" + fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) + with open(fullpath, "r", encoding="utf-8") as myfile: + regexes = json.load(myfile) + assert regexes == dist.get_regexes() + + +def test_distribution2(): + filter = { + "MinRarity": 0.3, + "MaxRarity": 0.8, + "Tags": ["Networking"], + "ExcludeTags": ["Identifiers"], + } + dist = Distribution(filter) + path = "../pywhat/Data/regex.json" + fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) + with open(fullpath, "r", encoding="utf-8") as myfile: + regexes = json.load(myfile) + for regex in regexes: + if ( + 0.3 <= regex["Rarity"] <= 0.8 + and "Networking" in regex["Tags"] + and "Identifiers" not in regex["Tags"] + ): + assert regex in dist.get_regexes() + + +def test_distribution3(): + filter1 = {"MinRarity": 0.3, "Tags": ["Networking"], "ExcludeTags": ["Identifiers"]} + filter2 = {"MinRarity": 0.4, "MaxRarity": 0.8, "ExcludeTags": ["Media"]} + dist = Distribution(filter1) & Distribution(filter2) + path = "../pywhat/Data/regex.json" + fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) + with open(fullpath, "r", encoding="utf-8") as myfile: + regexes = json.load(myfile) + assert dist._dict["MinRarity"] == 0.4 + assert dist._dict["MaxRarity"] == 0.8 + assert dist._dict["Tags"] == {"Networking"} + assert dist._dict["ExcludeTags"] == set() + + for regex in regexes: + if 0.4 <= regex["Rarity"] <= 0.8 and "Networking" in regex["Tags"]: + assert regex in dist.get_regexes() + + +def test_distribution4(): + filter1 = {"MinRarity": 0.3, "Tags": ["Networking"], "ExcludeTags": ["Identifiers"]} + filter2 = {"MinRarity": 0.4, "MaxRarity": 0.8, "ExcludeTags": ["Media"]} + dist = Distribution(filter2) + dist &= Distribution(filter1) + path = "../pywhat/Data/regex.json" + fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) + with open(fullpath, "r", encoding="utf-8") as myfile: + regexes = json.load(myfile) + assert dist._dict["MinRarity"] == 0.4 + assert dist._dict["MaxRarity"] == 0.8 + assert dist._dict["Tags"] == {"Networking"} + assert dist._dict["ExcludeTags"] == set() + + for regex in regexes: + if 0.4 <= regex["Rarity"] <= 0.8 and "Networking" in regex["Tags"]: + assert regex in dist.get_regexes() + + +def test_distribution5(): + filter1 = {"MinRarity": 0.3, "Tags": ["Networking"], "ExcludeTags": ["Identifiers"]} + filter2 = {"MinRarity": 0.4, "MaxRarity": 0.8, "ExcludeTags": ["Media"]} + dist = Distribution(filter1) | Distribution(filter2) + path = "../pywhat/Data/regex.json" + fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) + with open(fullpath, "r", encoding="utf-8") as myfile: + regexes = json.load(myfile) + assert dist._dict["MinRarity"] == 0.3 + assert dist._dict["MaxRarity"] == 1 + assert dist._dict["Tags"] == AvailableTags().get_tags() + assert dist._dict["ExcludeTags"] == {"Identifiers", "Media"} + + for regex in regexes: + if ( + 0.3 <= regex["Rarity"] <= 1 + and "Identifiers" not in regex["Tags"] + and "Media" not in regex["Tags"] + ): + assert regex in dist.get_regexes() + + +def test_distribution6(): + filter1 = {"MinRarity": 0.3, "Tags": ["Networking"], "ExcludeTags": ["Identifiers"]} + filter2 = {"MinRarity": 0.4, "MaxRarity": 0.8, "ExcludeTags": ["Media"]} + dist = Distribution(filter2) + dist |= Distribution(filter1) + path = "../pywhat/Data/regex.json" + fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) + with open(fullpath, "r", encoding="utf-8") as myfile: + regexes = json.load(myfile) + assert dist._dict["MinRarity"] == 0.3 + assert dist._dict["MaxRarity"] == 1 + assert dist._dict["Tags"] == AvailableTags().get_tags() + assert dist._dict["ExcludeTags"] == {"Identifiers", "Media"} + + for regex in regexes: + if ( + 0.3 <= regex["Rarity"] <= 1 + and "Identifiers" not in regex["Tags"] + and "Media" not in regex["Tags"] + ): + assert regex in dist.get_regexes() + + +def test_distribution7(): + with pytest.raises(InvalidTag): + dist = Distribution({"Tags": "Media", "MinRarity": 0.7}) From 650732dffaccd81f0a670318a2a88181d076f4c3 Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Fri, 4 Jun 2021 16:06:37 +0300 Subject: [PATCH 14/18] Clean up and update tests --- pywhat/distribution.py | 2 +- pywhat/identifier.py | 8 ++--- pywhat/what.py | 69 +++++++++++++++++++------------------- tests/test_click.py | 24 +++++++++++-- tests/test_distribution.py | 6 ++-- tests/test_identifier.py | 42 ++++++++++------------- 6 files changed, 81 insertions(+), 70 deletions(-) diff --git a/pywhat/distribution.py b/pywhat/distribution.py index 90dcf03..cdc1186 100644 --- a/pywhat/distribution.py +++ b/pywhat/distribution.py @@ -12,7 +12,7 @@ class Distribution: Example filters: * {"Tags": ["Networking"]} - * {"Tags": ["Identifiers"], "ExcludeTags": ["Credentials"], "MinRarity":0.6} + * {"Tags": ["Identifiers"], "ExcludeTags": ["Credentials"], "MinRarity": 0.6} """ def __init__(self, filters_dict: Optional[dict] = None): diff --git a/pywhat/identifier.py b/pywhat/identifier.py index 863bdd8..85ab611 100644 --- a/pywhat/identifier.py +++ b/pywhat/identifier.py @@ -17,10 +17,10 @@ def __init__(self, distribution: Optional[Distribution] = None): self._file_sig = FileSignatures() self._name_that_hash = Nth() - def identify(self, text: str, distribution: Distribution = None, + def identify(self, text: str, dist: Distribution = None, api=False) -> dict: - if distribution is None: - distribution = self.distribution + if dist is None: + dist = self.distribution identify_obj = {} magic_numbers = None @@ -36,7 +36,7 @@ def identify(self, text: str, distribution: Distribution = None, # a file in hex format identify_obj["File Signatures"] = self._file_sig.check_magic_nums(text) - identify_obj["Regexes"] = self._regex_id.check(text, distribution) + identify_obj["Regexes"] = self._regex_id.check(text, dist) # get_hashes takes a list of hashes, we split to give it a list # identify_obj["Hashes"] = self._name_that_hash.get_hashes(text.split()) diff --git a/pywhat/what.py b/pywhat/what.py index 51203f1..bdeb21b 100644 --- a/pywhat/what.py +++ b/pywhat/what.py @@ -3,18 +3,47 @@ import click from rich.console import Console -from pywhat import helper, identifier, printer +from pywhat import identifier, printer from pywhat.distribution import Distribution +from pywhat.helper import AvailableTags, InvalidTag def print_tags(ctx, opts, value): if value: - tags = sorted(helper.AvailableTags().get_tags()) + tags = sorted(AvailableTags().get_tags()) console = Console() console.print("[bold #D7Afff]" + "\n".join(tags) + "[/bold #D7Afff]") sys.exit() +def parse_options(rarity, include_tags, exclude_tags): + filter = dict() + if rarity is not None: + rarities = rarity.split(":") + if len(rarities) != 2: + print("Invalid rarity range format ('min:max' expected)") + sys.exit(1) + try: + if not rarities[0].isspace() and rarities[0]: + filter["MinRarity"] = float(rarities[0]) + if not rarities[1].isspace() and rarities[1]: + filter["MaxRarity"] = float(rarities[1]) + except ValueError: + print("Invalid rarity argument (float expected)") + sys.exit(1) + if include_tags is not None: + filter["Tags"] = list(map(str.strip, include_tags.split(','))) + if exclude_tags is not None: + filter["ExcludeTags"] = list(map(str.strip, exclude_tags.split(','))) + + try: + distribution = Distribution(filter) + except InvalidTag: + print("Passed tags are not valid.\n" \ + "You can check available tags by using: 'pywhat --tags'") + sys.exit(1) + + @click.command( context_settings=dict( ignore_unknown_options=True, @@ -55,39 +84,9 @@ def main(text_input, rarity, include_tags, exclude_tags): """ - min_rarity = 0 - max_rarity = 1 - included_tags = list(helper.AvailableTags().get_tags()) - excluded_tags = [] - - if rarity is not None: - rarities = rarity.split(":") - if len(rarities) != 2: - print("Invalid rarity range format ('min:max' expected)") - sys.exit(1) - try: - if not rarities[0].isspace() and rarities[0]: - min_rarity = float(rarities[0]) - if not rarities[1].isspace() and rarities[1]: - max_rarity = float(rarities[1]) - except ValueError: - print("Invalid rarity argument (float expected)") - sys.exit(1) - if include_tags is not None: - included_tags = list(map(str.strip, include_tags.split(','))) - if exclude_tags is not None: - excluded_tags = list(map(str.strip, exclude_tags.split(','))) - - try: - distribution = Distribution( - {"Tags": included_tags, "ExcludeTags": excluded_tags, - "MinRarity": min_rarity, "MaxRarity": max_rarity}) - except helper.InvalidTag: - print("Passed tags are not valid.\n" \ - "You can check available tags by using: 'pywhat --tags'") - sys.exit(1) - - what_obj = What_Object(distribution) + what_obj = What_Object( + parse_options(rarity, include_tags, exclude_tags) + ) identified_output = what_obj.what_is_this(text_input) p = printer.Printing() diff --git a/tests/test_click.py b/tests/test_click.py index de8532d..6019684 100644 --- a/tests/test_click.py +++ b/tests/test_click.py @@ -1,7 +1,9 @@ -from click.testing import CliRunner -from pywhat.what import main import re + import pytest +from click.testing import CliRunner +from pywhat import pywhat_tags +from pywhat.what import main def test_hello_world(): @@ -11,6 +13,22 @@ def test_hello_world(): assert "THM{" in result.output +def test_filtration(): + runner = CliRunner() + result = runner.invoke(main, ["--rarity", "0.5:", "--include_tags", "Identifiers,Media", "fixtures/file"]) + assert result.exit_code == 0 + assert "THM{" in result.output + assert "ETH" in result.output + + +def test_tag_printing(): + runner = CliRunner() + result = runner.invoke(main, "--tags") + assert result.exit_code == 0 + for tag in pywhat_tags: + assert tag in result.output + + def test_file_fixture(): runner = CliRunner() result = runner.invoke(main, ["fixtures/file"]) @@ -340,4 +358,4 @@ def test_file_arn4(): runner = CliRunner() result = runner.invoke(main, ["arn:aws:s3:::my_corporate_bucket/Development/*"]) assert result.exit_code == 0 - assert re.findall("ARN", str(result.output)) \ No newline at end of file + assert re.findall("ARN", str(result.output)) diff --git a/tests/test_distribution.py b/tests/test_distribution.py index d791417..390ecd3 100644 --- a/tests/test_distribution.py +++ b/tests/test_distribution.py @@ -2,7 +2,7 @@ import os import pytest -from pywhat import AvailableTags, Distribution +from pywhat import pywhat_tags, Distribution from pywhat.helper import InvalidTag @@ -83,7 +83,7 @@ def test_distribution5(): regexes = json.load(myfile) assert dist._dict["MinRarity"] == 0.3 assert dist._dict["MaxRarity"] == 1 - assert dist._dict["Tags"] == AvailableTags().get_tags() + assert dist._dict["Tags"] == pywhat_tags assert dist._dict["ExcludeTags"] == {"Identifiers", "Media"} for regex in regexes: @@ -106,7 +106,7 @@ def test_distribution6(): regexes = json.load(myfile) assert dist._dict["MinRarity"] == 0.3 assert dist._dict["MaxRarity"] == 1 - assert dist._dict["Tags"] == AvailableTags().get_tags() + assert dist._dict["Tags"] == pywhat_tags assert dist._dict["ExcludeTags"] == {"Identifiers", "Media"} for regex in regexes: diff --git a/tests/test_identifier.py b/tests/test_identifier.py index 3f3d4cb..0bd9e79 100644 --- a/tests/test_identifier.py +++ b/tests/test_identifier.py @@ -1,3 +1,4 @@ +from pywhat.distribution import Distribution from pywhat import identifier @@ -6,28 +7,21 @@ def test_identifier_works(): out = r.identify("DANHz6EQVoWyZ9rER56DwTXHWUxfkv9k2o") assert "Dogecoin (DOGE) Wallet Address" in out["Regexes"][0]["Regex Pattern"]["Name"] -""" -def test_rarity_filtration(): - r = identifier.Identifier() - out = r.identify("someguy@gmail.com", min_rarity=0.6) - assert len(out["Regexes"]) == 0 - - -def test_rarity_filtration2(): - r = identifier.Identifier() - out = r.identify("ScOAntcCa78", max_rarity=0.1) - assert len(out["Regexes"]) == 0 - -def test_tag_filtration(): - r = identifier.Identifier() - out = r.identify("fixtures/file", included_tags=["Cyber Security"]) - for regex in out["Regexes"]: - assert "Cyber Security" in regex["Regex Pattern"]["Tags"] - - -def test_tag_filtration2(): - r = identifier.Identifier() - out = r.identify("+91 (385) 985 2821", excluded_tags=["Identifiers", "Credentials"]) - assert len(out["Regexes"]) == 0 -""" \ No newline at end of file +def test_identifier_filtration(): + filter = {"Tags": ["Password"]} + r = identifier.Identifier(Distribution(filter)) + regexes = r.identify('fixtures/file')["Regexes"] + for regex in regexes: + assert "Password" in regex["Regex Pattern"]["Tags"] + + +def test_identifier_filtration2(): + filter1 = {"ExcludeTags": ["Identifiers"]} + filter2 = {"Tags": ["Identifiers"], "MinRarity": 0.6} + r = identifier.Identifier(Distribution(filter1)) + regexes = r.identify('fixtures/file', dist=Distribution(filter2))["Regexes"] + for regex in regexes: + assert "Identifiers" in regex["Regex Pattern"]["Tags"] + assert regex["Regex Pattern"]["Rarity"] >= 0.6 + \ No newline at end of file From c0bce292b254cda3c6fb9c3de18d5293e8b5b3ff Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Sat, 5 Jun 2021 19:34:38 +0300 Subject: [PATCH 15/18] Another cleanup --- pywhat/distribution.py | 12 ++---------- pywhat/helper.py | 13 +++++++++---- tests/test_distribution.py | 32 +++++++------------------------- tests/test_regex_identifier.py | 1 - 4 files changed, 18 insertions(+), 40 deletions(-) diff --git a/pywhat/distribution.py b/pywhat/distribution.py index cdc1186..b7a4b29 100644 --- a/pywhat/distribution.py +++ b/pywhat/distribution.py @@ -1,8 +1,6 @@ -import json -import os from typing import Optional -from pywhat.helper import AvailableTags, InvalidTag +from pywhat.helper import AvailableTags, InvalidTag, load_regexes class Distribution: @@ -29,13 +27,7 @@ def __init__(self, filters_dict: Optional[dict] = None): if not self._dict["Tags"].issubset(tags) or not self._dict["ExcludeTags"].issubset(tags): raise InvalidTag("Passed filter contains tags that are not used by 'what'") - self._load_regexes() - - def _load_regexes(self): - path = "Data/regex.json" - fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) - with open(fullpath, "r", encoding="utf-8") as myfile: - self._regexes = json.load(myfile) + self._regexes = load_regexes() self._filter() def _filter(self): diff --git a/pywhat/helper.py b/pywhat/helper.py index 252cbca..ed3b13d 100644 --- a/pywhat/helper.py +++ b/pywhat/helper.py @@ -6,10 +6,7 @@ class AvailableTags(): def __init__(self): self.tags = set() - path = "Data/regex.json" - fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) - with open(fullpath, "r", encoding="utf-8") as myfile: - regexes = json.load(myfile) + regexes = load_regexes() for regex in regexes: self.tags.update(regex["Tags"]) @@ -23,3 +20,11 @@ class InvalidTag(Exception): containing non-existent tags. """ pass + + + +def load_regexes() -> list: + path = "Data/regex.json" + fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) + with open(fullpath, "r", encoding="utf-8") as myfile: + return json.load(myfile) \ No newline at end of file diff --git a/tests/test_distribution.py b/tests/test_distribution.py index 390ecd3..30a459e 100644 --- a/tests/test_distribution.py +++ b/tests/test_distribution.py @@ -3,15 +3,12 @@ import pytest from pywhat import pywhat_tags, Distribution -from pywhat.helper import InvalidTag +from pywhat.helper import InvalidTag, load_regexes def test_distribution(): dist = Distribution() - path = "../pywhat/Data/regex.json" - fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) - with open(fullpath, "r", encoding="utf-8") as myfile: - regexes = json.load(myfile) + regexes = load_regexes() assert regexes == dist.get_regexes() @@ -23,10 +20,7 @@ def test_distribution2(): "ExcludeTags": ["Identifiers"], } dist = Distribution(filter) - path = "../pywhat/Data/regex.json" - fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) - with open(fullpath, "r", encoding="utf-8") as myfile: - regexes = json.load(myfile) + regexes = load_regexes() for regex in regexes: if ( 0.3 <= regex["Rarity"] <= 0.8 @@ -40,10 +34,7 @@ def test_distribution3(): filter1 = {"MinRarity": 0.3, "Tags": ["Networking"], "ExcludeTags": ["Identifiers"]} filter2 = {"MinRarity": 0.4, "MaxRarity": 0.8, "ExcludeTags": ["Media"]} dist = Distribution(filter1) & Distribution(filter2) - path = "../pywhat/Data/regex.json" - fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) - with open(fullpath, "r", encoding="utf-8") as myfile: - regexes = json.load(myfile) + regexes = load_regexes() assert dist._dict["MinRarity"] == 0.4 assert dist._dict["MaxRarity"] == 0.8 assert dist._dict["Tags"] == {"Networking"} @@ -59,10 +50,7 @@ def test_distribution4(): filter2 = {"MinRarity": 0.4, "MaxRarity": 0.8, "ExcludeTags": ["Media"]} dist = Distribution(filter2) dist &= Distribution(filter1) - path = "../pywhat/Data/regex.json" - fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) - with open(fullpath, "r", encoding="utf-8") as myfile: - regexes = json.load(myfile) + regexes = load_regexes() assert dist._dict["MinRarity"] == 0.4 assert dist._dict["MaxRarity"] == 0.8 assert dist._dict["Tags"] == {"Networking"} @@ -77,10 +65,7 @@ def test_distribution5(): filter1 = {"MinRarity": 0.3, "Tags": ["Networking"], "ExcludeTags": ["Identifiers"]} filter2 = {"MinRarity": 0.4, "MaxRarity": 0.8, "ExcludeTags": ["Media"]} dist = Distribution(filter1) | Distribution(filter2) - path = "../pywhat/Data/regex.json" - fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) - with open(fullpath, "r", encoding="utf-8") as myfile: - regexes = json.load(myfile) + regexes = load_regexes() assert dist._dict["MinRarity"] == 0.3 assert dist._dict["MaxRarity"] == 1 assert dist._dict["Tags"] == pywhat_tags @@ -100,10 +85,7 @@ def test_distribution6(): filter2 = {"MinRarity": 0.4, "MaxRarity": 0.8, "ExcludeTags": ["Media"]} dist = Distribution(filter2) dist |= Distribution(filter1) - path = "../pywhat/Data/regex.json" - fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) - with open(fullpath, "r", encoding="utf-8") as myfile: - regexes = json.load(myfile) + regexes = load_regexes() assert dist._dict["MinRarity"] == 0.3 assert dist._dict["MaxRarity"] == 1 assert dist._dict["Tags"] == pywhat_tags diff --git a/tests/test_regex_identifier.py b/tests/test_regex_identifier.py index 9e81388..284a5ad 100644 --- a/tests/test_regex_identifier.py +++ b/tests/test_regex_identifier.py @@ -4,7 +4,6 @@ def test_regex_successfully_parses(): r = regex_identifier.RegexIdentifier() - print(r.distribution.get_regexes) assert "Name" in r.distribution.get_regexes()[0] From 5fc4e97ad3527e7d41c4a4c642f9d2fc297b019b Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Sun, 6 Jun 2021 12:14:58 +0300 Subject: [PATCH 16/18] Fix CLI --- pywhat/what.py | 10 ++++++---- tests/test_click.py | 7 +++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pywhat/what.py b/pywhat/what.py index bdeb21b..a35c2a7 100644 --- a/pywhat/what.py +++ b/pywhat/what.py @@ -43,6 +43,8 @@ def parse_options(rarity, include_tags, exclude_tags): "You can check available tags by using: 'pywhat --tags'") sys.exit(1) + return distribution + @click.command( context_settings=dict( @@ -50,10 +52,10 @@ def parse_options(rarity, include_tags, exclude_tags): ) ) @click.argument("text_input", required=True) -@click.option("--tags", is_flag=True, expose_value=False, callback=print_tags, help="Show available tags and exit.") -@click.option("--rarity", help="Filter by rarity.") -@click.option("--include_tags", help="Only print entries with included tags.") -@click.option("--exclude_tags", help="Exclude tags.") +@click.option("-t", "--tags", is_flag=True, expose_value=False, callback=print_tags, help="Show available tags and exit.") +@click.option("-r", "--rarity", help="Filter by rarity. This is in the range of 0:1. To filter only items past 0.5, use 0.5: with the colon on the end.") +@click.option("-i", "--include_tags", help="Only print entries with included tags.") +@click.option("-e", "--exclude_tags", help="Exclude tags.") def main(text_input, rarity, include_tags, exclude_tags): """ What - Identify what something is.\n diff --git a/tests/test_click.py b/tests/test_click.py index 6019684..a85890c 100644 --- a/tests/test_click.py +++ b/tests/test_click.py @@ -17,8 +17,11 @@ def test_filtration(): runner = CliRunner() result = runner.invoke(main, ["--rarity", "0.5:", "--include_tags", "Identifiers,Media", "fixtures/file"]) assert result.exit_code == 0 - assert "THM{" in result.output - assert "ETH" in result.output + assert "THM{" not in result.output + assert "ETH" not in result.output + assert "Email Address" in result.output + assert "IP" in result.output + assert "URL" in result.output def test_tag_printing(): From 69053078dc6c7f0bc95dadddf644e2a6680efec9 Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Sun, 6 Jun 2021 13:21:37 +0300 Subject: [PATCH 17/18] Make tags case insensitive --- pywhat/distribution.py | 9 ++++----- pywhat/helper.py | 35 ++++++++++++++++++++++++++++++++--- tests/test_distribution.py | 18 +++++++++--------- 3 files changed, 45 insertions(+), 17 deletions(-) diff --git a/pywhat/distribution.py b/pywhat/distribution.py index b7a4b29..ce5ee6e 100644 --- a/pywhat/distribution.py +++ b/pywhat/distribution.py @@ -1,6 +1,6 @@ from typing import Optional -from pywhat.helper import AvailableTags, InvalidTag, load_regexes +from pywhat.helper import AvailableTags, CaseInsensitiveSet, InvalidTag, load_regexes class Distribution: @@ -14,16 +14,15 @@ class Distribution: """ def __init__(self, filters_dict: Optional[dict] = None): - tags = AvailableTags().get_tags() + tags = CaseInsensitiveSet(AvailableTags().get_tags()) self._dict = dict() if filters_dict is None: filters_dict = dict() - self._dict["Tags"] = set(filters_dict.setdefault("Tags", tags)) - self._dict["ExcludeTags"] = set(filters_dict.setdefault("ExcludeTags", set())) + self._dict["Tags"] = CaseInsensitiveSet(filters_dict.setdefault("Tags", tags)) + self._dict["ExcludeTags"] = CaseInsensitiveSet(filters_dict.setdefault("ExcludeTags", set())) self._dict["MinRarity"] = filters_dict.setdefault("MinRarity", 0) self._dict["MaxRarity"] = filters_dict.setdefault("MaxRarity", 1) - if not self._dict["Tags"].issubset(tags) or not self._dict["ExcludeTags"].issubset(tags): raise InvalidTag("Passed filter contains tags that are not used by 'what'") diff --git a/pywhat/helper.py b/pywhat/helper.py index ed3b13d..bd755a1 100644 --- a/pywhat/helper.py +++ b/pywhat/helper.py @@ -1,9 +1,10 @@ """Helper utilities""" +import collections.abc import json import os.path -class AvailableTags(): +class AvailableTags: def __init__(self): self.tags = set() regexes = load_regexes() @@ -19,12 +20,40 @@ class InvalidTag(Exception): This exception should be raised when Distribution() gets a filter containing non-existent tags. """ - pass + pass def load_regexes() -> list: path = "Data/regex.json" fullpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) with open(fullpath, "r", encoding="utf-8") as myfile: - return json.load(myfile) \ No newline at end of file + return json.load(myfile) + + +class CaseInsensitiveSet(collections.abc.Set): + def __init__(self, iterable=None): + self._elements = set() + if iterable is not None: + self._elements = set(map(self._lower, iterable)) + + def _lower(self, value): + return value.lower() if isinstance(value, str) else value + + def __contains__(self, value): + return self._lower(value) in self._elements + + def __iter__(self): + return iter(self._elements) + + def __len__(self): + return len(self._elements) + + def __repr__(self): + return self._elements.__repr__() + + def issubset(self, other): + for value in self: + if value not in other: + return False + return True diff --git a/tests/test_distribution.py b/tests/test_distribution.py index 30a459e..e71c6e4 100644 --- a/tests/test_distribution.py +++ b/tests/test_distribution.py @@ -3,7 +3,7 @@ import pytest from pywhat import pywhat_tags, Distribution -from pywhat.helper import InvalidTag, load_regexes +from pywhat.helper import CaseInsensitiveSet, InvalidTag, load_regexes def test_distribution(): @@ -37,8 +37,8 @@ def test_distribution3(): regexes = load_regexes() assert dist._dict["MinRarity"] == 0.4 assert dist._dict["MaxRarity"] == 0.8 - assert dist._dict["Tags"] == {"Networking"} - assert dist._dict["ExcludeTags"] == set() + assert dist._dict["Tags"] == CaseInsensitiveSet(["Networking"]) + assert dist._dict["ExcludeTags"] == CaseInsensitiveSet() for regex in regexes: if 0.4 <= regex["Rarity"] <= 0.8 and "Networking" in regex["Tags"]: @@ -53,8 +53,8 @@ def test_distribution4(): regexes = load_regexes() assert dist._dict["MinRarity"] == 0.4 assert dist._dict["MaxRarity"] == 0.8 - assert dist._dict["Tags"] == {"Networking"} - assert dist._dict["ExcludeTags"] == set() + assert dist._dict["Tags"] == CaseInsensitiveSet(["Networking"]) + assert dist._dict["ExcludeTags"] == CaseInsensitiveSet() for regex in regexes: if 0.4 <= regex["Rarity"] <= 0.8 and "Networking" in regex["Tags"]: @@ -68,8 +68,8 @@ def test_distribution5(): regexes = load_regexes() assert dist._dict["MinRarity"] == 0.3 assert dist._dict["MaxRarity"] == 1 - assert dist._dict["Tags"] == pywhat_tags - assert dist._dict["ExcludeTags"] == {"Identifiers", "Media"} + assert dist._dict["Tags"] == CaseInsensitiveSet(pywhat_tags) + assert dist._dict["ExcludeTags"] == CaseInsensitiveSet(["Identifiers", "Media"]) for regex in regexes: if ( @@ -88,8 +88,8 @@ def test_distribution6(): regexes = load_regexes() assert dist._dict["MinRarity"] == 0.3 assert dist._dict["MaxRarity"] == 1 - assert dist._dict["Tags"] == pywhat_tags - assert dist._dict["ExcludeTags"] == {"Identifiers", "Media"} + assert dist._dict["Tags"] == CaseInsensitiveSet(pywhat_tags) + assert dist._dict["ExcludeTags"] == CaseInsensitiveSet(["Identifiers", "Media"]) for regex in regexes: if ( From 95566f076273eb94c46cb6952841e539d57c65d6 Mon Sep 17 00:00:00 2001 From: Kanstantinas Piatrashka Date: Sun, 6 Jun 2021 13:42:54 +0300 Subject: [PATCH 18/18] Fix printing --- pywhat/printer.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pywhat/printer.py b/pywhat/printer.py index 40275d1..a0a19fa 100644 --- a/pywhat/printer.py +++ b/pywhat/printer.py @@ -10,13 +10,11 @@ def pretty_print(self, text: dict): to_out = "" - if text["File Signatures"]: + if text["File Signatures"] and text["Regexes"]: to_out += "\n" to_out += f"[bold #D7Afff]File Identified[/bold #D7Afff] with Magic Numbers {text['File Signatures']['ISO 8859-1']}." to_out += f"\n[bold #D7Afff]File Description:[/bold #D7Afff] {text['File Signatures']['Description']}." to_out += "\n" - if to_out: - console.print(to_out) if text["Regexes"]: to_out += "\n[bold #D7Afff]Possible Identification[/bold #D7Afff]"