-
-
Notifications
You must be signed in to change notification settings - Fork 367
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #62 from piatrashkakanstantinass/filtration
Filtration support
- Loading branch information
Showing
13 changed files
with
406 additions
and
35 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from pywhat.distribution import Distribution | ||
from pywhat.helper import AvailableTags | ||
from pywhat.identifier import Identifier | ||
|
||
pywhat_tags = AvailableTags().get_tags() | ||
|
||
|
||
__all__ = ["Identifier", "Distribution", "pywhat_tags"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
from typing import Optional | ||
|
||
from pywhat.helper import AvailableTags, CaseInsensitiveSet, InvalidTag, load_regexes | ||
|
||
|
||
class Distribution:
    """
    A filtered view over the regexes returned by ``load_regexes()``.

    A distribution is built from a filter dictionary; only regexes that
    pass the filter are kept. Recognised filter keys:

    * ``"Tags"``: keep regexes sharing at least one tag with this
      collection (default: every available tag).
    * ``"ExcludeTags"``: drop regexes sharing any tag with this
      collection (default: empty).
    * ``"MinRarity"`` / ``"MaxRarity"``: inclusive rarity bounds
      (defaults: 0 and 1).

    Example filters:
        * {"Tags": ["Networking"]}
        * {"Tags": ["Identifiers"], "ExcludeTags": ["Credentials"], "MinRarity": 0.6}
    """

    def __init__(self, filters_dict: Optional[dict] = None):
        """
        Build the distribution and apply the filter immediately.

        :param filters_dict: filter description (see class docstring);
            ``None`` means "no filtering". The passed dict is only read,
            never modified.
        :raises InvalidTag: if "Tags" or "ExcludeTags" contain a tag that
            is not among the available tags.
        """
        tags = CaseInsensitiveSet(AvailableTags().get_tags())
        if filters_dict is None:
            filters_dict = {}

        # Use .get() rather than .setdefault(): the latter would write the
        # defaults back into the caller's dictionary.
        self._dict = {
            "Tags": CaseInsensitiveSet(filters_dict.get("Tags", tags)),
            "ExcludeTags": CaseInsensitiveSet(
                filters_dict.get("ExcludeTags", set())
            ),
            "MinRarity": filters_dict.get("MinRarity", 0),
            "MaxRarity": filters_dict.get("MaxRarity", 1),
        }
        if not (
            self._dict["Tags"].issubset(tags)
            and self._dict["ExcludeTags"].issubset(tags)
        ):
            raise InvalidTag("Passed filter contains tags that are not used by 'what'")

        self._regexes = load_regexes()
        self._filter()

    def _filter(self):
        """Replace ``self._regexes`` with the entries that pass the filter."""
        min_rarity = self._dict["MinRarity"]
        max_rarity = self._dict["MaxRarity"]
        included = self._dict["Tags"]
        excluded = self._dict["ExcludeTags"]
        # A regex is kept when its rarity is within bounds, it shares at
        # least one tag with "Tags" and none with "ExcludeTags".
        self._regexes = [
            regex
            for regex in self._regexes
            if min_rarity <= regex["Rarity"] <= max_rarity
            and set(regex["Tags"]) & included
            and not set(regex["Tags"]) & excluded
        ]

    def get_regexes(self):
        """Return a new list holding the regexes that passed the filter."""
        return list(self._regexes)

    def get_filter(self):
        """Return a shallow copy of the normalised filter dictionary."""
        return dict(self._dict)

    def __repr__(self):
        return f"Distribution({self._dict})"

    def __and__(self, other):
        """
        Combine two distributions into a narrower one: tags are
        intersected and the rarity range is the overlap of both ranges.
        """
        if not isinstance(other, Distribution):
            return NotImplemented
        tags = self._dict["Tags"] & other._dict["Tags"]
        # NOTE(review): ExcludeTags are intersected too, so the result
        # excludes only tags excluded by *both* operands -- confirm this
        # is the intended semantics.
        exclude_tags = self._dict["ExcludeTags"] & other._dict["ExcludeTags"]
        min_rarity = max(self._dict["MinRarity"], other._dict["MinRarity"])
        max_rarity = min(self._dict["MaxRarity"], other._dict["MaxRarity"])
        return Distribution(
            {
                "Tags": tags,
                "ExcludeTags": exclude_tags,
                "MinRarity": min_rarity,
                "MaxRarity": max_rarity,
            }
        )

    def __or__(self, other):
        """
        Combine two distributions into a wider one: tags and excluded
        tags are united and the rarity range spans both ranges.
        """
        if not isinstance(other, Distribution):
            return NotImplemented
        tags = self._dict["Tags"] | other._dict["Tags"]
        exclude_tags = self._dict["ExcludeTags"] | other._dict["ExcludeTags"]
        min_rarity = min(self._dict["MinRarity"], other._dict["MinRarity"])
        max_rarity = max(self._dict["MaxRarity"], other._dict["MaxRarity"])
        return Distribution(
            {
                "Tags": tags,
                "ExcludeTags": exclude_tags,
                "MinRarity": min_rarity,
                "MaxRarity": max_rarity,
            }
        )

    def __iand__(self, other):
        """``a &= b``: rebinds ``a`` to a new Distribution (no in-place change)."""
        if not isinstance(other, Distribution):
            return NotImplemented
        return self & other

    def __ior__(self, other):
        """``a |= b``: rebinds ``a`` to a new Distribution (no in-place change)."""
        if not isinstance(other, Distribution):
            return NotImplemented
        return self | other
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
"""Helper utilities""" | ||
import collections.abc | ||
import json | ||
import os.path | ||
|
||
|
||
class AvailableTags:
    """Collects every tag that appears in the loaded regex definitions."""

    def __init__(self):
        # Union of the "Tags" entries of all regexes from load_regexes().
        self.tags = {tag for entry in load_regexes() for tag in entry["Tags"]}

    def get_tags(self):
        """Return the set of collected tags (the stored set itself, not a copy)."""
        return self.tags
|
||
|
||
class InvalidTag(Exception):
    """Raised when a filter given to Distribution() names a tag that does not exist."""
|
||
|
||
def load_regexes() -> list:
    """
    Read and parse ``Data/regex.json``, located relative to this module's
    directory, and return the decoded JSON content.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    regex_file = os.path.join(module_dir, "Data/regex.json")
    with open(regex_file, "r", encoding="utf-8") as handle:
        return json.load(handle)
|
||
|
||
class CaseInsensitiveSet(collections.abc.Set):
    """
    Read-only set whose string members are stored and looked up in lower
    case; non-string members are kept unchanged.
    """

    def __init__(self, iterable=None):
        if iterable is None:
            self._elements = set()
        else:
            self._elements = {self._lower(item) for item in iterable}

    def _lower(self, value):
        """Lower-case *value* if it is a string, otherwise return it as is."""
        if isinstance(value, str):
            return value.lower()
        return value

    def __contains__(self, value):
        normalized = self._lower(value)
        return normalized in self._elements

    def __iter__(self):
        return iter(self._elements)

    def __len__(self):
        return len(self._elements)

    def __repr__(self):
        return repr(self._elements)

    def issubset(self, other):
        """Return True when every member of this set is contained in *other*."""
        return all(member in other for member in self)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,36 +1,47 @@ | ||
import os.path | ||
from typing import List, Optional | ||
|
||
from pywhat.distribution import Distribution | ||
from pywhat.magic_numbers import FileSignatures | ||
from pywhat.nameThatHash import Nth | ||
from pywhat.regex_identifier import RegexIdentifier | ||
|
||
|
||
class Identifier: | ||
def __init__(self): | ||
self.regex_id = RegexIdentifier() | ||
self.file_sig = FileSignatures() | ||
self.name_that_hash = Nth() | ||
|
||
def identify(self, text: str, api=False) -> dict: | ||
def __init__(self, distribution: Optional[Distribution] = None): | ||
if distribution is None: | ||
self.distribution = Distribution() | ||
else: | ||
self.distribution = distribution | ||
self._regex_id = RegexIdentifier() | ||
self._file_sig = FileSignatures() | ||
self._name_that_hash = Nth() | ||
|
||
def identify(self, text: str, dist: Distribution = None, | ||
api=False) -> dict: | ||
if dist is None: | ||
dist = self.distribution | ||
identify_obj = {} | ||
|
||
magic_numbers = None | ||
if not api and self.file_exists(text): | ||
magic_numbers = self.file_sig.open_binary_scan_magic_nums(text) | ||
text = self.file_sig.open_file_loc(text) | ||
if not api and self._file_exists(text): | ||
magic_numbers = self._file_sig.open_binary_scan_magic_nums(text) | ||
text = self._file_sig.open_file_loc(text) | ||
identify_obj["File Signatures"] = magic_numbers | ||
else: | ||
text = [text] | ||
|
||
if not magic_numbers: | ||
# If file doesn't exist, check to see if the inputted text is | ||
# a file in hex format | ||
identify_obj["File Signatures"] = self.file_sig.check_magic_nums(text) | ||
identify_obj["Regexes"] = self.regex_id.check(text) | ||
identify_obj["File Signatures"] = self._file_sig.check_magic_nums(text) | ||
|
||
identify_obj["Regexes"] = self._regex_id.check(text, dist) | ||
|
||
# get_hashes takes a list of hashes, we split to give it a list | ||
# identify_obj["Hashes"] = self.name_that_hash.get_hashes(text.split()) | ||
# identify_obj["Hashes"] = self._name_that_hash.get_hashes(text.split()) | ||
|
||
return identify_obj | ||
|
||
def file_exists(self, text): | ||
def _file_exists(self, text): | ||
return os.path.isfile(text) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,49 @@ | ||
import sys | ||
|
||
import click | ||
from rich.console import Console | ||
|
||
from pywhat import identifier, printer | ||
from pywhat.distribution import Distribution | ||
from pywhat.helper import AvailableTags, InvalidTag | ||
|
||
|
||
def print_tags(ctx, opts, value):
    """
    Option callback for the ``--tags`` flag: when the flag is set, print
    all available tags in sorted order (one per line) and exit.
    """
    if not value:
        return
    listing = sorted(AvailableTags().get_tags())
    out = Console()
    out.print("[bold #D7Afff]" + "\n".join(listing) + "[/bold #D7Afff]")
    sys.exit()
|
||
|
||
def parse_options(rarity, include_tags, exclude_tags):
    """
    Turn the raw command-line option values into a Distribution.

    :param rarity: ``"min:max"`` string (either side may be empty), or None.
    :param include_tags: comma separated tag list, or None.
    :param exclude_tags: comma separated tag list, or None.
    :return: the Distribution built from the resulting filter.

    Prints a message and exits with status 1 when the rarity range is
    malformed or when Distribution rejects the tags with InvalidTag.
    """
    options = {}

    if rarity is not None:
        bounds = rarity.split(":")
        if len(bounds) != 2:
            print("Invalid rarity range format ('min:max' expected)")
            sys.exit(1)
        lower, upper = bounds
        try:
            # An empty or whitespace-only side leaves that bound unset.
            if lower and not lower.isspace():
                options["MinRarity"] = float(lower)
            if upper and not upper.isspace():
                options["MaxRarity"] = float(upper)
        except ValueError:
            print("Invalid rarity argument (float expected)")
            sys.exit(1)

    if include_tags is not None:
        options["Tags"] = [tag.strip() for tag in include_tags.split(",")]
    if exclude_tags is not None:
        options["ExcludeTags"] = [tag.strip() for tag in exclude_tags.split(",")]

    try:
        return Distribution(options)
    except InvalidTag:
        print(
            "Passed tags are not valid.\n"
            "You can check available tags by using: 'pywhat --tags'"
        )
        sys.exit(1)
|
||
|
||
@click.command( | ||
|
@@ -8,14 +52,26 @@ | |
) | ||
) | ||
@click.argument("text_input", required=True) | ||
def main(text_input): | ||
@click.option("-t", "--tags", is_flag=True, expose_value=False, callback=print_tags, help="Show available tags and exit.") | ||
@click.option("-r", "--rarity", help="Filter by rarity. This is in the range of 0:1. To filter only items past 0.5, use 0.5: with the colon on the end.") | ||
@click.option("-i", "--include_tags", help="Only print entries with included tags.") | ||
@click.option("-e", "--exclude_tags", help="Exclude tags.") | ||
def main(text_input, rarity, include_tags, exclude_tags): | ||
""" | ||
What - Identify what something is.\n | ||
Made by Bee https://twitter.com/bee_sec_san\n | ||
https://github.com/bee-san\n | ||
Filtration:\n | ||
--rarity min:max\n | ||
Only print entries with rarity in range [min,max]. min and max can be omitted.\n | ||
--include_tags list\n | ||
Only include entries containing at least one tag in a list. List is a comma separated list.\n | ||
--exclude_tags list\n | ||
Exclude specified tags. List is a comma separated list.\n | ||
Examples: | ||
* what "HTB{this is a flag}" | ||
|
@@ -24,22 +80,27 @@ def main(text_input): | |
* what -- 52.6169586, -1.9779857 | ||
* what --rarity 0.6: [email protected] | ||
Your text must either be in quotation marks, or use the POSIX standard of "--" to mean "anything after -- is textual input". | ||
""" | ||
|
||
what_obj = What_Object() | ||
what_obj = What_Object( | ||
parse_options(rarity, include_tags, exclude_tags) | ||
) | ||
identified_output = what_obj.what_is_this(text_input) | ||
|
||
p = printer.Printing() | ||
p.pretty_print(identified_output) | ||
|
||
|
||
class What_Object: | ||
def __init__(self): | ||
self.id = identifier.Identifier() | ||
def __init__(self, distribution): | ||
self.id = identifier.Identifier(distribution) | ||
|
||
def what_is_this(self, text: str) -> dict: | ||
def what_is_this( | ||
self, text: str) -> dict: | ||
""" | ||
Returns a Python dictionary of everything that has been identified | ||
""" | ||
|
Oops, something went wrong.