uro 1.0.1: per-line input processing, set-based bookkeeping

Summary of the change set
  * uro/__init__.py: bump __version__ from '1.0.0-beta' to '1.0.1'.
  * uro/filters.py: correct the has_ext docstring (the function reports
    that an extension IS present).
  * uro/uro.py:
      . resolve the 'keepslash' flag once at module level and remove it
        from active_filters;
      . hold params_seen, patterns_seen and vuln_params in sets;
      . create_pattern() now returns a compiled regex that is cut off
        after the last all-digit path segment;
      . pattern_exists(), matches_patterns() and is_new_param() are removed;
      . process_url() runs apply_filters() first and returns early when the
        url is rejected;
      . new process_line(); main() reads the input file or stdin as UTF-8
        with errors='ignore'.

Review amendments folded into this revision
  1. filters.check_ext keeps tuple(exts). str.endswith() only accepts a str
     or a tuple of str, and has_ext() still passes a list ([]), as does the
     whitelist branch in uro.py; dropping the conversion would raise
     TypeError. tuple() hands back an existing tuple unchanged, so nothing
     is lost by keeping it.
  2. meta['strict']: ('hasext' or 'noext') in filters only ever tested
     'hasext'; both names are now checked.
  3. keepslash is assigned the membership test directly.

Notes
  * Line breaks and indentation were reconstructed from a whitespace-
    flattened copy of this patch; hunk line counts were used to place blank
    context lines. If the target tree indents with tabs, apply with
    whitespace differences ignored (e.g. patch -l).
  * The "index" lines are those of the original patch; amendments 1-3
    change the resulting blobs.

diff --git a/uro/__init__.py b/uro/__init__.py
index 5a5fdca..cd7ca49 100644
--- a/uro/__init__.py
+++ b/uro/__init__.py
@@ -1 +1 @@
-__version__ = '1.0.0-beta'
+__version__ = '1.0.1'
diff --git a/uro/filters.py b/uro/filters.py
index fdd255f..1bf641c 100644
--- a/uro/filters.py
+++ b/uro/filters.py
@@ -8,11 +8,11 @@ def check_ext(path, exts):
     """
     if '.' not in path.split('/')[-1]:
         return False, False
     return True, path.lower().endswith(tuple(exts))
 
 def has_ext(path, params, meta):
     """
-    returns True if url has no extension e.g. example.com/about-us/team
+    returns True if url has extension e.g. example.com/about-us/team.php
     """
     has_ext, _ = check_ext(path, [])
     return has_ext
diff --git a/uro/uro.py b/uro/uro.py
index 4352d65..0ef8170 100644
--- a/uro/uro.py
+++ b/uro/uro.py
@@ -1,4 +1,5 @@
 import argparse
+import io
 import re
 import sys
 from urllib.parse import urlparse
@@ -34,18 +35,22 @@
     active_filters.remove('removecontent')
     active_filters.remove('keepcontent')
 
+keepslash = 'keepslash' in active_filters
+if keepslash:
+    active_filters.remove('keepslash')
+
 urlmap = {}
-params_seen = []
-patterns_seen = []
+params_seen = set()
+patterns_seen = set()
 
 re_int = re.compile(r'/\d+([?/]|$)')
 
-ext_list = clean_nargs(args.blacklist) if args.blacklist else ('css', 'png', 'jpg', 'jpeg', 'svg',
+ext_list = tuple(clean_nargs(args.blacklist)) if args.blacklist else tuple(('css', 'png', 'jpg', 'jpeg', 'svg',
     'ico','webp', 'scss','tif','tiff','ttf','otf','woff','woff2',
     'gif', 'pdf', 'bmp', 'eot', 'mp3', 'mp4', 'avi'
-)
+))
 
-vuln_params = ('file', 'document', 'folder', 'root', 'path', 'pg', 'style', 'pdf', 'template', 'php_path', 'doc', 'page', 'name', 'cat', 'dir', 'action', 'board', 'date', 'detail', 'download', 'prefix', 'include', 'inc', 'locate', 'show', 'site', 'type', 'view', 'content', 'layout', 'mod', 'conf', 'daemon', 'upload', 'log', 'ip', 'cli', 'cmd', 'exec', 'command', 'execute', 'ping', 'query', 'jump', 'code', 'reg', 'do', 'func', 'arg', 'option', 'load', 'process', 'step', 'read', 'function', 'req', 'feature', 'exe', 'module', 'payload', 'run', 'print', 'callback', 'checkout', 'checkout_url', 'continue', 'data', 'dest', 'destination', 'domain', 'feed', 'file_name', 'file_url', 'folder_url', 'forward', 'from_url', 'go', 'goto', 'host', 'html', 'image_url', 'img_url', 'load_file', 'load_url', 'login_url', 'logout', 'navigation', 'next', 'next_page', 'Open', 'out', 'page_url', 'port', 'redir', 'redirect', 'redirect_to', 'redirect_uri', 'redirect_url', 'reference', 'return', 'return_path', 'return_to', 'returnTo', 'return_url', 'rt', 'rurl', 'target', 'to', 'uri', 'url', 'val', 'validate', 'window', 'q', 's', 'search', 'lang', 'keyword', 'keywords', 'year', 'email', 'p', 'jsonp', 'api_key', 'api', 'password', 'emailto', 'token', 'username', 'csrf_token', 'unsubscribe_token', 'id', 'item', 'page_id', 'month', 'immagine', 'list_type', 'terms', 'categoryid', 'key', 'l', 'begindate', 'enddate', 'select', 'report', 'role', 'update', 'user', 'sort', 'where', 'params', 'row', 'table', 'from', 'sel', 'results', 'sleep', 'fetch', 'order', 'column', 'field', 'delete', 'string', 'number', 'filter', 'access', 'admin', 'dbg', 'debug', 'edit', 'grant', 'test', 'alter', 'clone', 'create', 'disable', 'enable', 'make', 'modify', 'rename', 'reset', 'shell', 'toggle', 'adm', 'cfg', 'open', 'img', 'filename', 'preview', 'activity')
+vuln_params = set(['file', 'document', 'folder', 'root', 'path', 'pg', 'style', 'pdf', 'template', 'php_path', 'doc', 'page', 'name', 'cat', 'dir', 'action', 'board', 'date', 'detail', 'download', 'prefix', 'include', 'inc', 'locate', 'show', 'site', 'type', 'view', 'content', 'layout', 'mod', 'conf', 'daemon', 'upload', 'log', 'ip', 'cli', 'cmd', 'exec', 'command', 'execute', 'ping', 'query', 'jump', 'code', 'reg', 'do', 'func', 'arg', 'option', 'load', 'process', 'step', 'read', 'function', 'req', 'feature', 'exe', 'module', 'payload', 'run', 'print', 'callback', 'checkout', 'checkout_url', 'continue', 'data', 'dest', 'destination', 'domain', 'feed', 'file_name', 'file_url', 'folder_url', 'forward', 'from_url', 'go', 'goto', 'host', 'html', 'image_url', 'img_url', 'load_file', 'load_url', 'login_url', 'logout', 'navigation', 'next', 'next_page', 'Open', 'out', 'page_url', 'port', 'redir', 'redirect', 'redirect_to', 'redirect_uri', 'redirect_url', 'reference', 'return', 'return_path', 'return_to', 'returnTo', 'return_url', 'rt', 'rurl', 'target', 'to', 'uri', 'url', 'val', 'validate', 'window', 'q', 's', 'search', 'lang', 'keyword', 'keywords', 'year', 'email', 'p', 'jsonp', 'api_key', 'api', 'password', 'emailto', 'token', 'username', 'csrf_token', 'unsubscribe_token', 'id', 'item', 'page_id', 'month', 'immagine', 'list_type', 'terms', 'categoryid', 'key', 'l', 'begindate', 'enddate', 'select', 'report', 'role', 'update', 'user', 'sort', 'where', 'params', 'row', 'table', 'from', 'sel', 'results', 'sleep', 'fetch', 'order', 'column', 'field', 'delete', 'string', 'number', 'filter', 'access', 'admin', 'dbg', 'debug', 'edit', 'grant', 'test', 'alter', 'clone', 'create', 'disable', 'enable', 'make', 'modify', 'rename', 'reset', 'shell', 'toggle', 'adm', 'cfg', 'open', 'img', 'filename', 'preview', 'activity'])
 
 if args.whitelist:
     ext_list = clean_nargs(args.whitelist)
@@ -56,44 +61,14 @@ def create_pattern(path):
     creates patterns for urls with integers in them
     """
     new_parts = []
-    for part in re.escape(path).split('/'):
+    last_index = 0
+    for i, part in enumerate(re.escape(path).split('/')):
         if part.isdigit():
+            last_index = i
             new_parts.append('\\d+')
         else:
             new_parts.append(part)
-    return '/'.join(new_parts)
-
-
-def pattern_exists(pattern):
-    """
-    checks if a int pattern exists
-    """
-    for i, seen_pattern in enumerate(patterns_seen):
-        if pattern in seen_pattern:
-            patterns_seen[i] = pattern
-            return True
-        elif seen_pattern in pattern:
-            return True
-    return False
-
-
-def matches_patterns(path):
-    """
-    checks if the url matches any of the int patterns
-    """
-    for pattern in patterns_seen:
-        if re.search(pattern, path):
-            return True
-    return False
-
-def is_new_param(params):
-    """
-    checks if a there's an unseen param within given params
-    """
-    for param in params:
-        if param in params_seen:
-            return False
-    return True
+    return re.compile('/'.join(new_parts[:last_index + 1]))
 
 
 def apply_filters(path, params):
@@ -111,7 +86,6 @@ def apply_filters(path, params):
         'whitelist': whitelisted,
         'vuln': has_vuln_param,
     }
-    results = []
     meta = {
-        'strict': True if ('hasext' or 'noext') in filters else False,
+        'strict': 'hasext' in filters or 'noext' in filters,
         'ext_list': ext_list,
@@ -132,35 +106,49 @@ def process_url(url):
     if host not in urlmap:
         urlmap[host] = {}
     path, params = url.path, params_to_dict(url.query)
-    has_new_param = False if not params else is_new_param(params.keys())
-    new_params = [param for param in params.keys() if param not in params_seen]
-    params_seen.extend(new_params)
-    if (not params or has_new_param) and re_int.search(path):
-        pattern = create_pattern(path)
-        if not pattern_exists(pattern):
-            patterns_seen.append(pattern)
-    elif matches_patterns(path):
-        return
     keep_url = apply_filters(path, params)
-    if keep_url:
-        if path not in urlmap[host]:
-            urlmap[host][path] = [params] if params else []
-        elif has_new_param or compare_params(urlmap[host][path], params):
+    if not keep_url:
+        return
+    new_params = [] if not params else [param for param in params.keys() if param not in params_seen]
+    params_seen.update(new_params)
+    new_path = path not in urlmap[host]
+    if new_path:
+        if re_int.search(path):
+            pattern = create_pattern(path)
+            if pattern in patterns_seen:
+                return
+            patterns_seen.add(pattern)
+        urlmap[host][path] = []
+        if params:
+            urlmap[host][path].append(params)
+    else:
+        if new_params:
+            urlmap[host][path].append(params)
+        elif compare_params(urlmap[host][path], params):
             urlmap[host][path].append(params)
 
+
+def process_line(line):
+    """
+    processes a single line from input
+    """
+    cleanline = line.strip() if keepslash else line.strip().rstrip('/')
+    parsed_url = urlparse(cleanline)
+    if parsed_url.netloc:
+        process_url(parsed_url)
+
 def main():
-    input_stream = open(args.input_file, 'r') if args.input_file else None
-    if not input_stream:
-        if not sys.stdin.isatty():
-            input_stream = sys.stdin
-    if not input_stream:
+    if args.input_file:
+        with open(args.input_file, 'r', encoding='utf-8', errors='ignore') as input_file:
+            for line in input_file:
+                process_line(line)
+    elif not sys.stdin.isatty():
+        for line in io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8', errors='ignore'):
+            process_line(line)
+    else:
         print('[ERROR] No input file or stdin.', file=sys.stderr)
         exit(1)
-    for line in input_stream:
-        cleanline = line.strip() if 'keepslash' in filters else line.strip().rstrip('/')
-        parsed_url = urlparse(cleanline)
-        if parsed_url.netloc:
-            process_url(parsed_url)
+
     og_stdout = sys.stdout
     sys.stdout = open(args.output_file, 'a+') if args.output_file else sys.stdout
     for host, value in urlmap.items():
@@ -169,4 +157,4 @@ def main():
                 for param in params:
                     print(host + path + dict_to_params(param))
             else:
-                print(host + path)
+                print(host + path)
\ No newline at end of file