-
Notifications
You must be signed in to change notification settings - Fork 0
/
scan.py
122 lines (98 loc) · 3.43 KB
/
scan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# Script: scan.py
# Desc: extracts contents from website / txt file
# looks for emails, phone numbers, MD5 hashes, files, hyperlinks
# Author: Oliver Thornewill von Essen, 40210534
import urllib
import re
def wget(url):
    '''Fetch url and return the raw page content (HTML).

    Returns whatever the response object's read() yields, or None when
    the page could not be opened or read. Failures are printed rather
    than raised, so callers must be prepared for a None result.
    '''
    try:
        webpage = urllib.urlopen(url)  # Open url
        try:
            return webpage.read()
        finally:
            # Release the handle even if read() fails part-way through.
            webpage.close()
    except Exception as err:
        print('wget doesn\'t work')
        print(err)
        return None
def txtget(filename):
    '''Read a local text file and return its entire contents.

    Returns the file contents as one string, or None when the file
    could not be opened or read. Failures are printed rather than
    raised, so callers must be prepared for a None result.
    '''
    try:
        # The with-block closes the file even if read() raises.
        with open(filename, 'r') as handle:
            return handle.read()
    except Exception as err:
        print('txtget doesn\'t work')
        print(err)
        return None
def findemail(url):
    '''Search a webpage or a local text file for email addresses.

    url is fetched with wget() first. If that yields nothing, the same
    value is treated as a local file name and read with txtget().

    Prints the number of unique addresses followed by each address, and
    returns the addresses as a sorted list (empty when no content could
    be read or nothing matched).
    '''
    content = wget(url)
    if content is None:
        # wget() signals failure by returning None instead of raising,
        # so the file fallback has to be triggered explicitly.
        content = txtget(url)
    if content is None:
        content = ''
    # A set removes duplicates; sorting keeps the output order stable.
    emails = sorted(set(re.findall(r'[\w\.-]+@[\w\.-]+', content)))
    print('[*] %d Email addresses found:' % len(emails))
    for email in emails:
        print(' ' + email)
    return emails
def phone_get(txt):
    '''Find phone numbers within a webpage.

    txt is the URL passed to wget(). Numbers are matched in the form
    +44(d)ddd ddd dddd, where the spaces between groups are optional.

    Prints the number of unique matches followed by each match, and
    returns the matches as a sorted list. If anything goes wrong (for
    example the page could not be fetched) a message and the error are
    printed and None is returned.
    '''
    try:
        page = wget(txt)
        pattern = r"\+44\(\d\)\d{3}\s?\d{3}\s?\d{4}"
        # A set removes duplicates; sorting keeps the output order stable.
        numbers = sorted(set(re.findall(pattern, page)))
        print('[*] %d Phone Numbers found:' % len(numbers))
        for number in numbers:
            print(' ' + number)
        return numbers
    except Exception as err:
        print('There was an issue finding the phone numbers')
        print(err)
        return None
def hash_get(url):
    '''Find possible MD5 hashes in the HTML of a webpage.

    A candidate is a run of exactly 32 hexadecimal digits that is not
    part of a longer hexadecimal run, so longer digests are not reported
    as several MD5 values.

    Prints the number of unique candidates followed by each candidate,
    and returns the candidates as a sorted list. If anything goes wrong
    (for example the page could not be fetched) a message and the error
    are printed and None is returned.
    '''
    try:
        page = wget(url)
        # The lookarounds reject 32-digit slices of longer hex strings.
        pattern = r"(?<![0-9a-fA-F])[0-9a-fA-F]{32}(?![0-9a-fA-F])"
        # A set removes duplicates; sorting keeps the output order stable.
        hashes = sorted(set(re.findall(pattern, page)))
        print('[*] %d Possible MD5 Hashes found:' % len(hashes))
        for candidate in hashes:
            print(' ' + candidate)
        return hashes
    except Exception as err:
        print('There was an issue finding hashes')
        print(err)
        return None
def scrape_files(url):
    """Find and print possible files referenced in the HTML of a webpage.

    A candidate is a double-quoted string ending in one of the
    extensions bmp, jpg, png, gif or docx; the quotes themselves are not
    part of the reported value.

    Prints the number of unique candidates followed by each candidate,
    and returns the candidates as a sorted list. If anything goes wrong
    (for example the page could not be fetched) the error is printed and
    None is returned.
    """
    try:
        page = wget(url)
        pattern = (r"\"([^\n\"]*(?:\\.[^\n\"]*)*"
                   r"\.(?:bmp|jpg|png|gif|docx))\"")
        # A set removes duplicates; sorting keeps the output order stable.
        files = sorted(set(re.findall(pattern, page)))
        print('[*] %d Possible files found:' % len(files))
        for path in files:
            print(' ' + path)
        return files
    except Exception as err:
        print(err)
        return None
def hyperlinks_get(url):
    """Find hyperlinks in the HTML of a webpage.

    A hyperlink is the text following the literal prefix <a href=" up to
    (not including) the next double quote.

    Prints the number of unique links followed by each link, and returns
    the links as a sorted list. If anything goes wrong (for example the
    page could not be fetched) a message and the error are printed and
    None is returned.
    """
    try:
        page = wget(url)
        pattern = r'(?<=<a href=\")([^\"]+)'
        # A set removes duplicates; sorting keeps the output order stable.
        hyperlinks = sorted(set(re.findall(pattern, page)))
        print('[*] %d Hyperlinks found:' % len(hyperlinks))
        for hyperlink in hyperlinks:
            print(' ' + hyperlink)
        return hyperlinks
    except Exception as err:
        print('There was an issue finding hyperlinks')
        print(err)
        return None
# Run the scanner only when this file is executed as a script.
# NOTE(review): main() is not defined in the visible part of this file -
# confirm it exists, otherwise this call raises NameError.
if __name__ == '__main__':
    main()