-
Notifications
You must be signed in to change notification settings - Fork 24
/
requirements.txt
80 lines (54 loc) · 1.96 KB
/
requirements.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# NOTE: VCS requirements below use git+https:// because GitHub no longer
# serves the unauthenticated git:// protocol. Each one stays pinned to an
# exact commit hash so installs remain reproducible.

# Used to read WARC files
warc==0.2.1
# Used for reading WARC (gzipped) files as a stream
-e git+https://github.com/commoncrawl/gzipstream.git@59fa1e1e31e5278dc11586f7519392212dbd2549#egg=gzipstream
# Used for indexing
elasticsearch==2.3.0
# Used for tests
pytest==2.9.2
# Code coverage for tests
pytest-cov==2.2.1
# Used to repeat tests N times
pytest-repeat==0.3.0
# Send code coverage info to coveralls.io
coveralls==1.1
# Extract top-level domain names with the public suffix list
tldextract==2.0.1
# Fast hash function for generating document IDs
mmh3==2.3.1
# Popular Python module for HTTP requests.
requests==2.10.0
# Used to write & read data files from disk in URLServer
pyrocksdb==0.4
# Optimized JSON module
ujson==1.35
# Used for linting Python code
pylint==1.6.1
# Used to detect language from pages
-e git+https://github.com/commonsearch/cld2-cffi.git@f1567ff448a8d9a801e825ef030a90fa3d3c26d3#egg=cld2-cffi
# Used to map charsets coming from the web to valid Python charsets
webencodings==0.4
# Used for RPC with URLServer. Something more performant (but more complex) will be used in the future
mprpc==0.1.10
# Used as a fallback to detect character encodings when they are not explicitly defined
cchardet==1.0.0
# Used to parse HTTP headers/bodies from WARC files
http_parser==0.8.3
# Gumbo binding for HTML parsing
-e git+https://github.com/commonsearch/gumbocy.git@e83331acf0e9b5673dcfb82cf80014c558539a41#egg=gumbocy
# Used for performance improvements in critical parts of the code
Cython==0.24
# Used to interact with AWS
boto==2.41.0
# Used in URLServer to serve requests in parallel
gevent==1.1.1
# Used for the Explainer service
Flask==0.11.1
# Used for URLServer data storage & exchange
protobuf==3.0.0
# Performance-focused replacement for urlparse
-e git+https://github.com/commonsearch/urlparse4.git@fda910309aa189d57473dbb12e2d2acde49c1736#egg=urlparse4
# LRU cache implementation in C
lru-dict==1.1.4
# OAI-PMH client for the OAI document source
pyoai==2.4.5