-
Notifications
You must be signed in to change notification settings - Fork 7
/
common.py
106 lines (93 loc) · 3.48 KB
/
common.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os
import sys
import json
import logging
import datetime
import ConfigParser
from optparse import OptionParser
import nomenklatura
from ckanclient import CkanClient
from running_stats import OpenSpendingStats
# Configure the root logger with level NOTSET (no level filtering on the
# root logger).
logging.basicConfig(level=logging.NOTSET)
# Limit the 'sqlaload' and 'requests' loggers to warnings and above.
logging.getLogger('sqlaload').setLevel(logging.WARNING)
logging.getLogger('requests').setLevel(logging.WARNING)
# Module-level logger used by the helpers in this file.
log = logging.getLogger('common')
def issue(engine, resource_id, resource_hash, stage, message,
          data={}):
    """Log an issue for a resource and add it as a row to the 'issue' table.

    The message is emitted at debug level on the 'issue' logger, then a
    row is added holding ``resource_id``, ``resource_hash``, ``stage``,
    ``message``, the current UTC timestamp and ``data`` serialised as JSON.

    NOTE: the ``data`` default is a single dict shared between calls; it is
    only serialised here, never modified.
    """
    import sqlaload as sl  # deferred: importing sqlaload is slow
    issue_table = sl.get_table(engine, 'issue')
    logging.getLogger('issue').debug("R[%s]: %s", resource_id, message)
    record = {
        'resource_id': resource_id,
        'resource_hash': resource_hash,
        'timestamp': datetime.datetime.utcnow(),
        'stage': stage,
        'message': message,
        'data': json.dumps(data),
    }
    sl.add_row(engine, issue_table, record)
def clear_issues(engine, resource_id, stage):
    """Call sqlaload's delete on the 'issue' table for one resource/stage.

    ``resource_id`` and ``stage`` are passed through as keyword filters;
    presumably every row matching both is removed (confirm against
    sqlaload's ``delete``).
    """
    import sqlaload as sl  # deferred: importing sqlaload is slow
    issue_table = sl.get_table(engine, 'issue')
    criteria = {'resource_id': resource_id, 'stage': stage}
    sl.delete(engine, issue_table, **criteria)
def source_path(row):
    """Return the path for ``row``'s resource inside the resource cache dir.

    The directory comes from the ``resource-cache.dir`` config option and
    is created (including parents) when it does not exist yet. The file
    name is ``row['resource_id']``.
    """
    cache_dir = config_get('resource-cache.dir')
    already_present = os.path.isdir(cache_dir)
    if not already_present:
        os.makedirs(cache_dir)
    file_name = row['resource_id']
    return os.path.join(cache_dir, file_name)
# Parser shared by all config_get() calls; populated on first successful read.
config = None


def config_get(option):
    """Return the value of ``option`` from the ``[uk25k]`` config section.

    On first use, ``default.ini`` and ``config.ini`` are read from the
    current working directory and the parser is cached in the module-level
    ``config``. The cache is only set once at least one file has been read,
    so a failed lookup can be retried (e.g. after changing directory).

    Raises:
        AssertionError: if neither ini file could be read.
        Errors raised by the parser's ``get`` (missing section or option)
        propagate unchanged.
    """
    global config
    if config is None:
        parser = ConfigParser.ConfigParser()
        loaded = parser.read(['default.ini', 'config.ini'])
        if not loaded:
            # Raised explicitly (rather than via ``assert``) so the check
            # still runs under ``python -O``; the exception type is kept.
            raise AssertionError(
                'Could not find config.ini in CWD: %s' % os.getcwd())
        # Publish the parser only after a successful read.
        config = parser
    return config.get('uk25k', option)
def ckan_client():
    """Return a CkanClient for the API URL configured as ``ckan-api.url``.

    The configured value is passed as ``base_location``. Previously the
    option was read but ignored in favour of a hard-coded
    ``http://data.gov.uk/api``.
    """
    ckan_api = config_get('ckan-api.url')
    return CkanClient(base_location=ckan_api)
# Holds at most one element: the connection created by db_connect().
CONNECTION = []


def db_connect():
    """Return the shared database connection, creating it on first call.

    The connection is opened with sqlaload against the ``sqlalchemy.url``
    config option and stored in ``CONNECTION`` for later calls. The URL is
    logged verbatim at info level.
    """
    if CONNECTION:
        return CONNECTION[0]
    import sqlaload as sl
    db_url = config_get('sqlalchemy.url')
    log.info('Using database: %s', db_url)
    CONNECTION.append(sl.connect(db_url))
    return CONNECTION[0]
# Cache of nomenklatura Dataset objects, keyed by dataset name.
NK_DATASETS = {}


def nk_connect(dataset):
    """Return the nomenklatura Dataset for ``dataset``, creating it once.

    Objects are cached in ``NK_DATASETS`` so repeated calls with the same
    name return the same instance.
    """
    try:
        return NK_DATASETS[dataset]
    except KeyError:
        pass
    # NOTE(review): the API key is hard-coded in source; consider moving it
    # to configuration.
    handle = nomenklatura.Dataset(
        dataset,
        api_key='beaf2ff2-ea94-47c0-942f-1613a09056c2')
    NK_DATASETS[dataset] = handle
    return handle
def parse_args(allow_row=False):
    """Parse command-line options and build a record filter from them.

    Args:
        allow_row: when true, an integer ``--row`` option is also accepted.

    Returns:
        A ``(options, filter_)`` tuple. ``options`` is the optparse values
        object. ``filter_`` is a dict with at most one entry, taken from the
        first option given in this order: ``--resource_id``
        (key ``resource_id``), ``--dataset_id`` (``package_id``),
        ``--dataset_name`` (``package_name``), ``--publisher_name``
        (``publisher_name``).

    Exits with status 1, after writing an error and the help text to
    stderr, if any positional arguments are supplied.
    """
    parser = OptionParser(usage="usage: %prog [options]")
    parser.add_option("-f", "--force",
                      action="store_true", dest="force", default=False,
                      help="Don't skip previously processed records")
    if allow_row:
        parser.add_option("--row", type="int", dest="row")
    parser.add_option("-r", "--resource_id", dest="resource_id")
    parser.add_option("-d", "--dataset_name", dest="dataset_name")
    parser.add_option("-i", "--dataset_id", dest="dataset_id")
    parser.add_option("-p", "--publisher_name", dest="publisher_name")
    options, args = parser.parse_args()
    if args:
        # Diagnostics go to stderr; sys.stderr.write also parses on both
        # Python 2 and Python 3, unlike the print statement.
        sys.stderr.write('Error: there should be no args, just options\n')
        parser.print_help(sys.stderr)
        sys.exit(1)

    # (option attribute, filter key) pairs in precedence order; only the
    # first option that was supplied contributes to the filter.
    precedence = (
        ('resource_id', 'resource_id'),
        ('dataset_id', 'package_id'),
        ('dataset_name', 'package_name'),
        ('publisher_name', 'publisher_name'),
    )
    filter_ = {}
    for attr, key in precedence:
        value = getattr(options, attr)
        if value:
            filter_[key] = value
            break
    return options, filter_