forked from bank2ynab/bank2ynab
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bank2ynab.py
executable file
·577 lines (515 loc) · 21.5 KB
/
bank2ynab.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
#!/usr/bin/env python3
#
# bank2ynab.py
#
# Searches specified folder or default download folder for exported
# bank transaction file (.csv format) & adjusts format for YNAB import
# Please see here for details: https://github.com/torbengb/bank2ynab
#
# MIT License: https://github.com/torbengb/bank2ynab/blob/master/LICENSE
#
# DISCLAIMER: Please use at your own risk. This tool is neither officially
# supported by YNAB (the company) nor by YNAB (the software) in any way.
# Use of this tool could introduce problems into your budget that YNAB,
# through its official support channels, will not be able to troubleshoot
# or fix. See also the full MIT licence.
#
#
# don't edit below here unless you know what you're doing!
from os.path import abspath, join, dirname, basename
import codecs
import csv
import os
import importlib
import re
from datetime import datetime
import logging
# configure our logger
# Every message is emitted as "<LEVEL> <message>"; INFO and above are shown.
logging.basicConfig(format="%(levelname)s %(message)s", level=logging.INFO)

# main Python2 switch
# any module with different naming should be handled here
__PY2 = False
try:
    import configparser
except ImportError:
    # The py3 module name is unavailable: flag py2 mode and load the
    # py2-named equivalents under the names the rest of the file uses.
    __PY2 = True
    import ConfigParser as configparser
    import cStringIO
try:
    # Bare name lookup: raises NameError where the builtin does not exist.
    FileNotFoundError
except NameError:
    # Provide a stand-in so "except FileNotFoundError" still works.
    FileNotFoundError = OSError
# classes dealing with input and output charsets across python versions
# (well, really just for py2...)
class CrossversionFileContext(object):
    """Base context manager holding the state shared by the CSV helpers.

    This class only stores the parameters and performs the teardown in
    ``__exit__``; its own ``__enter__`` opens nothing. Subclasses populate
    ``stream`` and ``csv_object`` when the context is entered.
    """

    def __init__(self, file_path, is_py2, **kwds):
        """
        :param file_path: path to the file (stored as an absolute path)
        :param is_py2: flag signalling we are running under python 2
        :param kwds: extra keyword arguments, kept verbatim in ``params``
        """
        self.file_path = os.path.abspath(file_path)
        self.is_py2 = is_py2
        self.params = kwds
        # both handles stay empty until a subclass enters the context
        self.csv_object = None
        self.stream = None

    def __enter__(self):
        # nothing to open at this level
        return None

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Drop the csv object and close the stream, if one was opened.

        :return: False when an exception is in flight (so it is not
                 suppressed), otherwise None
        """
        del self.csv_object
        stream = self.stream
        if stream is not None:
            stream.close()
        return False if exc_type is not None else None
class CrossversionCsvReader(CrossversionFileContext):
    """Context manager returning a csv.reader-compatible object
    regardless of Python version.

    The file encoding is obtained from ``detect_encoding`` each time the
    context is entered. Keyword arguments given to the constructor are
    forwarded to the underlying reader.
    """

    def __enter__(self):
        """Open the file and build the reader.

        :return: ``UnicodeReader`` under python 2, ``csv.reader`` otherwise
        """
        encoding = detect_encoding(self.file_path)
        if self.is_py2:
            # py2: the csv module works on bytes, so read in binary mode
            # and let UnicodeReader handle the decoding
            self.stream = open(self.file_path, "rb")
            self.csv_object = UnicodeReader(self.stream,
                                            encoding=encoding,
                                            **self.params)
        else:
            # newline="" hands line endings to the csv module untranslated,
            # as the csv documentation requires; without it, newlines
            # embedded in quoted fields are not interpreted correctly.
            self.stream = open(self.file_path,
                               encoding=encoding,
                               newline="")
            self.csv_object = csv.reader(self.stream, **self.params)
        return self.csv_object
class CrossversionCsvWriter(CrossversionFileContext):
    """Context manager handing back a csv.writer-compatible object on
    either Python major version. Output is always encoded as UTF-8."""

    def __enter__(self):
        """Open the target file for writing and build the writer.

        :return: ``csv.writer`` under python 3, ``UnicodeWriter`` under
                 python 2
        """
        if not self.is_py2:
            # py3: text mode; newline="" lets the csv module emit its own
            # line terminators
            self.stream = open(self.file_path, "w",
                               encoding="utf-8", newline="")
            self.csv_object = csv.writer(self.stream, **self.params)
            return self.csv_object
        # py2: binary stream, encoding is done by UnicodeWriter
        self.stream = open(self.file_path, "wb")
        self.csv_object = UnicodeWriter(self.stream,
                                        encoding="utf-8",
                                        **self.params)
        return self.csv_object
def detect_encoding(filepath, encodings=None):
    """
    Utility to detect file encoding. This is imperfect, but
    should work for the most common cases.

    Each candidate encoding is tried in order; the first one that decodes
    the whole file without error is returned.

    :param filepath: string path to a given file
    :param encodings: optional iterable of encoding names to try, in
                      order of preference; defaults to the built-in list
    :return: encoding alias that can be used with open() in py3
             or codecs.open in py2, or None if no candidate succeeded
    """
    if encodings is None:
        # because some encodings will happily decode anything even if
        # wrong, keeping the most common near the top should make it more
        # likely that we're doing the right thing.
        encodings = (
            'ascii', 'utf-8', 'utf-16', 'cp1251', 'utf_32', 'utf_32_be',
            'utf_32_le', 'utf_16_be',
            'utf_16_le', 'utf_7', 'utf_8_sig', 'cp850', 'cp852',
            'latin_1', 'big5', 'big5hkscs', 'cp037', 'cp424',
            'cp437', 'cp500', 'cp720', 'cp737', 'cp775', 'cp855',
            'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
            'cp863', 'cp864', 'cp865', 'cp866', 'cp869', 'cp874',
            'cp875', 'cp932', 'cp949', 'cp950', 'cp1006', 'cp1026',
            'cp1140', 'cp1250', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
            'cp1256', 'cp1257', 'cp1258', 'euc_jp', 'euc_jis_2004',
            'euc_jisx0213', 'euc_kr', 'gb2312', 'gbk', 'gb18030', 'hz',
            'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2',
            'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext',
            'iso2022_kr', 'iso8859_2', 'iso8859_3',
            'iso8859_4', 'iso8859_5', 'iso8859_6', 'iso8859_7',
            'iso8859_8', 'iso8859_9', 'iso8859_10', 'iso8859_11',
            'iso8859_13', 'iso8859_14', 'iso8859_15', 'iso8859_16',
            'johab', 'koi8_r', 'koi8_u', 'mac_cyrillic', 'mac_greek',
            'mac_iceland', 'mac_latin2', 'mac_roman', 'mac_turkish',
            'ptcp154', 'shift_jis', 'shift_jis_2004', 'shift_jisx0213',
        )
    # ValueError covers UnicodeError and its decode/encode subclasses;
    # LookupError is raised for a codec this interpreter doesn't know,
    # which should skip that candidate rather than abort the detection.
    recoverable = (ValueError, LookupError)
    for enc in encodings:
        try:
            with codecs.open(filepath, "r", encoding=enc) as f:
                # decoding happens lazily while iterating, so the whole
                # file has to be walked to prove the candidate works
                for line in f:
                    line.encode("utf-8")
        except recoverable:
            continue
        return enc
    return None
# utilities to be used only by py2
# see https://docs.python.org/2/library/csv.html#examples for explanation
class UTF8Recoder:
    """Iterator that decodes an input stream with the given encoding and
    hands each item back re-encoded as UTF-8 (py2 helper)."""

    def __init__(self, f, encoding):
        make_reader = codecs.getreader(encoding)
        self.reader = make_reader(f)

    def __iter__(self):
        return self

    def next(self):
        # py2 iterator protocol: fetch the next decoded item, re-encode it
        decoded = self.reader.next()
        return decoded.encode("utf-8")
class UnicodeReader:
    """csv.reader wrapper for py2: rows come back as lists of unicode
    strings, whatever the encoding of the underlying stream ``f``."""

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        # csv.reader is fed UTF-8 bytes produced by the recoder
        recoded = UTF8Recoder(f, encoding)
        self.reader = csv.reader(recoded, dialect=dialect, **kwds)

    def __iter__(self):
        return self

    def next(self):
        cells = self.reader.next()
        return [unicode(cell, "utf-8") for cell in cells]

    @property
    def line_num(self):
        """Line counter of the wrapped csv reader."""
        return self.reader.line_num
class UnicodeWriter:
    """csv.writer wrapper for py2: rows are formatted into an in-memory
    queue, then re-encoded to the target encoding and written to ``f``."""

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()
        # rows are first rendered into this buffer by a plain csv.writer
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)

    def writerow(self, row):
        utf8_cells = [cell.encode("utf-8") for cell in row]
        self.writer.writerow(utf8_cells)
        # pull the rendered line back out as unicode...
        rendered = self.queue.getvalue().decode("utf-8")
        # ...re-encode it for the target stream and write it
        self.stream.write(self.encoder.encode(rendered))
        # empty the buffer ready for the next row
        self.queue.truncate(0)

    def writerows(self, rows):
        for single_row in rows:
            self.writerow(single_row)
# -- end of py2 utilities
# -- end of charset-handling classes
# Generic utilities
def get_configs():
    """Load bank2ynab.conf and user_configuration.conf into one parser.

    A missing bank2ynab.conf is logged as an error but loading carries on
    regardless.

    :return: RawConfigParser populated from whichever files could be read
    """
    main_conf = "bank2ynab.conf"
    conf_files = [main_conf, "user_configuration.conf"]
    if not os.path.exists(main_conf):
        logging.error("\nError: Can't find configuration file: bank2ynab.conf")
    parser = configparser.RawConfigParser()
    # the encoding keyword is only passed when not running under py2
    read_options = {} if __PY2 else {"encoding": "utf-8"}
    parser.read(conf_files, **read_options)
    return parser
def fix_conf_params(conf_obj, section_name):
    """Collect every parameter of one config section into a plain dict.

    Each entry is fetched through get_config_line(), which converts it to
    a boolean or splits it into a list where the table below says so.
    The section name itself is added under "bank_name".

    :param conf_obj: ConfigParser instance
    :param section_name: string of section name in config file
                         (e.g. "MyBank" matches "[MyBank]" in file)
    :return: dict with all parameters
    """
    # (dict key, option name in the file, is boolean?, split character)
    layout = (
        ("input_columns", "Input Columns", False, ","),
        ("output_columns", "Output Columns", False, ","),
        ("input_filename", "Source Filename Pattern", False, ""),
        ("path", "Source Path", False, ""),
        ("ext", "Source Filename Extension", False, ""),
        ("regex", "Use Regex For Filename", True, ""),
        ("fixed_prefix", "Output Filename Prefix", False, ""),
        ("input_delimiter", "Source CSV Delimiter", False, ""),
        ("header_rows", "Header Rows", False, ""),
        ("footer_rows", "Footer Rows", False, ""),
        ("date_format", "Date Format", False, ""),
        ("delete_original", "Delete Source File", True, ""),
        ("cd_flags", "Inflow or Outflow Indicator", False, ","),
        ("payee_to_memo", "Use Payee for Memo", True, ""),
        ("plugin", "Plugin", False, ""),
    )
    config = {}
    for key, option, is_boolean, split_char in layout:
        config[key] = get_config_line(
            conf_obj, section_name, [option, is_boolean, split_char])
    config["bank_name"] = section_name
    # a literal backslash-t in the file stands for a tab delimiter
    if config["input_delimiter"] == "\\t":
        config["input_delimiter"] = "\t"
    return config
def get_config_line(conf_obj, section_name, args):
    """Fetch one option from a section and convert it as requested.

    :param conf_obj: ConfigParser instance
    :param section_name: name of the section to read from
    :param args: sequence of [option name, boolean flag, split character]
    :return: a bool when the flag is True, otherwise the raw string, or a
             list of strings when a split character is given
    """
    param, boolean, splitter = args[0], args[1], args[2]
    fetch = conf_obj.getboolean if boolean is True else conf_obj.get
    line = fetch(section_name, param)
    if splitter == "":
        return line
    return line.split(splitter)
def find_directory(filepath):
    """Return the directory to search for transaction files.

    :param filepath: directory to use, or "" to fall back to the
                     downloads folder of the active user
    :return: ``filepath`` itself when given, otherwise the downloads folder
    :raises FileNotFoundError: if a non-empty ``filepath`` does not exist
    """
    # Strings are compared by value: identity checks ("is") against a
    # literal depend on interning and raise SyntaxWarning on python 3.8+.
    if filepath != "":
        if not os.path.exists(filepath):
            s = "Error: Input directory not found: {}"
            raise FileNotFoundError(s.format(filepath))
        return filepath

    if os.name == "nt":
        # Windows: the location is read from the registry
        try:
            import winreg
        except ImportError:
            import _winreg as winreg
        shell_path = ("SOFTWARE\\Microsoft\\Windows\\CurrentVersion"
                      "\\Explorer\\Shell Folders")
        dl_key = "{374DE290-123F-4565-9164-39C4925E467B}"
        with winreg.OpenKey(winreg.HKEY_CURRENT_USER, shell_path) as key:
            return winreg.QueryValueEx(key, dl_key)[0]

    # Linux, OSX: "Downloads" under the user's home (existence not checked)
    userhome = os.path.expanduser('~')
    return os.path.join(userhome, "Downloads")
# -- end of utilities
# Classes doing the actual work
class B2YBank(object):
    """ Object parsing and outputting data for a specific bank.
    This can be subclassed to handle formats requiring special handling,
    overriding any of get_files(), read_data() or write_data()."""

    def __init__(self, config_object, is_py2=False):
        """
        :param config_object: dict containing config parameters
        :param is_py2: flag signalling we are running under python 2
        """
        self.name = config_object.get("bank_name", "DEFAULT")
        self.config = config_object
        self._is_py2 = is_py2

    def get_files(self):
        """ find the transaction file
        :return: list of matching files found (empty when no filename
                 pattern is configured)
        """
        ext = self.config["ext"]
        file_pattern = self.config["input_filename"]
        prefix = self.config["fixed_prefix"]
        regex_active = self.config["regex"]
        try_path = self.config["path"]

        # compare by value; "is not" on a string literal tests identity
        if file_pattern == "":
            return []

        missing_dir = False
        try:
            path = find_directory(try_path)
        except FileNotFoundError:
            # configured directory is missing: use the default instead
            missing_dir = True
            path = find_directory("")
        path = abspath(path)
        try:
            directory_list = os.listdir(path)
        except FileNotFoundError:
            # last resort is the current directory; re-point path at it so
            # the joined file paths refer to the directory actually listed
            path = abspath(".")
            directory_list = os.listdir(path)

        if regex_active is True:
            def name_matches(name):
                return re.match(file_pattern + r".*\.", name)
        else:
            def name_matches(name):
                return name.startswith(file_pattern)

        # files already carrying the output prefix are skipped so that
        # previously converted files are not picked up again
        files = [join(path, f)
                 for f in directory_list
                 if f.endswith(ext)
                 if name_matches(f)
                 if prefix not in f]

        if files != [] and missing_dir is True:
            s = ("\nFormat: {}\n\nError: Can't find download path: {}"
                 "\nTrying default path instead:\t {}")
            logging.error(s.format(self.name, try_path, path))
        return files

    def read_data(self, file_path):
        """ extract data from given transaction file
        :param file_path: path to file
        :return: list of cleaned data rows, preceded by the header row
        """
        delim = self.config["input_delimiter"]
        output_columns = self.config["output_columns"]
        header_rows = int(self.config["header_rows"])
        footer_rows = int(self.config["footer_rows"])
        cd_flags = self.config["cd_flags"]
        date_format = self.config["date_format"]
        fill_memo = self.config["payee_to_memo"]
        output_data = []

        # give plugins a chance to pre-process the file
        self._preprocess_file(file_path)

        # first pass: count the rows so the footer can be located
        with CrossversionCsvReader(file_path,
                                   self._is_py2,
                                   delimiter=delim) as row_count_reader:
            row_count = sum(1 for row in row_count_reader)

        # second pass: build each row of our new transaction file
        with CrossversionCsvReader(file_path,
                                   self._is_py2,
                                   delimiter=delim) as transaction_reader:
            last_data_line = row_count - footer_rows
            for row in transaction_reader:
                line = transaction_reader.line_num
                # skip header & footer rows
                if not header_rows < line <= last_data_line:
                    continue
                # skip blank rows
                if len(row) == 0:
                    continue
                # process Inflow or Outflow flags
                row = self._cd_flag_process(row, cd_flags)
                # fix the date format
                row = self._fix_date(row, date_format)
                # create our output_row
                fixed_row = self._fix_row(row)
                # convert negative inflows to standard outflows
                fixed_row = self._fix_outflow(fixed_row)
                # fill in blank memo fields
                fixed_row = self._auto_memo(fixed_row, fill_memo)
                # check our row isn't a null transaction
                if self._valid_row(fixed_row) is True:
                    output_data.append(fixed_row)

        logging.info("Parsed {} lines".format(len(output_data)))
        # add in column headers
        output_data.insert(0, output_columns)
        return output_data

    def _preprocess_file(self, file_path):
        """
        exists solely to be used by plugins for pre-processing a file
        that otherwise can be read normally (e.g. weird format)
        :param file_path: path to file
        """
        # intentionally empty - the plugins can use this function
        return

    def _fix_row(self, row):
        """
        rearrange a row of our file to match expected output format
        :param row: list of values
        :return: list of values in correct output format
        """
        input_columns = self.config["input_columns"]
        output = []
        for header in self.config["output_columns"]:
            try:
                # check to see if our output header exists in input
                cell = row[input_columns.index(header)]
            except (ValueError, IndexError):
                # header isn't in input (or the row is too short),
                # default to blank cell
                cell = ""
            output.append(cell)
        return output

    def _fix_outflow(self, row):
        """
        convert negative inflow into positive outflow
        :param row: list of values (modified in place)
        :return: list of values with corrected outflow column
        """
        inflow_index = self.config["output_columns"].index("Inflow")
        outflow_index = self.config["output_columns"].index("Outflow")
        inflow = row[inflow_index]
        if inflow.startswith("-"):
            # move the amount, minus its sign, to the outflow column
            row[inflow_index] = ""
            row[outflow_index] = inflow[1:]
        return row

    def _valid_row(self, row):
        """ if our row doesn't have an inflow or outflow, mark as invalid
        :param row: list of values
        :return: False when both amount cells are empty, True otherwise
        """
        inflow_index = self.config["output_columns"].index("Inflow")
        outflow_index = self.config["output_columns"].index("Outflow")
        return not (row[inflow_index] == "" and row[outflow_index] == "")

    def _auto_memo(self, row, fill_memo):
        """ auto fill empty memo field with payee info
        :param row: list of values (modified in place)
        :param fill_memo: boolean
        :return: the row
        """
        if fill_memo:
            payee_index = self.config["output_columns"].index("Payee")
            memo_index = self.config["output_columns"].index("Memo")
            if row[memo_index] == "":
                row[memo_index] = row[payee_index]
        return row

    def _fix_date(self, row, date_format):
        """ fix date format when required
        convert date to DD/MM/YYYY
        :param row: list of values (modified in place)
        :param date_format: date format string; empty means "leave as is"
        :return: the row
        """
        if not date_format:
            return row
        date_col = self.config["input_columns"].index("Date")
        if row[date_col] == "":
            return row
        # parse our date according to provided formatting string
        input_date = datetime.strptime(row[date_col], date_format)
        # do our actual date processing
        row[date_col] = datetime.strftime(input_date, "%d/%m/%Y")
        return row

    def _cd_flag_process(self, row, cd_flags):
        """ fix rows where inflow or outflow is indicated by
        a flag in a separate column
        :param row: list of values (modified in place)
        :param cd_flags: list of parameters for applying indicators; only
                         acted on when it has exactly three items, of
                         which the first (indicator column index) and the
                         third (outflow flag) are read here
        :return: the row
        """
        if len(cd_flags) == 3:
            indicator_col = int(cd_flags[0])
            outflow_flag = cd_flags[2]
            inflow_col = self.config["input_columns"].index("Inflow")
            # if this row is indicated to be outflow, make inflow negative
            if row[indicator_col] == outflow_flag:
                row[inflow_col] = "-" + row[inflow_col]
        return row

    def _next_free_filename(self, target_dir, target_fname):
        """ pick an output file name that doesn't exist yet in target_dir
        :param target_dir: directory the output file will be written to
        :param target_fname: source file name without its extension
        :return: "<prefix><name>.csv", or "<prefix><name>_<n>.csv" with
                 the lowest n >= 1 that is not already taken
        """
        prefix = self.config["fixed_prefix"]
        new_filename = "{}{}.csv".format(prefix, target_fname)
        # the counter lives outside the loop so it really advances, and
        # the check looks where the file will actually be written
        counter = 1
        while os.path.isfile(join(target_dir, new_filename)):
            new_filename = "{}{}_{}.csv".format(
                prefix, target_fname, counter)
            counter += 1
        return new_filename

    def write_data(self, filename, data):
        """ write out the new CSV file
        :param filename: path to the source file; the output is written
                         next to it under the configured prefix
        :param data: cleaned data ready to output
        :return: path of the file that was written
        """
        target_dir = dirname(filename)
        # splitext copes with extensions that aren't exactly 4 characters
        target_fname = os.path.splitext(basename(filename))[0]
        new_filename = self._next_free_filename(target_dir, target_fname)
        target_filename = join(target_dir, new_filename)
        logging.info("Writing output file: {}".format(target_filename))
        with CrossversionCsvWriter(target_filename, self._is_py2) as writer:
            for row in data:
                writer.writerow(row)
        return target_filename
def build_bank(bank_config):
    """ Factory method loading the correct class for a given configuration.

    :param bank_config: dict of parameters for one bank; a non-empty
                        "plugin" entry names a module in the "plugins"
                        package that builds the bank object
    :return: the object built by the plugin's build_bank(), or a plain
             B2YBank when no plugin is configured
    :raises ImportError: if the plugin module cannot be imported or has
                         no build_bank attribute
    """
    plugin_module = bank_config.get("plugin", None)
    if not plugin_module:
        return B2YBank(bank_config, __PY2)

    p_mod = importlib.import_module("plugins.{}".format(plugin_module))
    if not hasattr(p_mod, "build_bank"):
        # the leading space keeps the file name and the rest of the
        # sentence apart in the final message
        s = ("The specified plugin {}.py".format(plugin_module) +
             " does not contain the required "
             "build_bank(config, is_py2) method.")
        raise ImportError(s)
    return p_mod.build_bank(bank_config, __PY2)
class Bank2Ynab(object):
    """ Program driver: turns every configuration section into a bank
    object and runs the conversion for each of them."""

    def __init__(self, config_object, is_py2=False):
        """
        :param config_object: parser exposing sections(); each section is
                              converted with fix_conf_params() and handed
                              to build_bank()
        :param is_py2: flag signalling we are running under python 2
        """
        self._is_py2 = is_py2
        self.banks = [
            build_bank(fix_conf_params(config_object, section))
            for section in config_object.sections()
        ]

    def run(self):
        """ Convert every matching file of every configured bank """
        # running total for the closing summary
        processed_count = 0
        for bank in self.banks:
            # find all applicable files for this bank
            candidates = bank.get_files()
            label = bank.name
            for src_file in candidates:
                message = "\nParsing input file: {} (format: {})".format(
                    src_file, label)
                logging.info(message)
                processed_count += 1
                # read, clean and write out the data in one go
                bank.write_data(src_file, bank.read_data(src_file))
                # only remove the source when explicitly configured
                if bank.config["delete_original"] is not True:
                    continue
                logging.info("Removing input file: {}".format(src_file))
                os.remove(src_file)
        logging.info("\nDone! {} files processed.\n".format(processed_count))
# Let's run this thing!
if __name__ == "__main__":
    # Only when executed as a script: build the application from the
    # configuration returned by get_configs(), then run it.
    b2y = Bank2Ynab(get_configs(), __PY2)
    b2y.run()