def config_section_to_dict(config, section, field_options):
    """Read the typed options of one config section into a plain dict.

    Args:
        config: parser object exposing ``get``, ``getboolean``, ``getint``
            and ``getfloat`` with a ``fallback`` keyword (the
            ``configparser.ConfigParser`` interface).
        section: name of the section to read.
        field_options: mapping of option name to a spec dict whose
            ``'type'`` entry selects the conversion (``bool``, ``int``,
            ``float``; any other type is read as a raw string).

    Returns:
        A dict containing only the options that are actually set. Options
        that are missing (or set to an empty string) are omitted so that
        they do not mask values from other configs when dicts are merged.

    Raises:
        ValueError: propagated from the parser when a stored value cannot
            be converted to the requested type.
    """
    typed_getters = {
        bool: config.getboolean,
        int: config.getint,
        float: config.getfloat,
    }

    result_dict = {}
    for name, spec in field_options.items():
        getter = typed_getters.get(spec['type'], config.get)
        value = getter(section, name, fallback=None)

        # Only "unset" is skipped. A plain truthiness test would also drop
        # an explicit False / 0 / 0.0, making e.g. ``zip = no`` impossible
        # to express.
        if value is None or value == '':
            continue
        result_dict[name] = value
    return result_dict
get_parser(self, prog_name): parser.add_argument('-u', '--username', help='username') parser.add_argument('-p', '--password', help='password') parser.add_argument('-c', '--competition', help='competition') + parser.add_argument( + '-z', + '--zip', + help='zip the submission file before uploading?', + action='store_true') parser.add_argument( '-g', '--global', @@ -97,7 +134,7 @@ def get_parser(self, prog_name): def take_action(self, parsed_args): parsed_arg_dict = vars(parsed_args) - if DATA_OPTIONS & set( + if set(FIELD_OPTIONS.keys()) & set( filter(lambda x: parsed_arg_dict[x], parsed_arg_dict) ): if parsed_arg_dict['global']: @@ -135,6 +172,11 @@ def take_action(self, parsed_args): parsed_arg_dict['competition'] ) + if parsed_arg_dict['zip']: + config.set( + 'user','zip','yes' + ) + with open(config_path, 'w') as config_file: config.write(config_file) else: diff --git a/kaggle_cli/meta.py b/kaggle_cli/meta.py index 58a6533..46dea4b 100644 --- a/kaggle_cli/meta.py +++ b/kaggle_cli/meta.py @@ -1 +1 @@ -VERSION = '0.12.8' +VERSION = '0.12.10' diff --git a/kaggle_cli/submit.py b/kaggle_cli/submit.py index 5272441..335231b 100644 --- a/kaggle_cli/submit.py +++ b/kaggle_cli/submit.py @@ -2,6 +2,10 @@ import time import re import json +import sys +import uuid +from argparse import ArgumentTypeError +import zipfile from cliff.command import Command @@ -21,6 +25,7 @@ def get_parser(self, prog_name): parser.add_argument('-c', '--competition', help='competition') parser.add_argument('-u', '--username', help='username') parser.add_argument('-p', '--password', help='password') + parser.add_argument('-z', '--zip', help='zip the submission file before uploading?', action='store_true') return parser @@ -30,16 +35,23 @@ def take_action(self, parsed_args): username = config.get('username', '') password = config.get('password', '') competition = config.get('competition', '') + zip = config.get('zip', False) browser = common.login(username, password) base = 'https://www.kaggle.com' 
competition_url = '/'.join([base, 'c', competition]) - file_form_submit_url = '/'.join([base, 'blobs/inbox/submissions']) - entry_form_submit_url = '/'.join([competition_url, 'submission.json']) + file_form_url = '/'.join([base, 'blobs/inbox/submissions']) + entry_form_url = '/'.join([competition_url, 'submission.json']) entry = parsed_args.entry message = parsed_args.message + archive_name = make_archive_name(entry) + + if zip: + with zipfile.ZipFile(archive_name, 'w', zipfile.ZIP_DEFLATED) as zf: + zf.write(entry) + competition_page = browser.get(competition_url) if competition_page.status_code == 404: @@ -51,18 +63,23 @@ def take_action(self, parsed_args): str(competition_page.soup) ).group(1) + if zip: + target_name = archive_name + else: + target_name = entry + form_submission = browser.post( - file_form_submit_url, + file_form_url, data={ - 'fileName': entry, - 'contentLength': os.path.getsize(entry), - 'lastModifiedDateUtc': int(os.path.getmtime(entry) * 1000) + 'fileName': target_name, + 'contentLength': os.path.getsize(target_name), + 'lastModifiedDateUtc': int(os.path.getmtime(target_name) * 1000) } ).json() file_submit_url = base + form_submission['createUrl'] - with open(entry, 'rb') as submission_file: + with open(target_name, 'rb') as submission_file: token = browser.post( file_submit_url, files={ @@ -70,8 +87,8 @@ def take_action(self, parsed_args): } ).json()['token'] - browser.post( - entry_form_submit_url, + entry_form_resp_message = browser.post( + entry_form_url, data=json.dumps({ 'blobFileTokens': [token], 'submissionDescription': message if message else '' @@ -79,7 +96,11 @@ def take_action(self, parsed_args): headers={ 'Content-Type': 'application/json' } - ) + ).json()['pageMessages'] + + if entry_form_resp_message and entry_form_resp_message[0]['type'] == 'error': + print(entry_form_resp_message[0]['dangerousHtmlMessage']) + return status_url = ( 'https://www.kaggle.com/' @@ -98,3 +119,24 @@ def take_action(self, parsed_args): else: 
def make_archive_name(original_file_path):
    """Return the path of the zip archive to create for a submission file.

    The archive is placed in the same directory as the original file. The
    file's final extension (``.csv``, ``.txt``, ...) is replaced with
    ``.zip``; a name without an extension simply gets ``.zip`` appended.

    The returned path never equals ``original_file_path``: if the file
    already carries a ``.zip`` extension, ``.zip`` is appended instead of
    substituted, so an archive written to the returned path cannot
    overwrite the input file.

    Args:
        original_file_path: path of the file to be archived; may include
            directory components.

    Returns:
        The archive path as a string.
    """
    # The file may live in another directory; only the last path component
    # is inspected for an extension.
    directory, basename = os.path.split(original_file_path)
    stem, extension = os.path.splitext(basename)

    if not extension or extension.lower() == '.zip':
        # No suffix to strip, or stripping it would reproduce the input
        # path (compared case-insensitively for case-insensitive
        # filesystems): keep the whole name.
        archive_name = basename + '.zip'
    else:
        archive_name = stem + '.zip'

    return os.path.join(directory, archive_name)