-
Notifications
You must be signed in to change notification settings - Fork 91
add --zip option to zip the submission file prior to uploading it #52
base: master
Are you sure you want to change the base?
Changes from 1 commit
b5946c6
7b33aaa
d3c68a4
f0869ee
f8a67f8
5116008
ff4a29a
23bb924
574019d
9158c36
551a1b2
7f9b3c0
bd0cfed
9e5d725
d17fe8c
2bb95b4
1b0ae53
be32ad4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,8 +24,7 @@ def get_parser(self, prog_name): | |
parser.add_argument('-c', '--competition', help='competition') | ||
parser.add_argument('-u', '--username', help='username') | ||
parser.add_argument('-p', '--password', help='password') | ||
parser.add_argument('-z', '--zip', type=self._str2bool, nargs='?', const=True, default=False, | ||
help='whether to zip the submission file before uploading') | ||
parser.add_argument('-z', '--zip', help='zip the submission file before uploading?', action='store_true') | ||
|
||
return parser | ||
|
||
|
@@ -37,10 +36,7 @@ def take_action(self, parsed_args): | |
competition = config.get('competition', '') | ||
zip_flag = config.get('zip', 'no') | ||
|
||
if Submit._str2bool(zip_flag): | ||
zip = True | ||
else: | ||
zip = False | ||
zip = Submit._str2bool(zip_flag) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Overriding the built-in function name is discouraged. You can just apply the But I'll address the boolean coercion issue at config provider level, you can get a boolean by There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
||
browser = common.login(username, password) | ||
base = 'https://www.kaggle.com' | ||
|
@@ -51,7 +47,12 @@ def take_action(self, parsed_args): | |
entry = parsed_args.entry | ||
message = parsed_args.message | ||
|
||
archive_name = Submit._rand_str(10) + '.zip' | ||
archive_name = Submit._make_archive_name(entry) | ||
|
||
# print(archive_name) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Incident comments here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oops! |
||
# print(zip) | ||
# | ||
# return | ||
|
||
if zip: | ||
with zipfile.ZipFile(archive_name, 'w', zipfile.ZIP_DEFLATED) as zf: | ||
|
@@ -128,30 +129,39 @@ def take_action(self, parsed_args): | |
if zip: | ||
os.remove(target_name) | ||
|
||
@staticmethod | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Static method is not the best choice in such case. And an underscore prefix method indicates this method is private, which means it'll access the object's private fields, but it's not the case here. I suggest you extract the method to the module scope, i.e. move the |
||
def _make_archive_name(original_file_path): | ||
# if original name already has a suffix (csv,txt,etc), remove it | ||
extension_pattern = r'(^.+)\.(.+)$' | ||
|
||
# file may be in another directory | ||
original_basename = os.path.basename(original_file_path) | ||
|
||
if re.match(extension_pattern,original_basename): | ||
archive_name = re.sub(extension_pattern,r'\1.zip',original_basename) | ||
else: | ||
archive_name = original_basename+".zip" | ||
|
||
# this is used to prevent caching issues | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I still can't understand why it has caching issues. Can you explain the scenario? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure about this but I think that when we upload two submissions (different files) with the same same (say, "mysubmission.csv"), this may trigger some form of caching in Kaggle (They may only check the file name and, if the file name is something they have seen before, they may not even read the file). Again, I'm not sure about this but since caching is done by many people on many websites, I though it would be a safe thing to do at little cost (just add a couple characters to the file name). But I agree it's not essential. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Caching usually applied on read operation, caching a file upload would be considered a bug. But I just tested if Kaggle caches the file? The answer is no. I uploaded the benchmark model for the titanic competition and updated the file's content manually and uploaded again to see if it's cached. The scores returned by Kaggle were different in the two submissions. So basically, it's not an issue. |
||
string_prefix = uuid.uuid4().hex[:4] | ||
|
||
prefixed_archive_name = string_prefix+"-"+archive_name | ||
|
||
original_directory_path = os.path.dirname(original_file_path) | ||
|
||
return os.path.join(original_directory_path,prefixed_archive_name) | ||
|
||
@staticmethod | ||
def _str2bool(v): | ||
""" | ||
parse truthy/falsy strings into booleans | ||
|
||
https://stackoverflow.com/a/43357954/436721 | ||
:param v: the string to be parsed | ||
:return: a boolean value | ||
""" | ||
if v.lower() in ('yes', 'true', 't', 'y', '1'): | ||
if v is True or v.lower() in ('yes', 'true', 't', 'y', '1'): | ||
return True | ||
elif v.lower() in ('no', 'false', 'f', 'n', '0'): | ||
elif v is False or v.lower() in ('no', 'false', 'f', 'n', '0'): | ||
return False | ||
else: | ||
raise ArgumentTypeError('Boolean value expected.') | ||
|
||
@staticmethod | ||
def _rand_str(length): | ||
""" | ||
this is used to prevent caching issues | ||
|
||
https://stackoverflow.com/a/34017605/436721 | ||
|
||
:param length: integer length | ||
:return: a random string of the given length | ||
""" | ||
return uuid.uuid4().hex[:length - 1] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can use getboolean method instead.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@floydwch but the
config
object here is just a python dict, it's not a ConfigParser objectThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This issue should be resolved at the config provider level, I'll address it.