Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Maintenance update #214

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 52 additions & 21 deletions mglib/mglib.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import base64
import json
import string
import time
import random
import hashlib
import subprocess
Expand All @@ -24,7 +23,7 @@

from .__init__ import API_URL

if not sys.version_info[0:2][0] == 3 and not sys.version_info[0:2] == (2, 7) :
if not sys.version_info[0:2][0] == 3 and not sys.version_info[0:2] == (2, 7):
sys.stderr.write('ERROR: MG-RAST Tools requires at least Python 2.7.')
exit(1)

Expand Down Expand Up @@ -65,7 +64,7 @@ def body_from_url(url, accept, auth=None, data=None, debug=False, method=None):
except:
sys.stderr.write("ERROR (%s): %s\n" %(error.code, error.read().decode("utf8")))
finally:
raise(HTTPerror)
raise HTTPError(error.url, error.code, "HTTP error", error.hdrs, error.fp)
if not res:
sys.stderr.write("ERROR: no results returned\n")
sys.exit(1)
Expand All @@ -75,18 +74,22 @@ def body_from_url(url, accept, auth=None, data=None, debug=False, method=None):
def obj_from_url(url, auth=None, data=None, debug=False, method=None):
url = quote(url, safe='/:=?&', encoding="utf-8", errors="strict")
if type(data) is str:
data=data.encode("utf8")
data = data.encode("utf8")
if debug:
print("Data", repr(data))
try:
result = body_from_url(url, 'application/json', auth=auth, data=data, debug=debug, method=method)
read = result.read()
except: # try one more time ConnectionResetError is incompatible with python2
result = body_from_url(url, 'application/json', auth=auth, data=data, debug=debug, method=method)
read = result.read()
if debug:
print("Result", repr(read))
if result.headers["content-type"] == "application/x-download" or result.headers["content-type"] == "application/octet-stream":
return(read) # Watch out!
if result.headers["content-type"][0:9] == "text/html": # json decoder won't work
return(read) # Watch out!
if result.headers["content-type"] == "application/json": # If header is set, this should work
if result.headers["content-type"] == "application/json": # If header is set, this should work
data = read.decode("utf8")
obj = json.loads(data)
else:
Expand Down Expand Up @@ -139,8 +142,8 @@ def async_rest_api(url, auth=None, data=None, debug=False, delay=60):
# If "status" is nor present, or if "status" is somehow not "submitted"
# assume this is not an asynchronous call and it's done.
if type(submit) == bytes: # can't decode
try:
return decode("utf-8", submit)
try:
return submit.decode("utf-8")
except:
return submit
if ('status' in submit) and (submit['status'] != 'submitted') and (submit['status'] != "processing") and ('data' in submit):
Expand Down Expand Up @@ -198,11 +201,12 @@ def post_file(url, keyname, filename, data={}, auth=None, debug=False):
obj = None

# try maxt times
while not success and counter < maxt :
while not success and counter < maxt:
try:
res = requests.post(url, data=datagen, headers=header, stream=True)
except HTTPError as error:
try:
sys.stderr.write("Retrying POST "+url, repr(datagen), repr(header))
eobj = json.loads(error.read())
if 'ERROR' in eobj:
sys.stderr.write("ERROR (%s): %s\n" %(error.code, eobj['ERROR']))
Expand All @@ -213,13 +217,13 @@ def post_file(url, keyname, filename, data={}, auth=None, debug=False):
finally:
# sys.exit(1)
return None
except OSError as error:
except OSError as error:
sys.stderr.write("ERROR with post_file\n")
sys.stderr.write("ERROR (%s): %s\n" %(error.code, error.read()))
if not res:
sys.stderr.write("ERROR: no results returned for %s\n"% (filename))
# sys.exit(1)
else:
else:
obj = json.loads(res.content.decode("utf8"))
if debug:
print(json.dumps(obj))
Expand All @@ -228,7 +232,7 @@ def post_file(url, keyname, filename, data={}, auth=None, debug=False):
else:
success = True
# increase counter
if not success :
if not success:
counter += 1
time.sleep(counter * sleep)
return(obj)
Expand Down Expand Up @@ -258,9 +262,34 @@ def sparse_to_dense(sMatrix, rmax, cmax):
dMatrix[r][c] = v
return dMatrix

def clean_row(element):
    """Return a ';'-joined hierarchy label for one BIOM row element.

    element: BIOM row dict with keys 'id' (str) and 'metadata' (dict or None).
        metadata may carry 'ontology' (list of level names) and/or
        'hierarchy' (dict mapping level name -> value); when both are
        present, 'hierarchy' takes precedence (matches original behavior).

    Returns the hierarchy string; falls back to the bare row id when no
    usable metadata is present.
    """
    # NOTE(review): 'family' is listed before 'class'/'order' here, which is
    # not standard taxonomic order -- preserved as-is; confirm intended order.
    taxa = ["domain", "phylum", "family", "class", "order", "genus", "species"]
    if element["metadata"] is None:
        # No metadata at all: emit empty levels with the id in the last slot.
        slots = [""] * len(taxa)
        slots[-1] = element["id"]
        return ";".join(slots)
    meta = element["metadata"]
    name = None
    if "ontology" in meta:
        name = ";".join(meta["ontology"])
    if "hierarchy" in meta:
        if "level1" in meta["hierarchy"]:
            levels = ["level1", "level2", "level3", "level4", "function"]
        else:
            levels = taxa
        slots = [""] * len(levels)
        for key, val in meta["hierarchy"].items():
            # Ignore unexpected keys instead of raising ValueError on
            # levels.index(key) (robustness fix).
            if key in levels:
                slots[levels.index(key)] = val
        name = ";".join(slots)
    if name is None:
        # Bug fix: metadata present but holds neither 'ontology' nor
        # 'hierarchy' -- the original code raised UnboundLocalError here.
        name = element["id"]
    return name

# transform BIOM format to tabbed table
# returns max value of matrix
def biom_to_tab(biom, hdl, rows=None, use_id=True, col_name=False):
''' biom
hdl
rows
use_id
col_name '''
assert 'matrix_type' in biom.keys(), repr(biom)
if biom['matrix_type'] == 'sparse':
matrix = sparse_to_dense(biom['data'], biom['shape'][0], biom['shape'][1])
Expand All @@ -273,8 +302,10 @@ def biom_to_tab(biom, hdl, rows=None, use_id=True, col_name=False):
rowmax = []
for i, row in enumerate(matrix):
name = biom['rows'][i]['id']
if (not use_id) and ('ontology' in biom['rows'][i]['metadata']):
name += ':'+biom['rows'][i]['metadata']['ontology'][-1]
if use_id:
name = biom['rows'][i]["id"] # Use row[].id
else:
name = clean_row(biom['rows'][i])
if rows and (name not in rows):
continue
try:
Expand Down Expand Up @@ -388,7 +419,7 @@ def merge_biom(b1, b2):
add_row.append(b2['data'][i][j])
mBiom['rows'].append(r)
mBiom['data'].append(add_row)
mBiom['shape'] = [ len(mBiom['rows']), len(mBiom['columns']) ]
mBiom['shape'] = [len(mBiom['rows']), len(mBiom['columns'])]
return mBiom

# transform BIOM format to matrix in json format
Expand All @@ -410,9 +441,9 @@ def biom_to_matrix(biom, col_name=False, sig_stats=False):
else:
data = biom['data']
if sig_stats and ('significance' in biom['rows'][0]['metadata']) and (len(biom['rows'][0]['metadata']['significance']) > 0):
cols.extend([s[0] for s in biom['rows'][0]['metadata']['significance']] )
cols.extend([s[0] for s in biom['rows'][0]['metadata']['significance']])
for i, r in enumerate(biom['rows']):
data[i].extend([s[1] for s in r['metadata']['significance']] )
data[i].extend([s[1] for s in r['metadata']['significance']])
return rows, cols, data

# transform tabbed table to matrix in json format
Expand All @@ -435,7 +466,7 @@ def sub_matrix(matrix, ncols):
return matrix
sub = list()
for row in matrix:
sub.append(row[:ncols] )
sub.append(row[:ncols])
return sub

# return KBase id for MG-RAST id
Expand Down Expand Up @@ -467,7 +498,7 @@ def kbids_to_mgids(kbids):
# or reverse
def kbid_lookup(ids, reverse=False):
request = 'mg2kb' if reverse else 'kb2mg'
post = json.dumps({'ids': ids}, separators=(',',':'))
post = json.dumps({'ids': ids}, separators=(',', ':'))
data = obj_from_url(API_URL+'/job/'+request, auth=auth, data=post)
return data['data']

Expand All @@ -478,7 +509,7 @@ def get_auth_token(opts=None):
return os.environ['MGRKEY']
if hasattr(opts, "token") and opts.token is not None:
return opts.token
elif hasattr(opts, 'user') and hasattr(opts, 'passwd') and (opts.user or opts.passwd):
if hasattr(opts, 'user') and hasattr(opts, 'passwd') and (opts.user or opts.passwd):
if opts.user and opts.passwd:
return token_from_login(opts.user, opts.passwd)
else:
Expand All @@ -494,7 +525,7 @@ def get_auth(token):
if not os.path.isfile(auth_file):
sys.stderr.write("ERROR: missing authentication file, please login\n")
return None
auth_obj = json.load(open(auth_file,'r'))
auth_obj = json.load(open(auth_file, 'r'))
if ("token" not in auth_obj) or ("id" not in auth_obj) or ("expiration" not in auth_obj):
sys.stderr.write("ERROR: invalid authentication file, please login\n")
return None
Expand All @@ -510,7 +541,7 @@ def token_from_login(user, passwd):

def login(token):
auth_obj = obj_from_url(API_URL+"/user/authenticate", auth=token)
json.dump(auth_obj, open(auth_file,'w'))
json.dump(auth_obj, open(auth_file, 'w'))

def login_from_token(token):
parts = {}
Expand Down
50 changes: 25 additions & 25 deletions scripts/mg-compare-functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,10 @@ def main(args):
parser.add_argument("--evalue", type=int, dest="evalue", default=15, help="negative exponent value for maximum e-value cutoff, default is 15")
parser.add_argument("--identity", type=int, dest="identity", default=60, help="percent value for minimum %% identity cutoff, default is 60")
parser.add_argument("--length", type=int, dest="length", default=15, help="value for minimum alignment length cutoff, default is 15")
parser.add_argument("--hierarchy", action="store_true", dest="hierarchy", help="Don't use id, show hierarchy")
parser.add_argument("--version", type=int, dest="version", default=1, help="M5NR annotation version to use, default is 1")
parser.add_argument("--temp", dest="temp", default=None, help="filename to temporarly save biom output at each iteration")

# get inputs
opts = parser.parse_args()
if not opts.ids:
Expand All @@ -74,16 +75,16 @@ def main(args):
if opts.format not in ['text', 'biom']:
sys.stderr.write("ERROR: invalid input format\n")
return 1

# get auth
token = get_auth_token(opts)

# build url
id_list = []
if os.path.isfile(opts.ids):
id_str = open(opts.ids,'r').read()
id_str = open(opts.ids, 'r').read()
try:
id_obj = json.loads(id_str)
id_obj = json.loads(id_str)
if 'elements' in id_obj:
id_list = id_obj['elements'].keys()
elif 'members' in id_obj:
Expand All @@ -92,14 +93,14 @@ def main(args):
id_list = id_str.strip().split('\n')
else:
id_list = opts.ids.strip().split(',')
params = [ ('group_level', opts.level),
('source', opts.source),
('evalue', opts.evalue),
('identity', opts.identity),
('length', opts.length),
('version', opts.version),
('result_type', 'abundance'),
('asynchronous', '1') ]
params = [('group_level', opts.level),
('source', opts.source),
('evalue', opts.evalue),
('identity', opts.identity),
('length', opts.length),
('version', opts.version),
('result_type', 'abundance'),
('asynchronous', '1') ]
if opts.intersect_level and opts.intersect_name:
params.append(('filter_source', opts.intersect_source))
params.append(('filter_level', opts.intersect_level))
Expand All @@ -110,12 +111,12 @@ def main(args):
else:
for f in opts.intersect_name.strip().split(','):
params.append(('filter', f))

# retrieve data
biom = None
size = 50
if len(id_list) > size:
for i in xrange(0, len(id_list), size):
for i in range(0, len(id_list), size):
sub_ids = id_list[i:i+size]
cur_params = copy.deepcopy(params)
for i in sub_ids:
Expand All @@ -132,8 +133,7 @@ def main(args):
biom = async_rest_api(url, auth=token)
if opts.temp:
json.dump(biom, open(opts.temp, 'w'))



# get sub annotations
sub_ann = set()
if opts.filter_name and opts.filter_level:
Expand All @@ -147,30 +147,30 @@ def main(args):
for f in opts.filter_name.strip().split(','):
filter_list.append(f)
# annotation mapping from m5nr
params = [ ('version', opts.version),
('min_level', opts.level),
('source', opts.source) ]
params = [('version', opts.version),
('min_level', opts.level),
('source', opts.source) ]
url = opts.url+'/m5nr/ontology?'+urlencode(params, True)
data = obj_from_url(url)
level = 'level4' if opts.level == 'function' else opts.level
for ann in data['data']:
if (opts.filter_level in ann) and (level in ann) and (ann[opts.filter_level] in filter_list):
sub_ann.add(ann[level])

# output data
if (not opts.output) or (opts.output == '-'):
out_hdl = sys.stdout
else:
out_hdl = open(opts.output, 'w')

if opts.format == 'biom':
out_hdl.write(json.dumps(biom)+"\n")
else:
biom_to_tab(biom["data"], out_hdl, rows=sub_ann)
biom_to_tab(biom["data"], out_hdl, rows=sub_ann, use_id=not opts.hierarchy)

out_hdl.close()
return 0


if __name__ == "__main__":
sys.exit(main(sys.argv))
Loading