Skip to content

Commit

Permalink
Merge pull request #59 from bgruening/dev
Browse files Browse the repository at this point in the history
merge dev into master
  • Loading branch information
bgruening committed Sep 5, 2015
2 parents 33eb2fc + 32fd711 commit ebb9854
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 73 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E084DAB9 && \
libfreetype6-dev libpng-dev net-tools procps r-base libreadline-dev && \
pip install distribute --upgrade && \
pip install pyzmq ipython==2.4 jinja2 tornado pygments numpy biopython scikit-learn pandas \
scipy sklearn-pandas bioblend matplotlib patsy pysam khmer dendropy ggplot mpld3 sympy rpy2 && \
scipy sklearn-pandas bioblend matplotlib patsy pysam khmer ggplot mpld3 sympy rpy2 && \
apt-get autoremove -y && apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

ADD ./startup.sh /startup.sh
Expand Down
109 changes: 37 additions & 72 deletions galaxy.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,16 @@
from bioblend import galaxy
from bioblend.galaxy.tools import ToolClient
from bioblend.galaxy.histories import HistoryClient
from bioblend.galaxy.datasets import DatasetClient
#!/usr/bin/env python
from bioblend.galaxy import objects
import subprocess
import argparse
import os
from string import Template
import logging
logging.getLogger("bioblend").setLevel(logging.WARNING)
DEBUG = os.environ.get('DEBUG', "False").lower() == 'true'
if DEBUG:
logging.basicConfig(level=logging.DEBUG)
logging.getLogger("bioblend").setLevel(logging.CRITICAL)
log = logging.getLogger()

# Consider not using objects deprecated.
DEFAULT_USE_OBJECTS = True
ENV_KEYS = ('DEBUG', 'GALAXY_WEB_PORT', 'NOTEBOOK_PASSWORD', 'CORS_ORIGIN',
'DOCKER_PORT', 'API_KEY', 'HISTORY_ID', 'REMOTE_HOST',
'GALAXY_URL')


def _get_conf():
conf = {}
for key in ENV_KEYS:
conf[key.lower()] = os.environ.get(key, None)
conf['galaxy_paster_port'] = conf['galaxy_web_port']
return conf


def _get_ip():
Expand All @@ -35,25 +23,23 @@ def _get_ip():
cmd_awk = ['awk', '{ print $2 }']
p3 = subprocess.Popen(cmd_awk, stdin=p2.stdout, stdout=subprocess.PIPE)
galaxy_ip = p3.stdout.read()
log.debug('Host IP determined to be %s', galaxy_ip)
return galaxy_ip


def _test_url(url, key, history_id):
    """Test the functionality of a given galaxy URL, to ensure we can connect
    on that address.

    Connects with API `key` and fetches the history identified by
    `history_id` as a smoke test.  Returns the connected bioblend
    ``objects.GalaxyInstance`` on success, or None when the URL is not
    functional (any exception is treated as "not reachable").
    """
    try:
        gi = objects.GalaxyInstance(url, key)
        # Fetching the target history proves both authentication and
        # connectivity in one call.
        gi.histories.get(history_id)
        log.debug('Galaxy URL %s is functional', url)
        return gi
    except Exception:
        # Deliberate best-effort: the caller falls back to another URL.
        return None


def get_galaxy_connection(history_id=None):
    """Connect to the Galaxy API, trying several candidate URLs.

    First the URL from $GALAXY_URL (with $DOCKER_HOST substituted by the
    container's detected gateway IP) is tried; if that fails, a failover
    URL is built from the detected IP, $GALAXY_WEB_PORT and the
    application path.  The failover will succeed where the first
    connection fails under the conditions of REMOTE_USER and galaxy
    running under uWSGI.

    :param history_id: Galaxy history used to validate the connection;
        defaults to $HISTORY_ID from the environment.
    :returns: a connected ``objects.GalaxyInstance``.
    :raises Exception: when no candidate URL is reachable.
    """
    history_id = history_id or os.environ['HISTORY_ID']
    key = os.environ['API_KEY']

    ### Customised/Raw galaxy_url ###
    galaxy_ip = _get_ip()
    # Substitute $DOCKER_HOST with real IP
    url = Template(os.environ['GALAXY_URL']).safe_substitute({'DOCKER_HOST': galaxy_ip})
    gi = _test_url(url, key, history_id)
    if gi is not None:
        return gi

    ### Failover, fully auto-detected URL ###
    # Remove trailing slashes
    app_path = os.environ['GALAXY_URL'].rstrip('/')
    # Remove protocol+host:port if included
    app_path = ''.join(app_path.split('/')[3:])

    if 'GALAXY_WEB_PORT' not in os.environ:
        # We've failed to detect a port in the config we were given by
        # galaxy, so we won't be able to construct a valid URL
        raise Exception("No port")
    galaxy_port = os.environ['GALAXY_WEB_PORT']

    built_galaxy_url = 'http://%s:%s/%s' % (galaxy_ip.strip(), galaxy_port, app_path.strip())
    url = built_galaxy_url.rstrip('/')

    gi = _test_url(url, key, history_id)
    if gi is not None:
        return gi

    ### Fail ###
    msg = "Could not connect to a galaxy instance. Please contact your SysAdmin for help with this error"
    raise Exception(msg)


def put(filename, file_type='auto', history_id=None):
    """
    Given a filename of any file accessible to the docker instance, this
    function will upload that file to galaxy using the current history.
    Does not return anything.

    :param filename:   path of the file to upload.
    :param file_type:  Galaxy datatype to assign ('auto' lets Galaxy sniff).
    :param history_id: target history; defaults to $HISTORY_ID from the
                       environment.
    """
    gi = get_galaxy_connection(history_id=history_id)
    # Fall back to the history id Galaxy exported into the environment.
    history_id = history_id or os.environ['HISTORY_ID']
    history = gi.histories.get(history_id)
    history.upload_dataset(filename, file_type=file_type)


def get(dataset_id, history_id=None):
    """
    Given the history_id that is displayed to the user, this function will
    download the file from the history and stores it under /import/
    Return value is the path to the dataset stored under /import/

    :param dataset_id:  user-visible "hid" number of the dataset.
    :param history_id:  source history; defaults to $HISTORY_ID from the
                        environment.
    :returns: path of the downloaded file under /import/.
    """
    history_id = history_id or os.environ['HISTORY_ID']
    gi = get_galaxy_connection(history_id=history_id)

    file_path = '/import/%s' % dataset_id

    # Cache the file requests. E.g. in the example of someone doing something
    # silly like a get() for a Galaxy file in a for-loop, wouldn't want to
    # re-download every time and add that overhead.
    if not os.path.exists(file_path):
        history = gi.histories.get(history_id)
        # Map user-visible "hid" numbers to internal dataset ids.
        datasets = dict([(d.wrapped["hid"], d.id) for d in history.get_datasets()])
        dataset = history.get_dataset(datasets[dataset_id])
        # Context manager guarantees the handle is closed (the original
        # leaked the file object returned by open()).
        with open(file_path, 'wb') as handle:
            dataset.download(handle)

    return file_path

Expand All @@ -175,6 +140,6 @@ def get(dataset_id, history_id=None, use_objects=DEFAULT_USE_OBJECTS):

if args.action == 'get':
    # Ensure it's a numerical value: datasets are addressed by their
    # integer "hid" as shown in the Galaxy history panel.
    get(int(args.argument), history_id=args.history_id)
elif args.action == 'put':
    put(args.argument, file_type=args.filetype, history_id=args.history_id)

0 comments on commit ebb9854

Please sign in to comment.