Skip to content

Commit

Permalink
Merge pull request #564 from knabar/feature-row-queries
Browse files Browse the repository at this point in the history
New table API endpoints for lower level table access
  • Loading branch information
knabar authored Jul 29, 2024
2 parents 0204b31 + c581586 commit b071a89
Show file tree
Hide file tree
Showing 3 changed files with 213 additions and 1 deletion.
7 changes: 7 additions & 0 deletions omeroweb/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,13 @@ def check_session_engine(s):
"Prevent download of OMERO.tables exceeding this number of rows "
"in a single request.",
],
"omero.web.max_table_slice_size": [
"MAX_TABLE_SLICE_SIZE",
1_000_000,
int,
"Maximum number of cells that can be retrieved in a single call "
"to the table slicing endpoint.",
],
# VIEWER
"omero.web.viewer.view": [
"VIEWER_VIEW",
Expand Down
29 changes: 29 additions & 0 deletions omeroweb/webgateway/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
from django.urls import re_path
from omeroweb.webgateway import views


COMPACT_JSON = {"_json_dumps_params": {"separators": (",", ":")}}


webgateway = re_path(r"^$", views.index, name="webgateway")
"""
Returns a main prefix
Expand Down Expand Up @@ -600,6 +604,28 @@
"""


table_get_where_list = re_path(
r"^table/(?P<fileid>\d+)/rows/$",
views.table_get_where_list,
name="webgateway_table_get_where_list",
kwargs=COMPACT_JSON,
)
"""
Query a table specified by fileid and return the matching rows
"""


table_slice = re_path(
r"^table/(?P<fileid>\d+)/slice/$",
views.table_slice,
name="webgateway_table_slice",
kwargs=COMPACT_JSON,
)
"""
Fetch a table slice specified by rows and columns
"""


urlpatterns = [
webgateway,
render_image,
Expand Down Expand Up @@ -657,4 +683,7 @@
table_obj_id_bitmask,
object_table_query,
open_with_options,
# low-level table API
table_get_where_list,
table_slice,
]
178 changes: 177 additions & 1 deletion omeroweb/webgateway/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1463,7 +1463,9 @@ def wrap(request, *args, **kwargs):
# NB: To support old api E.g. /get_rois_json/
# We need to support lists
safe = type(rv) is dict
return JsonResponse(rv, safe=safe)
# Allow optional JSON dumps parameters
json_params = kwargs.get("_json_dumps_params", None)
return JsonResponse(rv, safe=safe, json_dumps_params=json_params)
except Exception as ex:
# Default status is 500 'server error'
# But we try to handle all 'expected' errors appropriately
Expand Down Expand Up @@ -3485,3 +3487,177 @@ def get_image_rdefs_json(request, img_id=None, conn=None, **kwargs):
except Exception:
logger.debug(traceback.format_exc())
return {"error": "Failed to retrieve rdefs"}


@login_required()
@jsonp
def table_get_where_list(request, fileid, conn=None, **kwargs):
"""
Retrieves matching row numbers for a table query
Example: /webgateway/table/123/rows/?query=object<100&start=50
Query arguments:
query: table query in PyTables syntax
start: row number to start searching
Uses MAX_TABLE_SLICE_SIZE to determine how many rows will be searched.
@param request: http request.
@param fileid: the id of the table
@param conn: L{omero.gateway.BlitzGateway}
@param **kwargs: unused
@return: A dictionary with keys 'rows' and 'meta' in the success case,
one with key 'error' if something went wrong.
'rows' is an array of matching row numbers.
'meta' includes:
- rowCount: total number of rows in table
- columnCount: total number of columns in table
- start: row on which search was started
- end: row on which search ended (exclusive), can be used
for follow-up query as new start value if end<rowCount
- maxCells: maximum number of cells that can be requested
in one request
- partialCount: number of matching rows returned in this
response. Important: if start>0 and/or end<rowCount,
this may not be the total number of matching rows in the
table!
"""

query = request.GET.get("query")
if not query:
return {"error": "Must specify query"}
try:
start = int(request.GET.get("start"))
except (ValueError, TypeError):
start = 0
ctx = conn.createServiceOptsDict()
ctx.setOmeroGroup("-1")
resources = conn.getSharedResources()
table = resources.openTable(omero.model.OriginalFileI(fileid), ctx)
if not table:
return {"error": "Table %s not found" % fileid}
try:
row_count = table.getNumberOfRows()
column_count = len(table.getHeaders())
end = min(row_count, start + settings.MAX_TABLE_SLICE_SIZE)
logger.info(f"Query '{query}' from rows {start} to {end}")
hits = table.getWhereList(query, None, start, end, 1) if start < end else []
return {
"rows": hits,
"meta": {
"partialCount": len(hits),
"rowCount": row_count,
"columnCount": column_count,
"start": start,
"end": end,
"maxCells": settings.MAX_TABLE_SLICE_SIZE,
},
}
except Exception:
return {"error": "Error executing query: %s" % query}
finally:
table.close()


@login_required()
@jsonp
def table_slice(request, fileid, conn=None, **kwargs):
"""
Performs a table slice
Example: /webgateway/table/123/slice/?rows=1,2,5-10&columns=0,3-4
Query arguments:
rows: row numbers to retrieve in comma-separated list,
hyphen-separated ranges allowed
columns: column numbers to retrieve in comma-separated list,
hyphen-separated ranges allowed
At most MAX_TABLE_SLICE_SIZE data points (number of rows * number of columns) can
be retrieved, if more are requested, an error is returned.
@param request: http request.
@param fileid: the id of the table
@param conn: L{omero.gateway.BlitzGateway}
@param **kwargs: unused
@return: A dictionary with keys 'columns' and 'meta' in the success
case, one with key 'error' if something went wrong.
'columns' is an array of column data arrays
'meta' includes:
- rowCount: total number of rows in table
- columns: names of columns in same order as data arrays
- columnCount: total number of columns in table
- maxCells: maximum number of cells that can be requested
in one request
"""

def parse(item):
try:
yield int(item)
except ValueError:
start, end = item.split("-")
if start > end:
raise ValueError("Invalid range")
yield from range(int(start), int(end) + 1)

def limit_generator(generator, max_items):
for counter, item in enumerate(generator):
if counter >= max_items:
raise ValueError("Too many items")
yield item

source = request.POST if request.method == "POST" else request.GET
try:
# Limit number of items to avoid problems when given massive ranges
rows = list(
limit_generator(
(row for item in source.get("rows").split(",") for row in parse(item)),
settings.MAX_TABLE_SLICE_SIZE,
)
)
columns = list(
limit_generator(
(
column
for item in source.get("columns").split(",")
for column in parse(item)
),
settings.MAX_TABLE_SLICE_SIZE / len(rows),
)
)
except (ValueError, AttributeError) as error:
return {
"error": f"Need comma-separated list of rows and columns ({str(error)})"
}
ctx = conn.createServiceOptsDict()
ctx.setOmeroGroup("-1")
resources = conn.getSharedResources()
table = resources.openTable(omero.model.OriginalFileI(fileid), ctx)
if not table:
return {"error": "Table %s not found" % fileid}
column_count = len(table.getHeaders())
row_count = table.getNumberOfRows()
if not all(0 <= column < column_count for column in columns):
return {"error": "Columns out of range"}
if not all(0 <= row < row_count for row in rows):
return {"error": "Rows out of range"}
try:
columns = table.slice(columns, rows).columns
return {
"columns": [column.values for column in columns],
"meta": {
"columns": [column.name for column in columns],
"rowCount": row_count,
"columnCount": column_count,
"maxCells": settings.MAX_TABLE_SLICE_SIZE,
},
}
except Exception as error:
logger.exception(
"Error slicing table %s with %d columns and %d rows"
% (fileid, len(columns), len(rows))
)
return {"error": f"Error slicing table ({str(error)})"}
finally:
table.close()

0 comments on commit b071a89

Please sign in to comment.