Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New table API endpoints for lower level table access #564

Merged
merged 22 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions omeroweb/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,13 @@ def check_session_engine(s):
"Prevent download of OMERO.tables exceeding this number of rows "
"in a single request.",
],
"omero.web.max_table_slice_size": [
"MAX_TABLE_SLICE_SIZE",
1_000_000,
int,
"Prevent download of OMERO.tables exceeding this number of cells "
"in a single request.",
knabar marked this conversation as resolved.
Show resolved Hide resolved
],
# VIEWER
"omero.web.viewer.view": [
"VIEWER_VIEW",
Expand Down
29 changes: 29 additions & 0 deletions omeroweb/webgateway/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
from django.urls import re_path
from omeroweb.webgateway import views


# Extra keyword arguments attached to URL patterns whose responses can be
# large: the separators drop the whitespace the JSON encoder would otherwise
# emit after "," and ":".
COMPACT_JSON = dict(_json_dumps_params=dict(separators=(",", ":")))


webgateway = re_path(r"^$", views.index, name="webgateway")
"""
Returns a main prefix
Expand Down Expand Up @@ -600,6 +604,28 @@
"""


perform_get_where_list = re_path(
    route=r"^table/(?P<fileid>\d+)/rows/$",
    view=views.perform_get_where_list,
    kwargs=COMPACT_JSON,
    name="webgateway_perform_get_where_list",
)
"""
Run a query against the table identified by fileid and return the
numbers of the rows that match. The response is serialised as compact JSON.
"""


perform_slice = re_path(
    r"^table/(?P<fileid>\d+)/slice/$",
    views.perform_slice,
    name="webgateway_perform_slice",
    kwargs=COMPACT_JSON,
)
"""
Fetch a slice of the table identified by fileid, selected by row and
column numbers. The response is serialised as compact JSON.
"""


urlpatterns = [
webgateway,
render_image,
Expand Down Expand Up @@ -657,4 +683,7 @@
table_obj_id_bitmask,
object_table_query,
open_with_options,
# low-level table API
perform_get_where_list,
perform_slice,
]
178 changes: 177 additions & 1 deletion omeroweb/webgateway/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1463,7 +1463,9 @@ def wrap(request, *args, **kwargs):
# NB: To support old api E.g. /get_rois_json/
# We need to support lists
safe = type(rv) is dict
return JsonResponse(rv, safe=safe)
# Allow optional JSON dumps parameters
json_params = kwargs.get("_json_dumps_params", None)
return JsonResponse(rv, safe=safe, json_dumps_params=json_params)
except Exception as ex:
# Default status is 500 'server error'
# But we try to handle all 'expected' errors appropriately
Expand Down Expand Up @@ -3472,3 +3474,177 @@ def get_image_rdefs_json(request, img_id=None, conn=None, **kwargs):
except Exception:
logger.debug(traceback.format_exc())
return {"error": "Failed to retrieve rdefs"}


@login_required()
@jsonp
def perform_get_where_list(request, fileid, conn=None, **kwargs):
    """
    Retrieves matching row numbers for a table query

    Example: /webgateway/table/123/rows/?query=object<100&start=50

    Query arguments:
    query: table query in PyTables syntax
    start: row number to start searching (defaults to 0 when missing or
           not an integer)

    Uses MAX_TABLE_SLICE_SIZE to determine how many rows will be searched.

    @param request:     http request.
    @param fileid:      the id of the table
    @param conn:        L{omero.gateway.BlitzGateway}
    @param **kwargs:    unused
    @return:            A dictionary with keys 'rows' and 'meta' in the
                        success case, one with key 'error' if something
                        went wrong.
                        'rows' is an array of matching row numbers.
                        'meta' includes:
                            - rowCount: total number of rows in table
                            - columnCount: total number of columns in table
                            - start: row on which search was started
                            - end: row on which search ended (exclusive),
                              can be used for follow-up query as new start
                              value if end<rowCount
                            - maxCells: maximum number of cells that can be
                              requested in one request
                            - partialCount: number of matching rows returned
                              in this response. Important: if start>0 and/or
                              end<rowCount, this may not be the total number
                              of matching rows in the table!
    """

    query = request.GET.get("query")
    if not query:
        return {"error": "Must specify query"}
    try:
        start = int(request.GET.get("start"))
    except (ValueError, TypeError):
        # Missing (None) or non-numeric start: search from the first row
        start = 0
    # NOTE(review): a negative start is passed through unchanged; confirm
    # how the table service treats it.

    # Group "-1" so the table is opened regardless of the current group
    ctx = conn.createServiceOptsDict()
    ctx.setOmeroGroup("-1")
    resources = conn.getSharedResources()
    table = resources.openTable(omero.model.OriginalFileI(fileid), ctx)
    if not table:
        return {"error": "Table %s not found" % fileid}
    try:
        row_count = table.getNumberOfRows()
        column_count = len(table.getHeaders())
        # Search window is capped so one request cannot scan the whole table
        end = min(row_count, start + settings.MAX_TABLE_SLICE_SIZE)
        logger.info("Query '%s' from rows %s to %s", query, start, end)
        # Skip the server call entirely when the window is empty
        hits = table.getWhereList(query, None, start, end, 1) if start < end else []
        return {
            "rows": hits,
            "meta": {
                "partialCount": len(hits),
                "rowCount": row_count,
                "columnCount": column_count,
                "start": start,
                "end": end,
                "maxCells": settings.MAX_TABLE_SLICE_SIZE,
            },
        }
    except Exception:
        # Keep the traceback in the server log; the client only gets a
        # generic message
        logger.exception("Error executing query '%s' on table %s", query, fileid)
        return {"error": "Error executing query: %s" % query}
    finally:
        table.close()


@login_required()
@jsonp
def perform_slice(request, fileid, conn=None, **kwargs):
    """
    Performs a table slice

    Example: /webgateway/table/123/slice/?rows=1,2,5-10&columns=0,3-4

    Query arguments (read from POST data for POST requests, otherwise from
    the query string):
    rows: row numbers to retrieve in comma-separated list,
          hyphen-separated ranges allowed
    columns: column numbers to retrieve in comma-separated list,
             hyphen-separated ranges allowed

    At most MAX_TABLE_SLICE_SIZE data points (number of rows * number of
    columns) can be retrieved, if more are requested, an error is returned.

    @param request:     http request.
    @param fileid:      the id of the table
    @param conn:        L{omero.gateway.BlitzGateway}
    @param **kwargs:    unused
    @return:            A dictionary with keys 'columns' and 'meta' in the
                        success case, one with key 'error' if something
                        went wrong.
                        'columns' is an array of column data arrays
                        'meta' includes:
                            - rowCount: total number of rows in table
                            - columns: names of columns in same order as
                              data arrays
                            - columnCount: total number of columns in table
                            - maxCells: maximum number of cells that can be
                              requested in one request
    """

    def parse(item):
        # Expand one list item: either a single number or a "start-end"
        # range (both ends inclusive).
        try:
            yield int(item)
        except ValueError:
            # Convert before comparing: as strings "5" > "10" would be True
            # and valid ranges such as "5-10" would be rejected.
            start, end = (int(bound) for bound in item.split("-"))
            if start > end:
                raise ValueError("Invalid range")
            yield from range(start, end + 1)

    def limit_generator(generator, max_items):
        # Pass items through, failing as soon as more than max_items are
        # produced, so huge ranges are never fully materialised.
        for counter, item in enumerate(generator):
            if counter >= max_items:
                raise ValueError("Too many items")
            yield item

    source = request.POST if request.method == "POST" else request.GET
    try:
        # Limit number of items to avoid problems when given massive ranges
        rows = list(
            limit_generator(
                (row for item in source.get("rows").split(",") for row in parse(item)),
                settings.MAX_TABLE_SLICE_SIZE,
            )
        )
        columns = list(
            limit_generator(
                (
                    column
                    for item in source.get("columns").split(",")
                    for column in parse(item)
                ),
                # Integer division keeps rows * columns within the limit;
                # a float quotient would admit one column too many.
                settings.MAX_TABLE_SLICE_SIZE // len(rows),
            )
        )
    except (ValueError, AttributeError) as error:
        # AttributeError: a parameter is missing, so .get() returned None
        return {
            "error": f"Need comma-separated list of rows and columns ({str(error)})"
        }

    # Group "-1" so the table is opened regardless of the current group
    ctx = conn.createServiceOptsDict()
    ctx.setOmeroGroup("-1")
    resources = conn.getSharedResources()
    table = resources.openTable(omero.model.OriginalFileI(fileid), ctx)
    if not table:
        return {"error": "Table %s not found" % fileid}
    # Everything that touches the open table sits inside try/finally so the
    # handle is closed on the early "out of range" returns as well.
    try:
        column_count = len(table.getHeaders())
        row_count = table.getNumberOfRows()
        if not all(0 <= column < column_count for column in columns):
            return {"error": "Columns out of range"}
        if not all(0 <= row < row_count for row in rows):
            return {"error": "Rows out of range"}
        try:
            sliced = table.slice(columns, rows).columns
            return {
                "columns": [column.values for column in sliced],
                "meta": {
                    "columns": [column.name for column in sliced],
                    "rowCount": row_count,
                    "columnCount": column_count,
                    "maxCells": settings.MAX_TABLE_SLICE_SIZE,
                },
            }
        except Exception as error:
            logger.exception(
                "Error slicing table %s with %d columns and %d rows",
                fileid,
                len(columns),
                len(rows),
            )
            return {"error": f"Error slicing table ({str(error)})"}
    finally:
        table.close()
Loading