From d78b71aeba189fa8f046ab71192211b049bb5b42 Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Thu, 20 Jun 2024 15:46:43 +0200 Subject: [PATCH 01/22] Add new API endpoints for row-based query and slice --- omeroweb/settings.py | 8 ++++ omeroweb/webgateway/urls.py | 19 ++++++++ omeroweb/webgateway/views.py | 84 ++++++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+) diff --git a/omeroweb/settings.py b/omeroweb/settings.py index 6851284c66..d54f4917b1 100755 --- a/omeroweb/settings.py +++ b/omeroweb/settings.py @@ -716,6 +716,14 @@ def check_session_engine(s): "Prevent download of OMERO.tables exceeding this number of rows " "in a single request.", ], + "omero.web.max_table_slice_size": [ + "MAX_TABLE_SLICE_SIZE", + 1_000_000, + int, + "Prevent download of OMERO.tables exceeding this number of cells " + "in a single request.", + ], + # VIEWER "omero.web.viewer.view": [ "VIEWER_VIEW", diff --git a/omeroweb/webgateway/urls.py b/omeroweb/webgateway/urls.py index 3d529ab41a..b509a42417 100644 --- a/omeroweb/webgateway/urls.py +++ b/omeroweb/webgateway/urls.py @@ -600,6 +600,22 @@ """ +perform_get_where_list = re_path( + r"^table/(?P<fileid>\d+)/rows/$", views.perform_get_where_list, name="webgateway_perform_get_where_list" +) +""" +Query a table specified by fileid and return the matching rows +""" + + +perform_slice = re_path( + r"^table/(?P<fileid>\d+)/slice/$", views.perform_slice, name="webgateway_perform_slice" +) +""" +Fetch a table slice specified by rows and columns +""" + + urlpatterns = [ webgateway, render_image, @@ -657,4 +673,7 @@ table_obj_id_bitmask, object_table_query, open_with_options, + + perform_get_where_list, + perform_slice, ] diff --git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index 2b25b9a092..5fc1151139 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -3472,3 +3472,87 @@ def get_image_rdefs_json(request, img_id=None, conn=None, **kwargs): except Exception: logger.debug(traceback.format_exc()) 
return {"error": "Failed to retrieve rdefs"} + + +@login_required() +@jsonp +def perform_get_where_list(request, fileid, conn=None, **kwargs): + query = request.GET.get('query') + if not query: + return {'error': 'Must specify query'} + try: + start = int(request.GET.get('start')) + except (ValueError, TypeError): + start = 0 + ctx = conn.createServiceOptsDict() + ctx.setOmeroGroup('-1') + resources = conn.getSharedResources() + table = resources.openTable(omero.model.OriginalFileI(fileid), ctx) + if not table: + return {'error': 'Table %s not found' % fileid} + try: + rows = table.getNumberOfRows() + end = min(rows, start + settings.MAX_TABLE_SLICE_SIZE) + if start >= end: + hits = [] + else: + logger.info(query) + hits = table.getWhereList(query, None, start, end, 1) + # TODO: start and end may be ignored, filter here - remove once backend is fixed + hits = [hit for hit in hits if start <= hit < end] + return { + 'rows': hits, + 'meta': { + 'rowCount': rows, + 'start': start, + 'end': end, + } + } + except Exception: + return {'error': 'Error executing query: %s' % query} + finally: + table.close() + + +@login_required() +@jsonp +def perform_slice(request, fileid, conn=None, **kwargs): + + def parse(item): + try: + yield int(item) + except ValueError: + start, end = item.split('-') + yield from range(int(start), int(end) + 1) + + source = request.POST if request.method == 'POST' else request.GET + try: + rows = [row for item in source.get('rows').split(',') for row in parse(item)] + columns = [column for item in source.get('columns').split(',') for column in parse(item)] + except ValueError: + return {'error': 'Need to specify comma-separated list of rows and columns'} + count = len(rows) * len(columns) + if count > settings.MAX_TABLE_SLICE_SIZE: + return {'error': 'Invalid slice cell count'} + ctx = conn.createServiceOptsDict() + ctx.setOmeroGroup('-1') + resources = conn.getSharedResources() + table = resources.openTable(omero.model.OriginalFileI(fileid), ctx) 
+ if not table: + return {'error': 'Table %s not found' % fileid} + try: + try: + columns = table.slice(columns, rows).columns + except: + logger.exception('Error slicing table %s with %d columns and %d rows' % (fileid, len(columns), len(rows))) + return {'error': 'Error slicing table'} + return { + 'columns': [column.values for column in columns], + 'meta': { + 'columns': [column.name for column in columns], + 'rowCount': table.getNumberOfRows(), + }, + } + finally: + table.close() + From 7e019bfd744dd76487ea450f0f36ac984df9e28e Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Thu, 20 Jun 2024 16:20:01 +0200 Subject: [PATCH 02/22] Add doc strings --- omeroweb/webgateway/views.py | 47 ++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index 5fc1151139..f810b0d820 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -3477,6 +3477,30 @@ def get_image_rdefs_json(request, img_id=None, conn=None, **kwargs): @login_required() @jsonp def perform_get_where_list(request, fileid, conn=None, **kwargs): + """ + Retrieves matching row numbers for a table query + + Example: /webgateway/table/123/rows/?query=object<100&start=50 + + Query arguments: + query: table query in PyTables syntax + start: row number to start searching + + Uses MAX_TABLE_SLICE_SIZE to determine how many rows will be searched. + + @param request: http request. + @param img_id: the id of the image in question + @param conn: L{omero.gateway.BlitzGateway} + @param **kwargs: unused + @return: A dictionary with keys 'rows' and 'meta' in the success case, + one with key 'error' if something went wrong. + 'rows' is an array of matching row numbers. 
+ 'meta' includes: + - rowCount: total number of rows in table + - start: row on which search was started + - end: row on which search ended (exclusive), can be used for + follow-up query as new start value if end Date: Thu, 20 Jun 2024 16:52:40 +0200 Subject: [PATCH 03/22] Simplify code --- omeroweb/webgateway/views.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index f810b0d820..92195d89a3 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -3588,11 +3588,7 @@ def parse(item): if not table: return {'error': 'Table %s not found' % fileid} try: - try: - columns = table.slice(columns, rows).columns - except: - logger.exception('Error slicing table %s with %d columns and %d rows' % (fileid, len(columns), len(rows))) - return {'error': 'Error slicing table'} + columns = table.slice(columns, rows).columns return { 'columns': [column.values for column in columns], 'meta': { @@ -3600,6 +3596,9 @@ def parse(item): 'rowCount': table.getNumberOfRows(), }, } + except: + logger.exception('Error slicing table %s with %d columns and %d rows' % (fileid, len(columns), len(rows))) + return {'error': 'Error slicing table'} finally: table.close() From ae9e25ee59486eb0cfb0db33a3aa405fedfad09f Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Fri, 21 Jun 2024 09:41:24 +0200 Subject: [PATCH 04/22] Add comment --- omeroweb/webgateway/urls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/omeroweb/webgateway/urls.py b/omeroweb/webgateway/urls.py index b509a42417..dc88c9c10f 100644 --- a/omeroweb/webgateway/urls.py +++ b/omeroweb/webgateway/urls.py @@ -673,7 +673,7 @@ table_obj_id_bitmask, object_table_query, open_with_options, - + # low-level table API perform_get_where_list, perform_slice, ] From 387f9916197dfd5cbabefaf0ee32d16343b73546 Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Fri, 21 Jun 2024 11:46:32 +0200 Subject: [PATCH 05/22] Allow JSON 
output without whitespace and use for new endpoints --- omeroweb/webgateway/urls.py | 14 ++++++++++++-- omeroweb/webgateway/views.py | 4 +++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/omeroweb/webgateway/urls.py b/omeroweb/webgateway/urls.py index dc88c9c10f..d0a2e05089 100644 --- a/omeroweb/webgateway/urls.py +++ b/omeroweb/webgateway/urls.py @@ -16,6 +16,10 @@ from django.urls import re_path from omeroweb.webgateway import views + +COMPACT_JSON = {'_json_dumps_params': {'separators': (',', ':')}} + + webgateway = re_path(r"^$", views.index, name="webgateway") """ Returns a main prefix @@ -601,7 +605,10 @@ perform_get_where_list = re_path( - r"^table/(?P<fileid>\d+)/rows/$", views.perform_get_where_list, name="webgateway_perform_get_where_list" + r"^table/(?P<fileid>\d+)/rows/$", + views.perform_get_where_list, + name="webgateway_perform_get_where_list", + kwargs=COMPACT_JSON, ) """ Query a table specified by fileid and return the matching rows @@ -609,7 +616,10 @@ perform_slice = re_path( - r"^table/(?P<fileid>\d+)/slice/$", views.perform_slice, name="webgateway_perform_slice" + r"^table/(?P<fileid>\d+)/slice/$", + views.perform_slice, + name="webgateway_perform_slice", + kwargs=COMPACT_JSON, ) """ Fetch a table slice specified by rows and columns diff --git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index 92195d89a3..0585056a68 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -1463,7 +1463,9 @@ def wrap(request, *args, **kwargs): # NB: To support old api E.g. 
/get_rois_json/ # We need to support lists safe = type(rv) is dict - return JsonResponse(rv, safe=safe) + # Allow optional JSON dumps parameters + json_params = kwargs.get("_json_dumps_params", None) + return JsonResponse(rv, safe=safe, json_dumps_params=json_params) except Exception as ex: # Default status is 500 'server error' # But we try to handle all 'expected' errors appropriately From db885119d897d00078f1ea83ae8427dbe2d506c0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 21 Jun 2024 10:10:52 +0000 Subject: [PATCH 06/22] Code cleanup --- omeroweb/settings.py | 1 - omeroweb/webgateway/urls.py | 2 +- omeroweb/webgateway/views.py | 82 ++++++++++++++++++++---------------- 3 files changed, 46 insertions(+), 39 deletions(-) diff --git a/omeroweb/settings.py b/omeroweb/settings.py index d54f4917b1..b5feefd446 100755 --- a/omeroweb/settings.py +++ b/omeroweb/settings.py @@ -723,7 +723,6 @@ def check_session_engine(s): "Prevent download of OMERO.tables exceeding this number of cells " "in a single request.", ], - # VIEWER "omero.web.viewer.view": [ "VIEWER_VIEW", diff --git a/omeroweb/webgateway/urls.py b/omeroweb/webgateway/urls.py index d0a2e05089..7b519d97eb 100644 --- a/omeroweb/webgateway/urls.py +++ b/omeroweb/webgateway/urls.py @@ -17,7 +17,7 @@ from omeroweb.webgateway import views -COMPACT_JSON = {'_json_dumps_params': {'separators': (',', ':')}} +COMPACT_JSON = {"_json_dumps_params": {"separators": (",", ":")}} webgateway = re_path(r"^$", views.index, name="webgateway") diff --git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index 0585056a68..34dc4c830d 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -3500,22 +3500,22 @@ def perform_get_where_list(request, fileid, conn=None, **kwargs): 'meta' includes: - rowCount: total number of rows in table - start: row on which search was started - - end: row on which search ended (exclusive), can be 
used for - follow-up query as new start value if end settings.MAX_TABLE_SLICE_SIZE: - return {'error': 'Invalid slice cell count'} + return {"error": "Invalid slice cell count"} ctx = conn.createServiceOptsDict() - ctx.setOmeroGroup('-1') + ctx.setOmeroGroup("-1") resources = conn.getSharedResources() table = resources.openTable(omero.model.OriginalFileI(fileid), ctx) if not table: - return {'error': 'Table %s not found' % fileid} + return {"error": "Table %s not found" % fileid} try: columns = table.slice(columns, rows).columns return { - 'columns': [column.values for column in columns], - 'meta': { - 'columns': [column.name for column in columns], - 'rowCount': table.getNumberOfRows(), + "columns": [column.values for column in columns], + "meta": { + "columns": [column.name for column in columns], + "rowCount": table.getNumberOfRows(), }, } - except: - logger.exception('Error slicing table %s with %d columns and %d rows' % (fileid, len(columns), len(rows))) - return {'error': 'Error slicing table'} + except Exception: + logger.exception( + "Error slicing table %s with %d columns and %d rows" + % (fileid, len(columns), len(rows)) + ) + return {"error": "Error slicing table"} finally: table.close() - From 3623886d22aec1443697c5c583680aeb630b4956 Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Mon, 1 Jul 2024 14:28:24 +0200 Subject: [PATCH 07/22] Better errors --- omeroweb/webgateway/views.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index 34dc4c830d..97b5b4b5da 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -3517,8 +3517,9 @@ def perform_get_where_list(request, fileid, conn=None, **kwargs): if not table: return {"error": "Table %s not found" % fileid} try: - rows = table.getNumberOfRows() - end = min(rows, start + settings.MAX_TABLE_SLICE_SIZE) + row_count = table.getNumberOfRows() + column_count = len(table.getHeaders()) + 
end = min(row_count, start + settings.MAX_TABLE_SLICE_SIZE) if start >= end: hits = [] else: @@ -3529,7 +3530,8 @@ def perform_get_where_list(request, fileid, conn=None, **kwargs): return { "rows": hits, "meta": { - "rowCount": rows, + "rowCount": row_count, + "columnCount": column_count, "start": start, "end": end, }, @@ -3574,6 +3576,8 @@ def parse(item): yield int(item) except ValueError: start, end = item.split("-") + if start > end: + raise ValueError("Invalid range") yield from range(int(start), int(end) + 1) source = request.POST if request.method == "POST" else request.GET @@ -3584,8 +3588,8 @@ def parse(item): for item in source.get("columns").split(",") for column in parse(item) ] - except ValueError: - return {"error": "Need to specify comma-separated list of rows and columns"} + except (ValueError, AttributeError) as error: + return {"error": f"Need to specify comma-separated list of rows and columns ({str(error)})"} count = len(rows) * len(columns) if count > settings.MAX_TABLE_SLICE_SIZE: return {"error": "Invalid slice cell count"} @@ -3595,6 +3599,9 @@ def parse(item): table = resources.openTable(omero.model.OriginalFileI(fileid), ctx) if not table: return {"error": "Table %s not found" % fileid} + column_count = len(table.getHeaders()) + if any(column >= column_count for column in columns): + return {"error": "Columns out of range"} try: columns = table.slice(columns, rows).columns return { @@ -3602,13 +3609,14 @@ def parse(item): "meta": { "columns": [column.name for column in columns], "rowCount": table.getNumberOfRows(), + "columnCount": column_count, }, } - except Exception: + except Exception as error: logger.exception( "Error slicing table %s with %d columns and %d rows" % (fileid, len(columns), len(rows)) ) - return {"error": "Error slicing table"} + return {"error": f"Error slicing table ({str(error)})"} finally: table.close() From 72e5d9f55dcf5c62f408d49d4ed6ba7c26071601 Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Mon, 1 Jul 2024 
15:10:30 +0200 Subject: [PATCH 08/22] Abort when too many items given --- omeroweb/webgateway/views.py | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index 97b5b4b5da..6a660e41ba 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -3580,19 +3580,35 @@ def parse(item): raise ValueError("Invalid range") yield from range(int(start), int(end) + 1) + def limit_generator(generator, max_items): + for counter, item in enumerate(generator): + if counter >= max_items: + raise ValueError("Too many items") + yield item + source = request.POST if request.method == "POST" else request.GET try: - rows = [row for item in source.get("rows").split(",") for row in parse(item)] - columns = [ - column - for item in source.get("columns").split(",") - for column in parse(item) - ] + # Limit number of items to avoid problems when given massive ranges + rows = list(limit_generator( + ( + row + for item in source.get("rows").split(",") + for row in parse(item) + ), + settings.MAX_TABLE_SLICE_SIZE + )) + columns = list(limit_generator( + ( + column + for item in source.get("columns").split(",") + for column in parse(item) + ), + settings.MAX_TABLE_SLICE_SIZE / len(rows) + )) except (ValueError, AttributeError) as error: - return {"error": f"Need to specify comma-separated list of rows and columns ({str(error)})"} - count = len(rows) * len(columns) - if count > settings.MAX_TABLE_SLICE_SIZE: - return {"error": "Invalid slice cell count"} + return { + "error": f"Need comma-separated list of rows and columns ({str(error)})" + } ctx = conn.createServiceOptsDict() ctx.setOmeroGroup("-1") resources = conn.getSharedResources() From a2104c07de4de57ca21f8f4a43c1a3bb356f0837 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 1 Jul 2024 13:10:49 +0000 Subject: [PATCH 09/22] [pre-commit.ci] 
auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- omeroweb/webgateway/views.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index 6a660e41ba..8b4ec2f163 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -3589,22 +3589,22 @@ def limit_generator(generator, max_items): source = request.POST if request.method == "POST" else request.GET try: # Limit number of items to avoid problems when given massive ranges - rows = list(limit_generator( - ( - row - for item in source.get("rows").split(",") - for row in parse(item) - ), - settings.MAX_TABLE_SLICE_SIZE - )) - columns = list(limit_generator( - ( - column - for item in source.get("columns").split(",") - for column in parse(item) - ), - settings.MAX_TABLE_SLICE_SIZE / len(rows) - )) + rows = list( + limit_generator( + (row for item in source.get("rows").split(",") for row in parse(item)), + settings.MAX_TABLE_SLICE_SIZE, + ) + ) + columns = list( + limit_generator( + ( + column + for item in source.get("columns").split(",") + for column in parse(item) + ), + settings.MAX_TABLE_SLICE_SIZE / len(rows), + ) + ) except (ValueError, AttributeError) as error: return { "error": f"Need comma-separated list of rows and columns ({str(error)})" From d138dac57ea4b8394b36980ba47e7be304763b94 Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Mon, 1 Jul 2024 16:10:29 +0200 Subject: [PATCH 10/22] Add max cell setting to metadata --- omeroweb/webgateway/views.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index 8b4ec2f163..7c7923dd71 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -3534,6 +3534,7 @@ def perform_get_where_list(request, fileid, conn=None, **kwargs): "columnCount": column_count, "start": start, "end": end, + "maxCells": 
settings.MAX_TABLE_SLICE_SIZE, }, } except Exception: @@ -3626,6 +3627,7 @@ def limit_generator(generator, max_items): "columns": [column.name for column in columns], "rowCount": table.getNumberOfRows(), "columnCount": column_count, + "maxCells": settings.MAX_TABLE_SLICE_SIZE, }, } except Exception as error: From 0ca9a083c05f0b5bf207233f445520fa13175e6b Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Mon, 1 Jul 2024 16:53:19 +0200 Subject: [PATCH 11/22] Allow collapsing of resulting rows --- omeroweb/webgateway/views.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index 7c7923dd71..2cf6f6e40b 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -3503,6 +3503,27 @@ def perform_get_where_list(request, fileid, conn=None, **kwargs): - end: row on which search ended (exclusive), can be used for follow-up query as new start value if end Date: Tue, 2 Jul 2024 10:29:03 +0200 Subject: [PATCH 12/22] Better comments and logging --- omeroweb/webgateway/views.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index 2cf6f6e40b..f26eecaf97 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -3491,7 +3491,7 @@ def perform_get_where_list(request, fileid, conn=None, **kwargs): Uses MAX_TABLE_SLICE_SIZE to determine how many rows will be searched. @param request: http request. 
- @param img_id: the id of the image in question + @param fileid: the id of the table @param conn: L{omero.gateway.BlitzGateway} @param **kwargs: unused @return: A dictionary with keys 'rows' and 'meta' in the success case, @@ -3509,11 +3509,11 @@ def collapse_ranges(generator): def dump_range(): if range_start is not None: - if range_start == range_end: + if range_start == range_end: # single value yield range_start - elif range_start + 1 == range_end: + elif range_start + 1 == range_end: # two values yield from (range_start, range_end) - else: + else: # three or more values, collapse yield f'{range_start}-{range_end}' for hit in generator: @@ -3542,10 +3542,10 @@ def dump_range(): row_count = table.getNumberOfRows() column_count = len(table.getHeaders()) end = min(row_count, start + settings.MAX_TABLE_SLICE_SIZE) + logger.info(f"Query '{query}' from rows {start} to {end}") if start >= end: hits = [] else: - logger.info(query) hits = table.getWhereList(query, None, start, end, 1) # TODO: getWhereList may ignore start and end - remove once fixed hits = (hit for hit in hits if start <= hit < end) @@ -3585,7 +3585,7 @@ def perform_slice(request, fileid, conn=None, **kwargs): be retrieved, if more are requested, an error is returned. @param request: http request. 
- @param img_id: the id of the image in question + @param fileid: the id of the table @param conn: L{omero.gateway.BlitzGateway} @param **kwargs: unused @return: A dictionary with keys 'columns' and 'meta' in the success From 53438ec09dafac63c7fbe6c120f061ded3f5b783 Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Tue, 2 Jul 2024 10:31:12 +0200 Subject: [PATCH 13/22] Return result count for this request --- omeroweb/webgateway/views.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index f26eecaf97..a04753bb9f 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -3504,6 +3504,14 @@ def perform_get_where_list(request, fileid, conn=None, **kwargs): for follow-up query as new start value if end Date: Tue, 2 Jul 2024 10:41:09 +0200 Subject: [PATCH 14/22] Update docstrings --- omeroweb/webgateway/views.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index a04753bb9f..b16b2798bc 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -3499,9 +3499,16 @@ def perform_get_where_list(request, fileid, conn=None, **kwargs): 'rows' is an array of matching row numbers. 
'meta' includes: - rowCount: total number of rows in table + - columnCount: total number of columns in table - start: row on which search was started - end: row on which search ended (exclusive), can be used for follow-up query as new start value if end0 and/or end Date: Tue, 2 Jul 2024 10:47:37 +0200 Subject: [PATCH 15/22] Fix counter when not collapsing --- omeroweb/webgateway/views.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index b16b2798bc..005b8922fb 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -3519,7 +3519,7 @@ def __init__(self, generator): def __iter__(self): self.value = yield from self.generator - def collapse_ranges(generator): + def collapse_ranges(generator, collapse=True): range_start = range_end = None def dump_range(): @@ -3534,6 +3534,9 @@ def dump_range(): count = 0 for hit in generator: count += 1 + if not collapse: + yield hit + continue if hit - 1 == range_end: range_end = hit # increase current range else: # start new range @@ -3549,7 +3552,7 @@ def dump_range(): start = int(request.GET.get("start")) except (ValueError, TypeError): start = 0 - collapse = request.GET.get("collapse", None) is not None + collapse_results = request.GET.get("collapse", None) is not None ctx = conn.createServiceOptsDict() ctx.setOmeroGroup("-1") resources = conn.getSharedResources() @@ -3567,8 +3570,8 @@ def dump_range(): hits = table.getWhereList(query, None, start, end, 1) # TODO: getWhereList may ignore start and end - remove once fixed hits = (hit for hit in hits if start <= hit < end) - # Collapse if requested, and wrap in fetcher so we can get count - counter = ValueFetcher(collapse_ranges(hits) if collapse else hits) + # Collapse and wrap in fetcher so we can get count + counter = ValueFetcher(collapse_ranges(hits, collapse_results)) return { "rows": list(counter), "meta": { From 8314ae8a03f43b2604c58d670591287d9dab045a Mon Sep 
17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 2 Jul 2024 08:48:07 +0000 Subject: [PATCH 16/22] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- omeroweb/webgateway/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index 005b8922fb..e702c50599 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -3529,7 +3529,7 @@ def dump_range(): elif range_start + 1 == range_end: # two values yield from (range_start, range_end) else: # three or more values, collapse - yield f'{range_start}-{range_end}' + yield f"{range_start}-{range_end}" count = 0 for hit in generator: From 9fc9273ef8ea9a59cf17c9259626dad9ddecd09c Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Tue, 2 Jul 2024 10:51:18 +0200 Subject: [PATCH 17/22] Add missing arg docstring --- omeroweb/webgateway/views.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index e702c50599..994e525bea 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -3487,6 +3487,9 @@ def perform_get_where_list(request, fileid, conn=None, **kwargs): Query arguments: query: table query in PyTables syntax start: row number to start searching + collapse: optional argument, if present, collapses three or more + sequential row numbers in the resulting array into strings formatted as + "start-end". The same format can be submitted back to the slice request. Uses MAX_TABLE_SLICE_SIZE to determine how many rows will be searched. 
From e71cf9946888ec711fb09ccfeb05733f7172027e Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Wed, 3 Jul 2024 14:16:38 +0200 Subject: [PATCH 18/22] Remove optional collapse --- omeroweb/webgateway/views.py | 46 +++--------------------------------- 1 file changed, 3 insertions(+), 43 deletions(-) diff --git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index 994e525bea..df49c25204 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -3487,9 +3487,6 @@ def perform_get_where_list(request, fileid, conn=None, **kwargs): Query arguments: query: table query in PyTables syntax start: row number to start searching - collapse: optional argument, if present, collapses three or more - sequential row numbers in the resulting array into strings formatted as - "start-end". The same format can be submitted back to the slice request. Uses MAX_TABLE_SLICE_SIZE to determine how many rows will be searched. @@ -3514,40 +3511,6 @@ def perform_get_where_list(request, fileid, conn=None, **kwargs): table! 
""" - class ValueFetcher(object): - def __init__(self, generator): - self.generator = generator - self.value = None - - def __iter__(self): - self.value = yield from self.generator - - def collapse_ranges(generator, collapse=True): - range_start = range_end = None - - def dump_range(): - if range_start is not None: - if range_start == range_end: # single value - yield range_start - elif range_start + 1 == range_end: # two values - yield from (range_start, range_end) - else: # three or more values, collapse - yield f"{range_start}-{range_end}" - - count = 0 - for hit in generator: - count += 1 - if not collapse: - yield hit - continue - if hit - 1 == range_end: - range_end = hit # increase current range - else: # start new range - yield from dump_range() - range_start = range_end = hit - yield from dump_range() - return count - query = request.GET.get("query") if not query: return {"error": "Must specify query"} @@ -3555,7 +3518,6 @@ def dump_range(): start = int(request.GET.get("start")) except (ValueError, TypeError): start = 0 - collapse_results = request.GET.get("collapse", None) is not None ctx = conn.createServiceOptsDict() ctx.setOmeroGroup("-1") resources = conn.getSharedResources() @@ -3572,13 +3534,11 @@ def dump_range(): else: hits = table.getWhereList(query, None, start, end, 1) # TODO: getWhereList may ignore start and end - remove once fixed - hits = (hit for hit in hits if start <= hit < end) - # Collapse and wrap in fetcher so we can get count - counter = ValueFetcher(collapse_ranges(hits, collapse_results)) + hits = [hit for hit in hits if start <= hit < end] return { - "rows": list(counter), + "rows": hits, "meta": { - "partialCount": counter.value, + "partialCount": len(hits), "rowCount": row_count, "columnCount": column_count, "start": start, From 091e5fc2d35212271ac71fd66b42c611299d6fde Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Wed, 3 Jul 2024 14:22:04 +0200 Subject: [PATCH 19/22] Remove boundary check --- omeroweb/webgateway/views.py | 
7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index df49c25204..cdaf6b28dd 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -3529,12 +3529,7 @@ def perform_get_where_list(request, fileid, conn=None, **kwargs): column_count = len(table.getHeaders()) end = min(row_count, start + settings.MAX_TABLE_SLICE_SIZE) logger.info(f"Query '{query}' from rows {start} to {end}") - if start >= end: - hits = [] - else: - hits = table.getWhereList(query, None, start, end, 1) - # TODO: getWhereList may ignore start and end - remove once fixed - hits = [hit for hit in hits if start <= hit < end] + hits = table.getWhereList(query, None, start, end, 1) if start < end else [] return { "rows": hits, "meta": { From 9674aba7ec684c76c3d3fdd8ed158daa4e5870f8 Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Wed, 24 Jul 2024 09:45:16 +0200 Subject: [PATCH 20/22] Check row range; check both upper and lower limits --- omeroweb/webgateway/views.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index cdaf6b28dd..0831111202 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -3624,15 +3624,18 @@ def limit_generator(generator, max_items): if not table: return {"error": "Table %s not found" % fileid} column_count = len(table.getHeaders()) - if any(column >= column_count for column in columns): + row_count = table.getNumberOfRows() + if not all(0 <= column < column_count for column in columns): return {"error": "Columns out of range"} + if not all(0 <= row < row_count for row in rows): + return {"error": "Rows out of range"} try: columns = table.slice(columns, rows).columns return { "columns": [column.values for column in columns], "meta": { "columns": [column.name for column in columns], - "rowCount": table.getNumberOfRows(), + "rowCount": row_count, "columnCount": 
column_count, "maxCells": settings.MAX_TABLE_SLICE_SIZE, }, From 6f6d818432ac0fbc4cd9fc6388081ebc17544ddf Mon Sep 17 00:00:00 2001 From: Andreas Knab Date: Wed, 24 Jul 2024 10:38:04 +0200 Subject: [PATCH 21/22] Better setting description --- omeroweb/settings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/omeroweb/settings.py b/omeroweb/settings.py index b5feefd446..5c20ec04ef 100755 --- a/omeroweb/settings.py +++ b/omeroweb/settings.py @@ -720,8 +720,8 @@ def check_session_engine(s): "MAX_TABLE_SLICE_SIZE", 1_000_000, int, - "Prevent download of OMERO.tables exceeding this number of cells " - "in a single request.", + "Maximum number of cells that can be retrieved in a single call " + "to the table slicing endpoint.", ], # VIEWER "omero.web.viewer.view": [ "VIEWER_VIEW", diff --git a/omeroweb/webgateway/urls.py b/omeroweb/webgateway/urls.py index 7b519d97eb..46cf584a28 100644 --- a/omeroweb/webgateway/urls.py +++ b/omeroweb/webgateway/urls.py @@ -604,10 +604,10 @@ """ -perform_get_where_list = re_path( +table_get_where_list = re_path( r"^table/(?P<fileid>\d+)/rows/$", - views.perform_get_where_list, - name="webgateway_perform_get_where_list", + views.table_get_where_list, + name="webgateway_table_get_where_list", kwargs=COMPACT_JSON, ) """ @@ -615,10 +615,10 @@ """ -perform_slice = re_path( +table_slice = re_path( r"^table/(?P<fileid>\d+)/slice/$", - views.perform_slice, - name="webgateway_perform_slice", + views.table_slice, + name="webgateway_table_slice", kwargs=COMPACT_JSON, ) """ @@ -684,6 +684,6 @@ object_table_query, open_with_options, # low-level table API - perform_get_where_list, - perform_slice, + table_get_where_list, + table_slice, ] diff 
--git a/omeroweb/webgateway/views.py b/omeroweb/webgateway/views.py index 0831111202..bf6e2db295 100644 --- a/omeroweb/webgateway/views.py +++ b/omeroweb/webgateway/views.py @@ -3478,7 +3478,7 @@ def get_image_rdefs_json(request, img_id=None, conn=None, **kwargs): @login_required() @jsonp -def perform_get_where_list(request, fileid, conn=None, **kwargs): +def table_get_where_list(request, fileid, conn=None, **kwargs): """ Retrieves matching row numbers for a table query @@ -3549,7 +3549,7 @@ def perform_get_where_list(request, fileid, conn=None, **kwargs): @login_required() @jsonp -def perform_slice(request, fileid, conn=None, **kwargs): +def table_slice(request, fileid, conn=None, **kwargs): """ Performs a table slice