Ready for 1.5.1 release; rolled back ESGF dataset-id query

coecms · Aug 15, 2023 · 6aec1e2 · 6aec1e2
1 parent cc7b632
commit 6aec1e2
Show file tree

Hide file tree

Showing 7 changed files with 16 additions and 21 deletions.
diff --git a/clef/cli.py b/clef/cli.py
@@ -379,6 +379,7 @@ def common_esgf_cli(ctx, project, query, latest, replica, distrib,
                 limit=10000,
                 **constraints,
                 )
+            val = [x for x in s.query(q)][0]
 
             ids=sorted(set(x.dataset_id for x in s.query(q)))
 # when stats or csvf are True first extract attributes from dataset_ids

diff --git a/clef/esgf.py b/clef/esgf.py
@@ -37,7 +37,7 @@
 from sqlalchemy import String, Float, Integer, or_, func
 
 #from .pgvalues import values
-from .model import Path, Checksum
+from .model import Path, Checksum, C6Dataset, C5Dataset, CordexDataset
 from .exception import ClefException
 
 
@@ -167,11 +167,8 @@ def find_checksum_id(query, **kwargs):
         raise ESGFException('No matches found on ESGF, check at %s'%link_to_esgf(query, **constraints))
 
     if response['response']['numFound'] > int(response['responseHeader']['params']['rows']):
-        print(f"Too many files ({response['response']['numFound']}), try limiting your search.\n")
-        print("Returning only dataset results, hence a full comparison with local collection is not possible")
-        response = esgf_query(query, 'id,dataset_id,title,version', otype='Dataset', **constraints)
-        #raise ESGFException('Too many results (%d), try limiting your search %s'%(response['response']['numFound'], 
-        #                    link_to_esgf(query, **constraints)))
+        raise ESGFException('Too many results (%d), try limiting your search %s'%(response['response']['numFound'], 
+                            link_to_esgf(query, **constraints)))
     # separate records that do not have checksum in response (nosums list) from others (records list)
     # we should call local_search for these i.e. a search not based on checksums but is not yet implemented
     nosums=[]
@@ -189,8 +186,7 @@ def find_checksum_id(query, **kwargs):
                 records.append(doc)
             else:
                 nosums.append(doc)
-                print(doc)
-
+
     record_list = [ 
              (doc['checksum'][0],
               doc['id'].split('|')[0], # drop the server name
@@ -202,7 +198,7 @@ def find_checksum_id(query, **kwargs):
     nosums_list = [ 
              ('NA',
               doc['id'].split('|')[0], # drop the server name
-              doc['dataset_id'].split('|')[0], # Drop the server name
+              doc['id'].split('|')[0], # Drop the server name
               doc['title'],
               doc['version'],
               doc['score'])
@@ -221,7 +217,7 @@ def find_checksum_id(query, **kwargs):
         table = sqlalvalues(
             column('checksum', String),
             column('id', String),
-            #column('dataset_id', String),
+            column('dataset_id', String),
             column('title', String),
             column('version', Integer),
             column('score', Float),
@@ -259,13 +255,11 @@ def match_query(session, query, latest=None, **kwargs):
                 .outerjoin(Path))
     else:
         # Match on file name
-        #return values.outerjoin(Path, Path.path.like('%/'+values.c.title))
-        #return values.outerjoin(Path, func.regexp_replace(Path.path, '^.*/', '') == values.c.title)
         matches = checksum_table.join(Path, func.regexp_replace(Path.path, '^.*/', '') == checksum_table.c.title)
 
     if nocksum is True:
-        if project == 'CMIP6':
-            matches = (checksum_table.join(C6Dataset, C6.Dataset.dataset_id == checksum_table.c.dataset_id)) 
+        raise ESGFException(f'Some datasets have incomplete records try --local option') 
+
     return matches
 
 

diff --git a/clef/model.py b/clef/model.py
@@ -83,7 +83,7 @@ def expr(self, model):
 
 
 class Path(Base):
-    """Path of a file on Raijin, with links to metadata
+    """Path of a file on Gadi, with links to metadata
     """
     __tablename__ = 'esgf_paths'
 

diff --git a/conda/meta.yaml b/conda/meta.yaml
@@ -1,4 +1,4 @@
-{% set version = "1.5.0" %}
+{% set version = "1.5.1" %}
 package:
     name: clef
     version: {{ version }}

diff --git a/docs/conf.py b/docs/conf.py
@@ -67,8 +67,8 @@
 #version = _dist.parsed_version.base_version
 # The full version, including alpha/beta/rc tags.
 #release = _dist.version
-version = u'1.5.0'
-release = u'1.5.0'
+version = u'1.5.1'
+release = u'1.5.1'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

diff --git a/setup.cfg b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = clef
-version = 1.5.0
+version = 1.5.1
 author = Scott Wales, Paola Petrelli
 author_email = [email protected], [email protected]
 summary = 'CleF queries ESGF data at NCI' 

diff --git a/test/test_esgf.py b/test/test_esgf.py
@@ -131,14 +131,14 @@ def test_checksum_id_empty(session):
     """
     with mock.patch('clef.esgf.esgf_query', side_effect=empty_query):
         with pytest.raises(ClefException):
-            table = find_checksum_id('')
+            table, nocksum = find_checksum_id('')
 
 def test_checksum_id_missing(session):
     """
     Create a values table with the returned result
     """
     with mock.patch('clef.esgf.esgf_query', side_effect=missing_query):
-        table = find_checksum_id('')
+        table, nocksum = find_checksum_id('')
         match = session.query(table).one()
         assert match.id == 'abcde'
         assert match.score == 1.0