From d94119819e2c111bedd1ccc7bc70b320cd10d071 Mon Sep 17 00:00:00 2001 From: Simran Mattu Date: Mon, 16 Sep 2024 13:29:21 +0000 Subject: [PATCH 1/4] Updated the Error IDs for the operating report Squash commit message: Updated Error codes for operating report and the unittests to accomodate for the new Error codes --- .github/workflows/main.yml | 2 +- data/migrate/Updated_Errors.csv | 126 +++++++++ data/migrate/errors-backfilling.csv | 246 +++++++++--------- woudc_data_registry/controller.py | 2 +- woudc_data_registry/dataset_validators.py | 56 ++-- woudc_data_registry/models.py | 8 +- woudc_data_registry/processing.py | 104 ++++---- woudc_data_registry/registry.py | 17 +- woudc_data_registry/tests/config/errors.csv | 126 +++++++++ .../tests/test_report_generation.py | 42 +-- 10 files changed, 496 insertions(+), 233 deletions(-) create mode 100644 data/migrate/Updated_Errors.csv create mode 100644 woudc_data_registry/tests/config/errors.csv diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7ebafcdb..eb9e7154 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -20,7 +20,7 @@ jobs: WDR_SEARCH_URL: http://localhost:9200/ WDR_WAF_BASEURL: https://woudc.org/archive/ WDR_WAF_BASEDIR: /tmp - WDR_ERROR_CONFIG: data/migrate/errors-backfilling.csv + WDR_ERROR_CONFIG: woudc_data_registry/tests/config/Updated_Errors.csv WDR_ALIAS_CONFIG: data/aliases.yml WDR_EXTRA_CONFIG: data/extra-options.yml GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}" diff --git a/data/migrate/Updated_Errors.csv b/data/migrate/Updated_Errors.csv new file mode 100644 index 00000000..97758a43 --- /dev/null +++ b/data/migrate/Updated_Errors.csv @@ -0,0 +1,126 @@ +Error Code,Error Type,Message Template,Notes +101,Warning,Not a text file +102,Error,No core metadata tables found. 
Not an Extended CSV file +103,Warning,Unexpected empty line between table header and fields +104,Warning,Improper delimiter used '{separator}' corrected to '\,' (comma) +105,Warning,#{table} field {oldfield} capitalization should be {newfield} +106,Warning,#{table} corrected to {newtable} using aliases +107,Warning,#{table} field {oldfield} corrected to {newfield} using aliases +108,Warning,#{table}.{field} value corrected to {newvalue} using aliases +109,Warning,#{table}.Time separator '{separator}' corrected to ':' (colon) +110,Warning,#{table}.Time corrected from 12-hour clock to 24-hour YYYY-MM-DD format +111,Warning,#{table}.Date separator '{separator}' corrected to '-' (hyphen) +112,Error,#{table}.Date not in YYYY-MM-DD format: missing separators +113,Error,#{table}.Date is incomplete +114,Error,#{table}.Date not in YYYY-MM-DD format: too many separators +115,Warning,#{table}.UTCOffset separator '{separator}' corrected to ':' (colon) +116,Warning,#{table}.UTCOffset {component} is less than 2 digits long +117,Warning,#{table}.UTCOffset {component} is missing\, default value is '00' (zero) +118,Warning,Missing sign in #{table}.UTCOffset\, default '+' (plus) +119,Warning,Invalid sign in #{table}.UTCOffset\, replacing with '{sign}' +120,Warning,#{table}.UTCOffset is a series of zeroes\, should be '+00:00:00' +121,Error,Cannot derive #MONTHLY table: missing #DAILY.ColumnO3 +122,Error,Lidar table counts are uneven between #OZONE_PROFILE and #OZONE_SUMMARY +123,Error,Spectral table counts are uneven between #TIMESTAMP\, #GLOBAL\, and #{summary_table} +201,Error,Missing required table #{table} +202,Error,Excess table #{table} does not belong in {dataset} file +203,Error,Missing required field #{table}.{field} +204,Error,Required field #{table}.{field} is null or empty +205,Warning,Optional field #{table}.{field} is null or empty +206,Error,Table #{table} has no fields +207,Error,Table #{table} has no fields,Placeholder for optional-table-specific error 
+208,Error,Required table #{table} contains no data +209,Warning,Optional table #{table} contains no data +210,Error,No non-core data tables found +211,Error,Unrecognized data {row} +212,Warning,#{table} row has more values than #{table} has columns +213,Error,Fewer than minimum {bound} occurrences of table #{table} found +214,Error,More than maximum {bound} occurrences of table #{table} found +215,Error,Fewer than minimum {bound} number of rows in table #{table} +216,Error,More than maximum {bound} number of rows in table #{table} +217,Warning,#CONTENT.Level should be {value} according to present tables +218,Warning,#CONTENT.Level {oldvalue} should be a decimal number ({newvalue}) +219,Warning,#CONTENT.Form {oldvalue} should be integral ({newvalue}) +220,Error,Cannot assess expected table set: #CONTENT.{field} unknown +221,Warning,Missing #DATA_GENERATION.Date\, defaults to processing date +222,Warning,#DATA_GENERATION.Version does not have decimal place +223,Error,Null value found for #INSTRUMENT.Name +224,Warning,Null value found for #INSTRUMENT.Model +225,Warning,Null value found for #INSTRUMENT.Number +226,Warning,Inconsistent Time values between #TIMESTAMP tables +227,Warning,#{table}.Date cannot be more recent than #DATA_GENERATION.Date,Reserved for non-TIMESTAMP tables +228,Warning,First #TIMESTAMP.Time cannot be more recent than other time(s) +229,Error,Submitted file #DATA_GENERATION.Date is earlier than previously submitted version +230,Error,No ozone data in #DAILY table +231,Warning,#DAILY.Date found in non-chronological order +232,Warning,#DAILY.Date has different year than #TIMESTAMP.Date +233,Warning,Duplicate observations found in #DAILY table for Date=(date) +234,Warning,Multiple observations found with #DAILY.Date {date} +235,Warning,#TIMESTAMP.Date before #DAILY does not equal first date of #DAILY +236,Warning,#TIMESTAMP.Date after #DAILY does not equal last date of #DAILY +237,Warning,More than two #TIMESTAMP tables found in file 
+238,Warning,#TIMESTAMP table after #DAILY is missing\, deriving based on requirements +239,Warning,Missing #MONTHLY table\, deriving based on requirements +240,Warning,Missing value for #MONTHLY.{field}\, deriving based on requirements +241,Warning,#MONTHLY.{field} differs from derived value +242,Warning,#OBSERVATIONS.Time found in non-chronological order +243,Warning,Duplicate observations found in #OBSERVATIONS table for Time={time} +244,Warning,Multiple observations found with #OBSERVATIONS.Time {time} +245,Warning,#{table}.Date found in non-chronological order,Reserved for Umkehr data tables +246,Warning,Multiple observations found with #{table}.Date {date},Reserved for Umkehr data tables +247,Warning,#TIMESTAMP.Date before #{table} does not equal first date of #{table},Reserved for Umkehr data tables +248,Warning,#TIMESTAMP.Date after #{table} does not equal last date of #{table},Reserved for Umkehr data tables +249,Warning,#TIMESTAMP table after #{table} is missing\, deriving based on requirements,Reserved for Umkehr data tables +250,Warning,Excess field {field} does not belong in table {table} +251,Warning,Duplicate observations found in #{table} for Date={date},Reserved for Umkehr data tables +301,Error,Failed to parse #{table}.Time {component}: contains invalid characters +302,Error,Failed to parse #{table}.Date {component}: contains invalid characters +303,Error,#{table}.Date {component} is not within allowable range [{lower}]-[{upper}],Reserved for year and month components +304,Error,#{table}.Date day is not within allowable range [{lower}]-[{upper}] +305,Error,Failed to parse #{table}.UTCOffset: contains invalid characters +306,Warning,Missing #CONTENT.Class\, default is 'WOUDC' +307,Error,#CONTENT.Class {value} failed to validate against registry +308,Error,#CONTENT.Category {value} failed to validate against registry +309,Error,Unknown #CONTENT.Level for dataset {dataset} +310,Error,Failed to parse #CONTENT.Level: contains invalid characters 
+311,Error,Failed to parse #CONTENT.Form: contains invalid characters +312,Error,Cannot resolve missing or invalid #CONTENT.Form +313,Error,Unknown #CONTENT.Form for dataset {dataset} and level {level} +314,Warning,Missing #DATA_GENERATION.Version\, defaults to {default} +315,Warning,#DATA_GENERATION.Version is not within allowable range [{lower}]-[{upper}] +316,Error,Failed to parse #DATA_GENERATION.Version: contains invalid characters +317,Error,#DATA_GENERATION.Agency failed to validate against registry +318,Warning,#PLATFORM.ID is not 3 digits long +319,Error,#PLATFORM.ID not found in registry +320,Error,#PLATFORM.Type in file does not match registry +321,Error,#PLATFORM.Name in file does not match registry +322,Error,#PLATFORM.Country in file does not match registry +323,Warning,Ship #PLATFORM.Country should be 'XY' to meet ISO-3166 standards +324,Warning,Failed to parse #LOCATION.Height: contains invalid characters +325,Error,#LOCATION.{field} is not within allowable range [{lower}]-[{upper}],Reserved for fields Latitude and Longitude +326,Warning,#LOCATION.Height is not within allowable range [{lower}]-[{upper}] +327,Warning,#LOCATION.{field} in file does not match registry,Reserved for fields Latitude and Longitude +328,Warning,#LOCATION.Height in file does not match registry +329,Warning,Null value found for #INSTRUMENT.Model +330,Warning,Null value found for #INSTRUMENT.Number +331,Error,#INSTRUMENT.Name not found in registry +332,Error,#INSTRUMENT.Model not found in registry +333,Error,Instrument failed to validate against registry +334,Error,Deployment {ident} not found in registry +335,Error,Failed to parse #{table}.{field} due to errors: {reason} +336,Warning,#{table}.Date cannot be more recent than #DATA_GENERATION.Date,Reserved for TIMESTAMP tables +337,Warning,#{table}.Date cannot be more recent than #DATA_GENERATION.Date,Reserved for non-TIMESTAMP tables +338,Warning,ECC instrument serial number different from previous submission +339,Error,Failed 
to parse #LOCATION.{field}: contains invalid characters,Reserved for fields Latitude and Longitude +340,Warning,#{table}.Time {component} is not within allowable range [{lower}]-[{upper}] +401,Error,Submitted file version and #DATA_GENERAION.Date identical to previously submitted file +402,Error,Submitted #DATA_GENERATION.Date is identical to previously submitted file +403,Error,Submitted version number is identical to previously submitted file +404,Warning,ECC instrument serial number different from previous submission +405,Warning,Data file successfully persisted +406,Warning,New instrument added +407,Warning,New deployment added +408,Warning,New station name added +409,Warning,New contribution added +410,Error,Data file failed to validate +1000,Error,Unassigned error message \ No newline at end of file diff --git a/data/migrate/errors-backfilling.csv b/data/migrate/errors-backfilling.csv index 8a4beb67..97758a43 100755 --- a/data/migrate/errors-backfilling.csv +++ b/data/migrate/errors-backfilling.csv @@ -1,122 +1,126 @@ Error Code,Error Type,Message Template,Notes -1,Warning,Not a text file -2,Error,No core metadata tables found. 
Not an Extended CSV file -3,Error,Missing required table #{table} -4,Error,Excess table #{table} does not belong in {dataset} file -5,Error,Missing required field #{table}.{field} -6,Warning,Excess field {field} does not belong in table {table} -7,Error,Required field #{table}.{field} is null or empty -8,Warning,Optional field #{table}.{field} is null or empty -9,Error,Table #{table} has no fields -10,Error,Table #{table} has no fields,Placeholder for optional-table-specific error -11,Error,Required table #{table} contains no data -12,Warning,Optional table #{table} contains no data -13,Error,No non-core data tables found -14,Warning,Unexpected empty line between table header and fields -15,Error,Unrecognized data {row} -16,Warning,Improper delimiter used '{separator}' corrected to '\,' (comma) -20,Warning,#{table} field {oldfield} capitalization should be {newfield} -21,Warning,#{table} corrected to {newtable} using aliases -22,Warning,#{table} field {oldfield} corrected to {newfield} using aliases -23,Warning,#{table}.{field} value corrected to {newvalue} using aliases -25,Warning,#{table} row has more values than #{table} has columns -26,Error,Fewer than minimum {bound} occurrences of table #{table} found -27,Error,More than maximum {bound} occurrences of table #{table} found -28,Error,Fewer than minimum {bound} number of rows in table #{table} -29,Error,More than maximum {bound} number of rows in table #{table} -30,Warning,#{table}.Time separator '{separator}' corrected to ':' (colon) -31,Error,Failed to parse #{table}.Time {component}: contains invalid characters -32,Warning,#{table}.Time corrected from 12-hour clock to 24-hour YYYY-MM-DD format -33,Warning,#{table}.Time {component} is not within allowable range [{lower}]-[{upper}] -34,Warning,#{table}.Date separator '{separator}' corrected to '-' (hyphen) -35,Error,#{table}.Date not in YYYY-MM-DD format: missing separators -36,Error,#{table}.Date is incomplete -37,Error,#{table}.Date not in YYYY-MM-DD format: 
too many separators -38,Error,Failed to parse #{table}.Date {component}: contains invalid characters -39,Error,#{table}.Date {component} is not within allowable range [{lower}]-[{upper}],Reserved for year and month components -40,Error,#{table}.Date day is not within allowable range [{lower}]-[{upper}] -41,Warning,#{table}.UTCOffset separator '{separator}' corrected to ':' (colon) -42,Warning,#{table}.UTCOffset {component} is less than 2 digits long -43,Warning,#{table}.UTCOffset {component} is missing\, default value is '00' (zero) -44,Warning,Missing sign in #{table}.UTCOffset\, default '+' (plus) -45,Warning,Invalid sign in #{table}.UTCOffset\, replacing with '{sign}' -46,Warning,#{table}.UTCOffset is a series of zeroes\, should be '+00:00:00' -47,Error,Failed to parse #{table}.UTCOffset: contains invalid characters -50,Warning,Missing #CONTENT.Class\, default is 'WOUDC' -51,Error,#CONTENT.Class {value} failed to validate against registry -52,Error,#CONTENT.Category {value} failed to validate against registry -53,Warning,#CONTENT.Level should be {value} according to present tables -54,Warning,#CONTENT.Level {oldvalue} should be a decimal number ({newvalue}) -55,Error,Failed to parse #CONTENT.Level: contains invalid characters -56,Error,Unknown #CONTENT.Level for dataset {dataset} -57,Warning,#CONTENT.Form {oldvalue} should be integral ({newvalue}) -58,Error,Failed to parse #CONTENT.Form: contains invalid characters -59,Error,Cannot resolve missing or invalid #CONTENT.Form -60,Error,Unknown #CONTENT.Form for dataset {dataset} and level {level} -61,Error,Cannot assess expected table set: #CONTENT.{field} unknown -62,Warning,Missing #DATA_GENERATION.Date\, defaults to processing date -63,Warning,Missing #DATA_GENERATION.Version\, defaults to {default} -64,Warning,#DATA_GENERATION.Version is not within allowable range [{lower}]-[{upper}] -65,Warning,#DATA_GENERATION.Version does not have exactly decimal place -66,Error,Failed to parse #DATA_GENERATION.Version: 
contains invalid characters -67,Error,#DATA_GENERATION.Agency failed to validate against registry -70,Warning,#PLATFORM.ID is not 3 digits long -71,Error,#PLATFORM.ID not found in registry -72,Error,#PLATFORM.Type in file does not match registry -73,Error,#PLATFORM.Name in file does not match registry -74,Error,#PLATFORM.Country in file does not match registry -75,Warning,Ship #PLATFORM.Country should be 'XY' to meet ISO-3166 standards -76,Error,Failed to parse #LOCATION.{field}: contains invalid characters,Reserved for fields Latitude and Longitude -77,Warning,Failed to parse #LOCATION.Height: contains invalid characters -78,Error,#LOCATION.{field} is not within allowable range [{lower}]-[{upper}],Reserved for fields Latitude and Longitude -79,Warning,#LOCATION.Height is not within allowable range [{lower}]-[{upper}] -80,Warning,#LOCATION.{field} in file does not match registry,Reserved for fields Latitude and Longitude -81,Warning,#LOCATION.Height in file does not match registry -82,Error,Null value found for #INSTRUMENT.Name -83,Warning,Null value found for #INSTRUMENT.Model -84,Warning,Null value found for #INSTRUMENT.Number -85,Error,#INSTRUMENT.Name not found in registry -86,Error,#INSTRUMENT.Model not found in registry -87,Error,Instrument failed to validate against registry -88,Error,Deployment {ident} not found in registry -89,Error,Failed to parse #{table}.{field} due to errors: {reason} -90,Warning,Inconsistent Time values between #TIMESTAMP tables -91,Warning,#{table}.Date cannot be more recent than #DATA_GENERATION.Date,Reserved for TIMESTAMP tables -92,Warning,#{table}.Date cannot be more recent than #DATA_GENERATION.Date,Reserved for non-TIMESTAMP tables -93,Warning,First #TIMESTAMP.Time cannot be more recent than other time(s) -95,Error,Submitted file #DATA_GENERATION.Date is earlier than previously submitted version -96,Error,Submitted file version and #DATA_GENERAION.Date identical to previously submitted file -97,Error,Submitted 
#DATA_GENERATION.Date is identical to previously submitted file -98,Error,Submitted version number is identical to previously submitted file -99,Warning,ECC instrument serial number different from previous submission -101,Error,No ozone data in #DAILY table -102,Warning,#DAILY.Date found in non-chronological order -103,Warning,#DAILY.Date has different year than #TIMESTAMP.Date -104,Warning,Duplicate observations found in #DAILY table for Date=(date) -105,Warning,Multiple observations found with #DAILY.Date {date} -106,Warning,#TIMESTAMP.Date before #DAILY does not equal first date of #DAILY -107,Warning,#TIMESTAMP.Date after #DAILY does not equal last date of #DAILY -108,Warning,More than two #TIMESTAMP tables found in file -109,Warning,#TIMESTAMP table after #DAILY is missing\, deriving based on requirements -110,Warning,Missing #MONTHLY table\, deriving based on requirements -111,Warning,Missing value for #MONTHLY.{field}\, deriving based on requirements -112,Warning,#MONTHLY.{field} differs from derived value -113,Error,Cannot derive #MONTHLY table: missing #DAILY.ColumnO3 -114,Warning,#OBSERVATIONS.Time found in non-chronological order -115,Warning,Duplicate observations found in #OBSERVATIONS table for Time={time} -116,Warning,Multiple observations found with #OBSERVATIONS.Time {time} -118,Warning,#{table}.Date found in non-chronological order,Reserved for Umkehr data tables -119,Warning,Duplicate observations found in #{table} for Date={date},Reserved for Umkehr data tables -120,Warning,Multiple observations found with #{table}.Date {date},Reserved for Umkehr data tables -121,Warning,#TIMESTAMP.Date before #{table} does not equal first date of #{table},Reserved for Umkehr data tables -122,Warning,#TIMESTAMP.Date after #{table} does not equal last date of #{table},Reserved for Umkehr data tables -123,Warning,#TIMESTAMP table after #{table} is missing\, deriving based on requirements,Reserved for Umkehr data tables -125,Error,Lidar table counts are uneven 
between #OZONE_PROFILE and #OZONE_SUMMARY -126,Error,Spectral table counts are uneven between #TIMESTAMP\, #GLOBAL\, and #{summary_table} -200,Warning,Data file successfully persisted -201,Warning,New instrument added -202,Warning,New deployment added -203,Warning,New station name added -204,Warning,New contribution added -209,Error,Data file failed to validate -1000,Error,Unassigned error message +101,Warning,Not a text file +102,Error,No core metadata tables found. Not an Extended CSV file +103,Warning,Unexpected empty line between table header and fields +104,Warning,Improper delimiter used '{separator}' corrected to '\,' (comma) +105,Warning,#{table} field {oldfield} capitalization should be {newfield} +106,Warning,#{table} corrected to {newtable} using aliases +107,Warning,#{table} field {oldfield} corrected to {newfield} using aliases +108,Warning,#{table}.{field} value corrected to {newvalue} using aliases +109,Warning,#{table}.Time separator '{separator}' corrected to ':' (colon) +110,Warning,#{table}.Time corrected from 12-hour clock to 24-hour YYYY-MM-DD format +111,Warning,#{table}.Date separator '{separator}' corrected to '-' (hyphen) +112,Error,#{table}.Date not in YYYY-MM-DD format: missing separators +113,Error,#{table}.Date is incomplete +114,Error,#{table}.Date not in YYYY-MM-DD format: too many separators +115,Warning,#{table}.UTCOffset separator '{separator}' corrected to ':' (colon) +116,Warning,#{table}.UTCOffset {component} is less than 2 digits long +117,Warning,#{table}.UTCOffset {component} is missing\, default value is '00' (zero) +118,Warning,Missing sign in #{table}.UTCOffset\, default '+' (plus) +119,Warning,Invalid sign in #{table}.UTCOffset\, replacing with '{sign}' +120,Warning,#{table}.UTCOffset is a series of zeroes\, should be '+00:00:00' +121,Error,Cannot derive #MONTHLY table: missing #DAILY.ColumnO3 +122,Error,Lidar table counts are uneven between #OZONE_PROFILE and #OZONE_SUMMARY +123,Error,Spectral table counts are uneven 
between #TIMESTAMP\, #GLOBAL\, and #{summary_table} +201,Error,Missing required table #{table} +202,Error,Excess table #{table} does not belong in {dataset} file +203,Error,Missing required field #{table}.{field} +204,Error,Required field #{table}.{field} is null or empty +205,Warning,Optional field #{table}.{field} is null or empty +206,Error,Table #{table} has no fields +207,Error,Table #{table} has no fields,Placeholder for optional-table-specific error +208,Error,Required table #{table} contains no data +209,Warning,Optional table #{table} contains no data +210,Error,No non-core data tables found +211,Error,Unrecognized data {row} +212,Warning,#{table} row has more values than #{table} has columns +213,Error,Fewer than minimum {bound} occurrences of table #{table} found +214,Error,More than maximum {bound} occurrences of table #{table} found +215,Error,Fewer than minimum {bound} number of rows in table #{table} +216,Error,More than maximum {bound} number of rows in table #{table} +217,Warning,#CONTENT.Level should be {value} according to present tables +218,Warning,#CONTENT.Level {oldvalue} should be a decimal number ({newvalue}) +219,Warning,#CONTENT.Form {oldvalue} should be integral ({newvalue}) +220,Error,Cannot assess expected table set: #CONTENT.{field} unknown +221,Warning,Missing #DATA_GENERATION.Date\, defaults to processing date +222,Warning,#DATA_GENERATION.Version does not have decimal place +223,Error,Null value found for #INSTRUMENT.Name +224,Warning,Null value found for #INSTRUMENT.Model +225,Warning,Null value found for #INSTRUMENT.Number +226,Warning,Inconsistent Time values between #TIMESTAMP tables +227,Warning,#{table}.Date cannot be more recent than #DATA_GENERATION.Date,Reserved for non-TIMESTAMP tables +228,Warning,First #TIMESTAMP.Time cannot be more recent than other time(s) +229,Error,Submitted file #DATA_GENERATION.Date is earlier than previously submitted version +230,Error,No ozone data in #DAILY table +231,Warning,#DAILY.Date found 
in non-chronological order +232,Warning,#DAILY.Date has different year than #TIMESTAMP.Date +233,Warning,Duplicate observations found in #DAILY table for Date=(date) +234,Warning,Multiple observations found with #DAILY.Date {date} +235,Warning,#TIMESTAMP.Date before #DAILY does not equal first date of #DAILY +236,Warning,#TIMESTAMP.Date after #DAILY does not equal last date of #DAILY +237,Warning,More than two #TIMESTAMP tables found in file +238,Warning,#TIMESTAMP table after #DAILY is missing\, deriving based on requirements +239,Warning,Missing #MONTHLY table\, deriving based on requirements +240,Warning,Missing value for #MONTHLY.{field}\, deriving based on requirements +241,Warning,#MONTHLY.{field} differs from derived value +242,Warning,#OBSERVATIONS.Time found in non-chronological order +243,Warning,Duplicate observations found in #OBSERVATIONS table for Time={time} +244,Warning,Multiple observations found with #OBSERVATIONS.Time {time} +245,Warning,#{table}.Date found in non-chronological order,Reserved for Umkehr data tables +246,Warning,Multiple observations found with #{table}.Date {date},Reserved for Umkehr data tables +247,Warning,#TIMESTAMP.Date before #{table} does not equal first date of #{table},Reserved for Umkehr data tables +248,Warning,#TIMESTAMP.Date after #{table} does not equal last date of #{table},Reserved for Umkehr data tables +249,Warning,#TIMESTAMP table after #{table} is missing\, deriving based on requirements,Reserved for Umkehr data tables +250,Warning,Excess field {field} does not belong in table {table} +251,Warning,Duplicate observations found in #{table} for Date={date},Reserved for Umkehr data tables +301,Error,Failed to parse #{table}.Time {component}: contains invalid characters +302,Error,Failed to parse #{table}.Date {component}: contains invalid characters +303,Error,#{table}.Date {component} is not within allowable range [{lower}]-[{upper}],Reserved for year and month components +304,Error,#{table}.Date day is not within 
allowable range [{lower}]-[{upper}] +305,Error,Failed to parse #{table}.UTCOffset: contains invalid characters +306,Warning,Missing #CONTENT.Class\, default is 'WOUDC' +307,Error,#CONTENT.Class {value} failed to validate against registry +308,Error,#CONTENT.Category {value} failed to validate against registry +309,Error,Unknown #CONTENT.Level for dataset {dataset} +310,Error,Failed to parse #CONTENT.Level: contains invalid characters +311,Error,Failed to parse #CONTENT.Form: contains invalid characters +312,Error,Cannot resolve missing or invalid #CONTENT.Form +313,Error,Unknown #CONTENT.Form for dataset {dataset} and level {level} +314,Warning,Missing #DATA_GENERATION.Version\, defaults to {default} +315,Warning,#DATA_GENERATION.Version is not within allowable range [{lower}]-[{upper}] +316,Error,Failed to parse #DATA_GENERATION.Version: contains invalid characters +317,Error,#DATA_GENERATION.Agency failed to validate against registry +318,Warning,#PLATFORM.ID is not 3 digits long +319,Error,#PLATFORM.ID not found in registry +320,Error,#PLATFORM.Type in file does not match registry +321,Error,#PLATFORM.Name in file does not match registry +322,Error,#PLATFORM.Country in file does not match registry +323,Warning,Ship #PLATFORM.Country should be 'XY' to meet ISO-3166 standards +324,Warning,Failed to parse #LOCATION.Height: contains invalid characters +325,Error,#LOCATION.{field} is not within allowable range [{lower}]-[{upper}],Reserved for fields Latitude and Longitude +326,Warning,#LOCATION.Height is not within allowable range [{lower}]-[{upper}] +327,Warning,#LOCATION.{field} in file does not match registry,Reserved for fields Latitude and Longitude +328,Warning,#LOCATION.Height in file does not match registry +329,Warning,Null value found for #INSTRUMENT.Model +330,Warning,Null value found for #INSTRUMENT.Number +331,Error,#INSTRUMENT.Name not found in registry +332,Error,#INSTRUMENT.Model not found in registry +333,Error,Instrument failed to validate against 
registry +334,Error,Deployment {ident} not found in registry +335,Error,Failed to parse #{table}.{field} due to errors: {reason} +336,Warning,#{table}.Date cannot be more recent than #DATA_GENERATION.Date,Reserved for TIMESTAMP tables +337,Warning,#{table}.Date cannot be more recent than #DATA_GENERATION.Date,Reserved for non-TIMESTAMP tables +338,Warning,ECC instrument serial number different from previous submission +339,Error,Failed to parse #LOCATION.{field}: contains invalid characters,Reserved for fields Latitude and Longitude +340,Warning,#{table}.Time {component} is not within allowable range [{lower}]-[{upper}] +401,Error,Submitted file version and #DATA_GENERAION.Date identical to previously submitted file +402,Error,Submitted #DATA_GENERATION.Date is identical to previously submitted file +403,Error,Submitted version number is identical to previously submitted file +404,Warning,ECC instrument serial number different from previous submission +405,Warning,Data file successfully persisted +406,Warning,New instrument added +407,Warning,New deployment added +408,Warning,New station name added +409,Warning,New contribution added +410,Error,Data file failed to validate +1000,Error,Unassigned error message \ No newline at end of file diff --git a/woudc_data_registry/controller.py b/woudc_data_registry/controller.py index 45065103..25e0002d 100644 --- a/woudc_data_registry/controller.py +++ b/woudc_data_registry/controller.py @@ -117,7 +117,7 @@ def orchestrate(source, working_dir, metadata_only=False, LOGGER.info('Detecting file') if not is_text_file(file_to_process): - _, is_error = op_report.add_message(1) + _, is_error = op_report.add_message(101) if is_error: op_report.write_failing_file(file_to_process, contributor) run_report.write_failing_file(file_to_process, contributor) diff --git a/woudc_data_registry/dataset_validators.py b/woudc_data_registry/dataset_validators.py index 5790869b..604535e0 100644 --- a/woudc_data_registry/dataset_validators.py +++ 
b/woudc_data_registry/dataset_validators.py @@ -191,7 +191,7 @@ def check_time_series(self, extcsv): daily_date = row[0] if daily_date.year != timestamp1_date.year: - if not self._add_to_report(103, line_num): + if not self._add_to_report(232, line_num): success = False if prev_date and daily_date < prev_date: @@ -201,10 +201,10 @@ def check_time_series(self, extcsv): if daily_date not in dates_encountered: dates_encountered[daily_date] = row elif row == dates_encountered[daily_date]: - if not self._add_to_report(104, line_num, date=daily_date): + if not self._add_to_report(233, line_num, date=daily_date): success = False rows_to_remove.append(index) - elif not self._add_to_report(105, line_num, date=daily_date): + elif not self._add_to_report(234, line_num, date=daily_date): success = False rows_to_remove.reverse() @@ -212,7 +212,7 @@ def check_time_series(self, extcsv): for date in dateList: if isinstance(date, (str, int)): is_string = True - if not self._add_to_report(102, daily_startline): + if not self._add_to_report(231, daily_startline): success = False break @@ -222,7 +222,7 @@ def check_time_series(self, extcsv): column.pop(index) if not in_order: - if not self._add_to_report(102, daily_startline): + if not self._add_to_report(231, daily_startline): success = False sorted_dates = sorted(extcsv.extcsv['DAILY']['Date']) @@ -257,13 +257,13 @@ def check_timestamps(self, extcsv): timestamp1_valueline = timestamp1_startline + 2 if timestamp1_date != daily_dates[0]: - if not self._add_to_report(106, timestamp1_valueline): + if not self._add_to_report(235, timestamp1_valueline): success = False extcsv.extcsv['TIMESTAMP']['Date'] = daily_dates[0] timestamp_count = extcsv.table_count('TIMESTAMP') if timestamp_count == 1: - if not self._add_to_report(109): + if not self._add_to_report(238): success = False utcoffset = extcsv.extcsv['TIMESTAMP']['UTCOffset'] @@ -284,17 +284,17 @@ def check_timestamps(self, extcsv): else timestamp2_startline + 2 if timestamp2_date != 
daily_dates[-1]: - if not self._add_to_report(107, timestamp2_valueline): + if not self._add_to_report(236, timestamp2_valueline): success = False extcsv.extcsv['TIMESTAMP_2']['Date'] = daily_dates[-1] if timestamp2_time != timestamp1_time: - if not self._add_to_report(90, timestamp2_valueline): + if not self._add_to_report(226, timestamp2_valueline): success = False if timestamp_count > 2: timestamp3_startline = extcsv.line_num('TIMESTAMP_3') - if not self._add_to_report(108, timestamp3_startline): + if not self._add_to_report(237, timestamp3_startline): success = False for ind in range(3, timestamp_count + 1): @@ -322,7 +322,7 @@ def check_monthly(self, extcsv): return False if 'MONTHLY' not in extcsv.extcsv: - if not self._add_to_report(110): + if not self._add_to_report(239): success = False else: present_monthly = extcsv.extcsv['MONTHLY'] @@ -331,11 +331,11 @@ def check_monthly(self, extcsv): for field, derived_val in template_monthly.items(): if field not in present_monthly: - if not self._add_to_report(111, monthly_valueline, + if not self._add_to_report(240, monthly_valueline, field=field): success = False elif present_monthly[field] != template_monthly[field]: - if not self._add_to_report(112, monthly_valueline, + if not self._add_to_report(241, monthly_valueline, field=field): success = False @@ -363,13 +363,13 @@ def derive_monthly_from_daily(self, extcsv): daily_valueline = daily_fieldline + 1 if not ozone_column: - self._add_to_report(113, daily_fieldline) + self._add_to_report(121, daily_fieldline) msg = 'Cannot derive #MONTHLY table: #DAILY.ColumnO3 missing' raise Exception(msg) ozone_column = list(filter(bool, ozone_column)) if len(ozone_column) == 0: - self._add_to_report(101, daily_valueline) + self._add_to_report(230, daily_valueline) msg = 'Cannot derive #MONTHLY table: no ozone data in #DAILY' raise Exception(msg) @@ -455,10 +455,10 @@ def check_time_series(self, extcsv): if time not in times_encountered: times_encountered[time] = row elif row 
== times_encountered[time]: - if not self._add_to_report(115, line_num, time=time): + if not self._add_to_report(243, line_num, time=time): success = False rows_to_remove.append(index) - elif not self._add_to_report(116, line_num, time=time): + elif not self._add_to_report(244, line_num, time=time): success = False rows_to_remove.reverse() @@ -467,7 +467,7 @@ def check_time_series(self, extcsv): column.pop(index) if not in_order: - if not self._add_to_report(114, observations_valueline): + if not self._add_to_report(242, observations_valueline): success = False sorted_times = sorted(extcsv.extcsv['OBSERVATIONS']['Time']) @@ -530,7 +530,7 @@ def check_groupings(self, extcsv): summary_count = extcsv.table_count(summary_table) if not timestamp_count == global_count == summary_count: - if not self._add_to_report(126, summary_table=summary_table): + if not self._add_to_report(123, summary_table=summary_table): success = False return success @@ -580,7 +580,7 @@ def check_groupings(self, extcsv): summary_count = extcsv.table_count('OZONE_SUMMARY') if profile_count != summary_count: - if not self._add_to_report(125): + if not self._add_to_report(122): success = False return success @@ -651,11 +651,11 @@ def check_time_series(self, extcsv): if observation_date not in dates_encountered: dates_encountered[observation_date] = row elif row == dates_encountered[observation_date]: - if not self._add_to_report(119, line_num, table=data_table, + if not self._add_to_report(251, line_num, table=data_table, date=observation_date): success = False rows_to_remove.append(index) - elif not self._add_to_report(120, line_num, table=data_table, + elif not self._add_to_report(246, line_num, table=data_table, date=observation_date): success = False @@ -665,7 +665,7 @@ def check_time_series(self, extcsv): column.pop(index) if not in_order: - if not self._add_to_report(118, data_table_valueline, + if not self._add_to_report(245, data_table_valueline, table=data_table): success = False @@ -701,14 
+701,14 @@ def check_timestamps(self, extcsv): timestamp1_valueline = timestamp1_startline + 2 if timestamp1_date != observation_dates[0]: - if not self._add_to_report(121, timestamp1_valueline, + if not self._add_to_report(247, timestamp1_valueline, table=data_table): success = False extcsv.extcsv['TIMESTAMP']['Date'] = observation_dates[0] timestamp_count = extcsv.table_count('TIMESTAMP') if timestamp_count == 1: - if not self._add_to_report(123, table=data_table): + if not self._add_to_report(249, table=data_table): success = False utcoffset = extcsv.extcsv['TIMESTAMP']['UTCOffset'] @@ -729,19 +729,19 @@ def check_timestamps(self, extcsv): else timestamp2_startline + 2 if timestamp2_date != observation_dates[-1]: - if not self._add_to_report(122, timestamp2_valueline, + if not self._add_to_report(248, timestamp2_valueline, table=data_table): success = False extcsv.extcsv['TIMESTAMP_2']['Date'] = observation_dates[-1] if timestamp2_time != timestamp1_time: - if not self._add_to_report(90, timestamp2_valueline): + if not self._add_to_report(226, timestamp2_valueline): success = False if timestamp_count > 2: timestamp3_startline = extcsv.line_num('TIMESTAMP_3') - if not self._add_to_report(108, timestamp3_startline): + if not self._add_to_report(237, timestamp3_startline): success = False for ind in range(3, timestamp_count + 1): diff --git a/woudc_data_registry/models.py b/woudc_data_registry/models.py index a2511d2e..4318d711 100644 --- a/woudc_data_registry/models.py +++ b/woudc_data_registry/models.py @@ -1421,14 +1421,16 @@ def timestamp_utc(self): def get_waf_path(self, dict_): """generate WAF url""" - datasetdirname = f"{self.dataset_id}_{dict_['dataset_level']}_{dict_['dataset_form']}" # noqa + datasetdirname = '{}_{}_{}'.format(self.dataset_id, + dict_['dataset_level'], + dict_['dataset_form']) timestamp_date = datetime.datetime.strptime( dict_['timestamp_date'], '%Y-%m-%d').date() url_tokens = [ config.WDR_WAF_BASEURL.rstrip('/'), 'Archive-NewFormat', 
datasetdirname, - f"{dict_['station_type'].lower()}{self.station_id}", # noqa + '{}{}'.format(dict_['station_type'].lower(), self.station_id), dict_['instrument_name'].lower(), timestamp_date.strftime('%Y'), dict_['filename'] @@ -1440,7 +1442,7 @@ def get_waf_path(self, dict_): def __geo_interface__(self): gaw_baseurl = 'https://gawsis.meteoswiss.ch/GAWSIS/index.html#' \ '/search/station/stationReportDetails' - gaw_pagename = f'0-20008-0-{self.station.gaw_id}' + gaw_pagename = '0-20008-0-{}'.format(self.station.gaw_id) return { 'id': self.uv_id, diff --git a/woudc_data_registry/processing.py b/woudc_data_registry/processing.py index d0bbc259..7f15c36c 100644 --- a/woudc_data_registry/processing.py +++ b/woudc_data_registry/processing.py @@ -178,13 +178,13 @@ def validate(self, extcsv, metadata_only=False, verify_only=False, elif self.add_deployment(bypass=bypass): deployment_ok = True - self._add_to_report(202) + self._add_to_report(407) else: msg = f'Deployment {deployment_name} not added. Skipping file.' 
# noqa LOGGER.warning(msg) line = self.extcsv.line_num('PLATFORM') + 2 - deployment_ok = self._add_to_report(88, line, + deployment_ok = self._add_to_report(334, line, ident=deployment_id) LOGGER.debug('Validating instrument') @@ -230,11 +230,11 @@ def validate(self, extcsv, metadata_only=False, verify_only=False, instrument_ok = self.add_instrument(bypass=bypass) if instrument_ok: - self._add_to_report(201) + self._add_to_report(406) if not instrument_ok: line = self.extcsv.line_num('INSTRUMENT') + 2 - instrument_ok = self._add_to_report(87, line) + instrument_ok = self._add_to_report(333, line) location_ok = False else: @@ -257,7 +257,7 @@ def validate(self, extcsv, metadata_only=False, verify_only=False, contribution_ok = self.add_contribution(bypass=bypass) if contribution_ok and (not contribution_exists): - self._add_to_report(204) + self._add_to_report(409) content_ok = self.check_content() data_generation_ok = self.check_data_generation() @@ -266,7 +266,7 @@ def validate(self, extcsv, metadata_only=False, verify_only=False, platform_ok, deployment_ok, instrument_ok, location_ok, content_ok, data_generation_ok, contribution_ok]): - self._add_to_report(209) + self._add_to_report(410) return None if metadata_only: @@ -280,7 +280,7 @@ def validate(self, extcsv, metadata_only=False, verify_only=False, dataset_validated = dataset_validator.check_all(self.extcsv) if not all([time_series_ok, dataset_validated]): - self._add_to_report(209) + self._add_to_report(410) return None LOGGER.info('Validating data record') @@ -288,14 +288,14 @@ def validate(self, extcsv, metadata_only=False, verify_only=False, data_record_ok = self.check_data_record(data_record) if not data_record_ok: - self._add_to_report(209) + self._add_to_report(410) return None else: LOGGER.info('Data record is valid and verified') self._registry_updates.append(data_record) self._search_index_updates.append(data_record) - self._add_to_report(200) + self._add_to_report(405) return data_record def 
persist(self): @@ -575,7 +575,7 @@ def check_project(self): return True else: line = self.extcsv.line_num('CONTENT') + 2 - return self._add_to_report(51, line, value=project) + return self._add_to_report(307, line, value=project) def check_dataset(self): """ @@ -606,7 +606,7 @@ def check_dataset(self): return True else: line = self.extcsv.line_num('CONTENT') + 2 - return self._add_to_report(52, line, value=dataset) + return self._add_to_report(308, line, value=dataset) def check_contributor(self): """ @@ -631,7 +631,7 @@ def check_contributor(self): replacement = ALIASES['Agency'][agency] if not isinstance(replacement, str): - if not self._add_to_report(23, line, replacement): + if not self._add_to_report(108, line, replacement): success = False agency = replacement @@ -653,7 +653,7 @@ def check_contributor(self): LOGGER.debug(f'Match found for contributor ID {result.contributor_id}') # noqa else: line = self.extcsv.line_num('DATA_GENERATION') + 2 - if not self._add_to_report(67, line): + if not self._add_to_report(317, line): success = False return success @@ -687,13 +687,13 @@ def check_station(self, bypass=False, verify=False): water_codes = ['*IW', 'IW', 'XZ'] if pl_type == 'SHP' and any([not country, country in water_codes]): - if not self._add_to_report(75, valueline): + if not self._add_to_report(323, valueline): success = False self.extcsv.extcsv['PLATFORM']['Country'] = country = 'XY' if len(identifier) < 3: - if not self._add_to_report(70, valueline): + if not self._add_to_report(318, valueline): success = False identifier = identifier.rjust(3, '0') @@ -712,7 +712,7 @@ def check_station(self, bypass=False, verify=False): if response: LOGGER.debug(f'Validated station with id: {identifier}') else: - self._add_to_report(71, valueline) + self._add_to_report(319, valueline) return False LOGGER.debug('Validating station type...') @@ -721,7 +721,7 @@ def check_station(self, bypass=False, verify=False): if type_ok: LOGGER.debug(f'Validated station type 
{type_ok}') - elif not self._add_to_report(72, valueline): + elif not self._add_to_report(320, valueline): success = False LOGGER.debug('Validating station name...') @@ -736,7 +736,7 @@ def check_station(self, bypass=False, verify=False): LOGGER.info('Verify mode. Skipping station name addition.') elif self.add_station_name(bypass=bypass): LOGGER.info(f"Added new station name {station['current_name']}") - elif not self._add_to_report(73, valueline, name=name): + elif not self._add_to_report(321, valueline, name=name): success = False LOGGER.debug('Validating station country...') @@ -748,7 +748,7 @@ def check_station(self, bypass=False, verify=False): country = response.country self.extcsv.extcsv['PLATFORM']['Country'] = country.country_id LOGGER.debug(f'Validated with country: {country.name_en} ({country.country_id}) for id: {identifier}') # noqa - elif not self._add_to_report(74, valueline): + elif not self._add_to_report(322, valueline): success = False return success @@ -813,11 +813,11 @@ def check_instrument_name_and_model(self): valueline = self.extcsv.line_num('INSTRUMENT') + 2 if not name or name.lower() in ['na', 'n/a']: - if not self._add_to_report(82, valueline): + if not self._add_to_report(223, valueline): success = False self.extcsv.extcsv['INSTRUMENT']['Name'] = name = 'UNKNOWN' if not model or str(model).lower() in ['na', 'n/a']: - if not self._add_to_report(83, valueline): + if not self._add_to_report(224, valueline): success = False self.extcsv.extcsv['INSTRUMENT']['Model'] = model = 'UNKNOWN' @@ -834,7 +834,7 @@ def check_instrument_name_and_model(self): if instrument: name = instrument.name self.extcsv.extcsv['INSTRUMENT']['Name'] = instrument.name - elif not self._add_to_report(85, valueline, name=name): + elif not self._add_to_report(331, valueline, name=name): success = False # Check data registry for matching instrument model @@ -843,7 +843,7 @@ def check_instrument_name_and_model(self): if instrument: model = instrument.model 
self.extcsv.extcsv['INSTRUMENT']['Model'] = instrument.model - elif not self._add_to_report(86, valueline): + elif not self._add_to_report(332, valueline): success = False return success @@ -911,11 +911,11 @@ def check_location(self): lat_numeric = float(lat) if -90 <= lat_numeric <= 90: LOGGER.debug('Validated instrument latitude') - elif not self._add_to_report(78, valueline, field='Latitude', + elif not self._add_to_report(325, valueline, field='Latitude', lower=-90, upper=90): success = False except ValueError: - if not self._add_to_report(76, valueline, field='Longitude'): + if not self._add_to_report(339, valueline, field='Longitude'): success = False self.extcsv.extcsv['LOCATION']['Latitude'] = lat = None @@ -925,11 +925,11 @@ def check_location(self): lon_numeric = float(lon) if -180 <= lon_numeric <= 180: LOGGER.debug('Validated instrument longitude') - elif not self._add_to_report(78, valueline, field='Longitude', + elif not self._add_to_report(325, valueline, field='Longitude', lower=-180, upper=180): success = False except ValueError: - if not self._add_to_report(76, valueline, field='Longitude'): + if not self._add_to_report(339, valueline, field='Longitude'): success = False self.extcsv.extcsv['LOCATION']['Longitude'] = lon = None @@ -939,10 +939,10 @@ def check_location(self): height_numeric = float(height) if height else None if not height or -50 <= height_numeric <= 5100: LOGGER.debug('Validated instrument height') - elif not self._add_to_report(79, valueline, lower=-50, upper=5100): + elif not self._add_to_report(326, valueline, lower=-50, upper=5100): success = False except ValueError: - if not self._add_to_report(77, valueline): + if not self._add_to_report(324, valueline): success = False self.extcsv.extcsv['LOCATION']['Height'] = height = None @@ -975,18 +975,18 @@ def check_location(self): if lat_numeric is not None and instrument.y is not None \ and abs(lat_numeric - instrument.y) >= lat_interval: - if not self._add_to_report(80, valueline, 
field='Latitude'): + if not self._add_to_report(327, valueline, field='Latitude'): success = False if lon_numeric is not None and instrument.x is not None: if in_polar_region and ignore_polar_lon: LOGGER.info('Skipping longitude check in polar region') elif abs(lon_numeric - instrument.x) >= lon_interval: - if not self._add_to_report(80, valueline, + if not self._add_to_report(327, valueline, field='Longitude'): success = False if height_numeric is not None and instrument.z is not None \ and abs(height_numeric - instrument.z) >= height_interval: - if not self._add_to_report(81, valueline): + if not self._add_to_report(328, valueline): success = False return success @@ -1015,21 +1015,21 @@ def check_content(self): if not level: if dataset == 'UmkehrN14' and 'C_PROFILE' in self.extcsv.extcsv: - if not self._add_to_report(53, valueline, value=2.0): + if not self._add_to_report(217, valueline, value=2.0): success = False self.extcsv.extcsv['CONTENT']['Level'] = level = 2.0 else: - if not self._add_to_report(53, valueline, value=1.0): + if not self._add_to_report(217, valueline, value=1.0): success = False self.extcsv.extcsv['CONTENT']['Level'] = level = 1.0 elif not isinstance(level, float): try: - if not self._add_to_report(54, valueline, oldvalue=level, + if not self._add_to_report(218, valueline, oldvalue=level, newvalue=float(level)): success = False self.extcsv.extcsv['CONTENT']['Level'] = level = float(level) except ValueError: - if not self._add_to_report(55, valueline): + if not self._add_to_report(310, valueline): success = False if dataset in ['UmkehrN14_1.0', 'UmkehrN14_2.0']: @@ -1038,17 +1038,17 @@ def check_content(self): table_index = dataset if str(level) not in DOMAINS['Datasets'][table_index]: - if not self._add_to_report(56, valueline, dataset=dataset): + if not self._add_to_report(309, valueline, dataset=dataset): success = False if not isinstance(form, int): try: - if not self._add_to_report(57, valueline, oldvalue=form, + if not 
self._add_to_report(219, valueline, oldvalue=form, newvalue=int(form)): success = False self.extcsv.extcsv['CONTENT']['Form'] = form = int(form) except ValueError: - if not self._add_to_report(58, valueline): + if not self._add_to_report(311, valueline): success = False return success @@ -1073,7 +1073,7 @@ def check_data_generation(self): valueline = self.extcsv.line_num('DATA_GENERATION') if not dg_date: - if not self._add_to_report(62, valueline): + if not self._add_to_report(221, valueline): success = False kwargs = {key: getattr(self.process_start, key) @@ -1086,7 +1086,7 @@ def check_data_generation(self): try: numeric_version = float(version) except TypeError: - if not self._add_to_report(63, valueline, default=1.0): + if not self._add_to_report(314, valueline, default=1.0): success = False self.extcsv.extcsv['DATA_GENERATION']['Version'] = version = '1.0' @@ -1097,17 +1097,17 @@ def check_data_generation(self): version = version[:-2] numeric_version = float(version) except ValueError: - if not self._add_to_report(66, valueline): + if not self._add_to_report(316, valueline): success = False if not success: return False if not 0 <= numeric_version <= 20: - if not self._add_to_report(64, valueline, lower=0.0, upper=20.0): + if not self._add_to_report(315, valueline, lower=0.0, upper=20.0): success = False if str(version) == str(int(numeric_version)): - if not self._add_to_report(65, valueline): + if not self._add_to_report(222, valueline): success = False self.extcsv.extcsv['DATA_GENERATION']['Version'] = \ @@ -1142,12 +1142,12 @@ def check_time_series(self): for line, other_date in enumerate(date_column, valueline): if (isinstance(other_date, (str, int, type(None))) or isinstance(dg_date, (str, int, type(None)))): - err_code = 91 if table.startswith('TIMESTAMP') else 92 + err_code = 336 if table.startswith('TIMESTAMP') else 337 if not self._add_to_report(err_code, line, table=table): success = False else: if other_date > dg_date: - err_code = 91 if 
table.startswith('TIMESTAMP') else 92 + err_code = 336 if table.startswith('TIMESTAMP') else 337 if not self._add_to_report(err_code, line, table=table): success = False @@ -1162,7 +1162,7 @@ def check_time_series(self): or isinstance(ts_time, (str, int, type(None)))): pass elif other_time and other_time < ts_time: - if not self._add_to_report(93, line): + if not self._add_to_report(228, line): success = False return success @@ -1215,16 +1215,16 @@ def check_data_record(self, data_record): version_equal = version == old_version if dg_date_before: - if not self._add_to_report(95, dg_valueline): + if not self._add_to_report(229, dg_valueline): success = False elif dg_date_equal and version_equal: - if not self._add_to_report(96, dg_valueline): + if not self._add_to_report(401, dg_valueline): success = False elif dg_date_equal: - if not self._add_to_report(97, dg_valueline): + if not self._add_to_report(402, dg_valueline): success = False elif version_equal: - if not self._add_to_report(98, dg_valueline): + if not self._add_to_report(403, dg_valueline): success = False instrument_name = self.extcsv.extcsv['INSTRUMENT']['Name'] @@ -1233,7 +1233,7 @@ def check_data_record(self, data_record): if instrument_name == 'ECC' and instrument_serial != old_serial: instrument_valueline = self.extcsv.line_num('INSTRUMENT') + 2 - if not self._add_to_report(99, instrument_valueline): + if not self._add_to_report(404, instrument_valueline): success = False return success diff --git a/woudc_data_registry/registry.py b/woudc_data_registry/registry.py index c0175522..c7c20ece 100644 --- a/woudc_data_registry/registry.py +++ b/woudc_data_registry/registry.py @@ -331,14 +331,15 @@ def save(self, obj=None): registry_config = config.EXTRAS.get('registry', {}) try: - flag_name = '_'.join([obj.__tablename__, 'enabled']) - if registry_config.get(flag_name, True): - # Use merge if needed: self.session.merge(obj) - self.session.add(obj) - else: - LOGGER.info(f'Registry persistence for \ - model 
{obj.__tablename__} disabled, skipping') - return + if obj is not None: + flag_name = '_'.join([obj.__tablename__, 'enabled']) + if registry_config.get(flag_name, True): + self.session.add(obj) + # self.session.merge(obj) + else: + LOGGER.info('Registry persistence for model {} disabled,' + ' skipping'.format(obj.__tablename__)) + return LOGGER.debug(f'Committing save of {obj}') self.session.commit() diff --git a/woudc_data_registry/tests/config/errors.csv b/woudc_data_registry/tests/config/errors.csv new file mode 100644 index 00000000..97758a43 --- /dev/null +++ b/woudc_data_registry/tests/config/errors.csv @@ -0,0 +1,126 @@ +Error Code,Error Type,Message Template,Notes +101,Warning,Not a text file +102,Error,No core metadata tables found. Not an Extended CSV file +103,Warning,Unexpected empty line between table header and fields +104,Warning,Improper delimiter used '{separator}' corrected to '\,' (comma) +105,Warning,#{table} field {oldfield} capitalization should be {newfield} +106,Warning,#{table} corrected to {newtable} using aliases +107,Warning,#{table} field {oldfield} corrected to {newfield} using aliases +108,Warning,#{table}.{field} value corrected to {newvalue} using aliases +109,Warning,#{table}.Time separator '{separator}' corrected to ':' (colon) +110,Warning,#{table}.Time corrected from 12-hour clock to 24-hour YYYY-MM-DD format +111,Warning,#{table}.Date separator '{separator}' corrected to '-' (hyphen) +112,Error,#{table}.Date not in YYYY-MM-DD format: missing separators +113,Error,#{table}.Date is incomplete +114,Error,#{table}.Date not in YYYY-MM-DD format: too many separators +115,Warning,#{table}.UTCOffset separator '{separator}' corrected to ':' (colon) +116,Warning,#{table}.UTCOffset {component} is less than 2 digits long +117,Warning,#{table}.UTCOffset {component} is missing\, default value is '00' (zero) +118,Warning,Missing sign in #{table}.UTCOffset\, default '+' (plus) +119,Warning,Invalid sign in #{table}.UTCOffset\, replacing with 
'{sign}' +120,Warning,#{table}.UTCOffset is a series of zeroes\, should be '+00:00:00' +121,Error,Cannot derive #MONTHLY table: missing #DAILY.ColumnO3 +122,Error,Lidar table counts are uneven between #OZONE_PROFILE and #OZONE_SUMMARY +123,Error,Spectral table counts are uneven between #TIMESTAMP\, #GLOBAL\, and #{summary_table} +201,Error,Missing required table #{table} +202,Error,Excess table #{table} does not belong in {dataset} file +203,Error,Missing required field #{table}.{field} +204,Error,Required field #{table}.{field} is null or empty +205,Warning,Optional field #{table}.{field} is null or empty +206,Error,Table #{table} has no fields +207,Error,Table #{table} has no fields,Placeholder for optional-table-specific error +208,Error,Required table #{table} contains no data +209,Warning,Optional table #{table} contains no data +210,Error,No non-core data tables found +211,Error,Unrecognized data {row} +212,Warning,#{table} row has more values than #{table} has columns +213,Error,Fewer than minimum {bound} occurrences of table #{table} found +214,Error,More than maximum {bound} occurrences of table #{table} found +215,Error,Fewer than minimum {bound} number of rows in table #{table} +216,Error,More than maximum {bound} number of rows in table #{table} +217,Warning,#CONTENT.Level should be {value} according to present tables +218,Warning,#CONTENT.Level {oldvalue} should be a decimal number ({newvalue}) +219,Warning,#CONTENT.Form {oldvalue} should be integral ({newvalue}) +220,Error,Cannot assess expected table set: #CONTENT.{field} unknown +221,Warning,Missing #DATA_GENERATION.Date\, defaults to processing date +222,Warning,#DATA_GENERATION.Version does not have decimal place +223,Error,Null value found for #INSTRUMENT.Name +224,Warning,Null value found for #INSTRUMENT.Model +225,Warning,Null value found for #INSTRUMENT.Number +226,Warning,Inconsistent Time values between #TIMESTAMP tables +227,Warning,#{table}.Date cannot be more recent than 
#DATA_GENERATION.Date,Reserved for non-TIMESTAMP tables
+228,Warning,First #TIMESTAMP.Time cannot be more recent than other time(s)
+229,Error,Submitted file #DATA_GENERATION.Date is earlier than previously submitted version
+230,Error,No ozone data in #DAILY table
+231,Warning,#DAILY.Date found in non-chronological order
+232,Warning,#DAILY.Date has different year than #TIMESTAMP.Date
+233,Warning,Duplicate observations found in #DAILY table for Date={date}
+234,Warning,Multiple observations found with #DAILY.Date {date}
+235,Warning,#TIMESTAMP.Date before #DAILY does not equal first date of #DAILY
+236,Warning,#TIMESTAMP.Date after #DAILY does not equal last date of #DAILY
+237,Warning,More than two #TIMESTAMP tables found in file
+238,Warning,#TIMESTAMP table after #DAILY is missing\, deriving based on requirements
+239,Warning,Missing #MONTHLY table\, deriving based on requirements
+240,Warning,Missing value for #MONTHLY.{field}\, deriving based on requirements
+241,Warning,#MONTHLY.{field} differs from derived value
+242,Warning,#OBSERVATIONS.Time found in non-chronological order
+243,Warning,Duplicate observations found in #OBSERVATIONS table for Time={time}
+244,Warning,Multiple observations found with #OBSERVATIONS.Time {time}
+245,Warning,#{table}.Date found in non-chronological order,Reserved for Umkehr data tables
+246,Warning,Multiple observations found with #{table}.Date {date},Reserved for Umkehr data tables
+247,Warning,#TIMESTAMP.Date before #{table} does not equal first date of #{table},Reserved for Umkehr data tables
+248,Warning,#TIMESTAMP.Date after #{table} does not equal last date of #{table},Reserved for Umkehr data tables
+249,Warning,#TIMESTAMP table after #{table} is missing\, deriving based on requirements,Reserved for Umkehr data tables
+250,Warning,Excess field {field} does not belong in table {table}
+251,Warning,Duplicate observations found in #{table} for Date={date},Reserved for Umkehr data tables
+301,Error,Failed to parse 
#{table}.Time {component}: contains invalid characters +302,Error,Failed to parse #{table}.Date {component}: contains invalid characters +303,Error,#{table}.Date {component} is not within allowable range [{lower}]-[{upper}],Reserved for year and month components +304,Error,#{table}.Date day is not within allowable range [{lower}]-[{upper}] +305,Error,Failed to parse #{table}.UTCOffset: contains invalid characters +306,Warning,Missing #CONTENT.Class\, default is 'WOUDC' +307,Error,#CONTENT.Class {value} failed to validate against registry +308,Error,#CONTENT.Category {value} failed to validate against registry +309,Error,Unknown #CONTENT.Level for dataset {dataset} +310,Error,Failed to parse #CONTENT.Level: contains invalid characters +311,Error,Failed to parse #CONTENT.Form: contains invalid characters +312,Error,Cannot resolve missing or invalid #CONTENT.Form +313,Error,Unknown #CONTENT.Form for dataset {dataset} and level {level} +314,Warning,Missing #DATA_GENERATION.Version\, defaults to {default} +315,Warning,#DATA_GENERATION.Version is not within allowable range [{lower}]-[{upper}] +316,Error,Failed to parse #DATA_GENERATION.Version: contains invalid characters +317,Error,#DATA_GENERATION.Agency failed to validate against registry +318,Warning,#PLATFORM.ID is not 3 digits long +319,Error,#PLATFORM.ID not found in registry +320,Error,#PLATFORM.Type in file does not match registry +321,Error,#PLATFORM.Name in file does not match registry +322,Error,#PLATFORM.Country in file does not match registry +323,Warning,Ship #PLATFORM.Country should be 'XY' to meet ISO-3166 standards +324,Warning,Failed to parse #LOCATION.Height: contains invalid characters +325,Error,#LOCATION.{field} is not within allowable range [{lower}]-[{upper}],Reserved for fields Latitude and Longitude +326,Warning,#LOCATION.Height is not within allowable range [{lower}]-[{upper}] +327,Warning,#LOCATION.{field} in file does not match registry,Reserved for fields Latitude and Longitude 
+328,Warning,#LOCATION.Height in file does not match registry
+329,Warning,Null value found for #INSTRUMENT.Model
+330,Warning,Null value found for #INSTRUMENT.Number
+331,Error,#INSTRUMENT.Name not found in registry
+332,Error,#INSTRUMENT.Model not found in registry
+333,Error,Instrument failed to validate against registry
+334,Error,Deployment {ident} not found in registry
+335,Error,Failed to parse #{table}.{field} due to errors: {reason}
+336,Warning,#{table}.Date cannot be more recent than #DATA_GENERATION.Date,Reserved for TIMESTAMP tables
+337,Warning,#{table}.Date cannot be more recent than #DATA_GENERATION.Date,Reserved for non-TIMESTAMP tables
+338,Warning,ECC instrument serial number different from previous submission
+339,Error,Failed to parse #LOCATION.{field}: contains invalid characters,Reserved for fields Latitude and Longitude
+340,Warning,#{table}.Time {component} is not within allowable range [{lower}]-[{upper}]
+401,Error,Submitted file version and #DATA_GENERATION.Date identical to previously submitted file
+402,Error,Submitted #DATA_GENERATION.Date is identical to previously submitted file
+403,Error,Submitted version number is identical to previously submitted file
+404,Warning,ECC instrument serial number different from previous submission
+405,Warning,Data file successfully persisted
+406,Warning,New instrument added
+407,Warning,New deployment added
+408,Warning,New station name added
+409,Warning,New contribution added
+410,Error,Data file failed to validate
+1000,Error,Unassigned error message \ No newline at end of file diff --git a/woudc_data_registry/tests/test_report_generation.py b/woudc_data_registry/tests/test_report_generation.py index 1b5759ac..2a7fdf58 100644 --- a/woudc_data_registry/tests/test_report_generation.py +++ b/woudc_data_registry/tests/test_report_generation.py @@ -123,21 +123,25 @@ def test_uses_error_definition(self): """Test that error/warning feedback responds to input files""" # The two error files below have 
different error types for error 1. - all_warnings = resolve_test_data_path('config/all_warnings.csv') - all_errors = resolve_test_data_path('config/all_errors.csv') + all_errors = resolve_test_data_path('config/errors.csv') with report.OperatorReport(SANDBOX_DIR) as op_report: - op_report.read_error_definitions(all_warnings) + op_report.read_error_definitions(all_errors) - self.assertIn(1, op_report._error_definitions) - _, success = op_report.add_message(1) + print("Checking Error 245") + self.assertIn(245, op_report._error_definitions) + _, success = op_report.add_message(245) self.assertFalse(success) - op_report.read_error_definitions(all_errors) + self.assertIn(101, op_report._error_definitions) + _, success = op_report.add_message(101) + self.assertFalse(success) + + # op_report.read_error_definitions(all_errors) - self.assertIn(1, op_report._error_definitions) - _, success = op_report.add_message(1) - self.assertTrue(success) + # self.assertIn(101, op_report._error_definitions) + # _, success = op_report.add_message(101) + # self.assertTrue(success) def test_passing_operator_report(self): """Test that a passing file is written in the operator report""" @@ -159,7 +163,7 @@ def test_passing_operator_report(self): output_path = os.path.join(SANDBOX_DIR, 'operator-report.csv') - op_report.add_message(200) # File passes validation + op_report.add_message(405) # File passes validation op_report.write_passing_file(infile, ecsv, data_record) self.assertTrue(os.path.exists(output_path)) @@ -169,7 +173,7 @@ def test_passing_operator_report(self): report_line = next(reader) self.assertEqual(report_line[0], 'P') - self.assertEqual(report_line[2], '200') + self.assertEqual(report_line[2], '405') self.assertIn(agency, report_line) self.assertIn(os.path.basename(infile), report_line) @@ -197,7 +201,7 @@ def test_warning_operator_report(self): output_path = os.path.join(SANDBOX_DIR, 'operator-report.csv') - op_report.add_message(200) # File passes validation + 
op_report.add_message(405) # File passes validation op_report.write_passing_file(infile, ecsv, data_record) self.assertTrue(os.path.exists(output_path)) @@ -217,7 +221,7 @@ def test_warning_operator_report(self): report_line = next(reader) self.assertEqual(report_line[0], 'P') self.assertEqual(report_line[1], 'Warning') - self.assertEqual(report_line[2], '200') + self.assertEqual(report_line[2], '405') self.assertIn(agency, report_line) self.assertIn(os.path.basename(infile), report_line) @@ -246,7 +250,7 @@ def test_failing_operator_report(self): NonStandardDataError): output_path = os.path.join(SANDBOX_DIR, 'run1') - op_report.add_message(209) + op_report.add_message(410) op_report.write_failing_file(infile, agency, ecsv) output_path = os.path.join(SANDBOX_DIR, @@ -277,7 +281,7 @@ def test_failing_operator_report(self): report_line = next(reader) self.assertEqual(report_line[0], 'F') self.assertEqual(report_line[1], 'Error') - self.assertEqual(report_line[2], '209') + self.assertEqual(report_line[2], '410') self.assertIn(agency, report_line) self.assertIn(os.path.basename(infile), report_line) @@ -314,7 +318,7 @@ def test_mixed_operator_report(self): NonStandardDataError) as err: expected_errors[fullpath] = len(err.errors) - op_report.add_message(209) + op_report.add_message(410) op_report.write_failing_file(fullpath, agency) continue @@ -334,7 +338,7 @@ def test_mixed_operator_report(self): expected_warnings[fullpath] = len(ecsv.warnings) expected_errors[fullpath] = len(ecsv.errors) - op_report.add_message(209) + op_report.add_message(410) op_report.write_failing_file(fullpath, agency, ecsv) output_path = os.path.join(SANDBOX_DIR, @@ -352,9 +356,9 @@ def test_mixed_operator_report(self): else: self.assertEqual(line[0], 'F') - if line[2] == '200': + if line[2] == '405': self.assertEqual(expected_errors[line[12]], 0) - elif line[2] == '209': + elif line[2] == '410': self.assertGreater(expected_errors[line[12]], 0) elif line[1] == 'Warning': warnings[line[12]] += 1 
From 6f1ecb3247a71d6a4ee8e00a7a98c4261aa60acb Mon Sep 17 00:00:00 2001 From: Simran Mattu Date: Wed, 18 Sep 2024 14:06:36 +0000 Subject: [PATCH 2/4] updated main.yml --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index eb9e7154..2f844058 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -20,7 +20,7 @@ jobs: WDR_SEARCH_URL: http://localhost:9200/ WDR_WAF_BASEURL: https://woudc.org/archive/ WDR_WAF_BASEDIR: /tmp - WDR_ERROR_CONFIG: woudc_data_registry/tests/config/Updated_Errors.csv + WDR_ERROR_CONFIG: woudc_data_registry/tests/config/errors.csv WDR_ALIAS_CONFIG: data/aliases.yml WDR_EXTRA_CONFIG: data/extra-options.yml GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}" From 6663eedae7fc352e1f551c34767b793e05f401e2 Mon Sep 17 00:00:00 2001 From: Simran Mattu Date: Wed, 18 Sep 2024 14:52:32 +0000 Subject: [PATCH 3/4] Fixed a test --- woudc_data_registry/tests/test_report_generation.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/woudc_data_registry/tests/test_report_generation.py b/woudc_data_registry/tests/test_report_generation.py index 2a7fdf58..5690628a 100644 --- a/woudc_data_registry/tests/test_report_generation.py +++ b/woudc_data_registry/tests/test_report_generation.py @@ -128,20 +128,14 @@ def test_uses_error_definition(self): with report.OperatorReport(SANDBOX_DIR) as op_report: op_report.read_error_definitions(all_errors) - print("Checking Error 245") self.assertIn(245, op_report._error_definitions) - _, success = op_report.add_message(245) + _, success = op_report.add_message(245, table="flight_summary") self.assertFalse(success) self.assertIn(101, op_report._error_definitions) _, success = op_report.add_message(101) self.assertFalse(success) - # op_report.read_error_definitions(all_errors) - - # self.assertIn(101, op_report._error_definitions) - # _, success = op_report.add_message(101) - # 
self.assertTrue(success) def test_passing_operator_report(self): """Test that a passing file is written in the operator report""" From 759e85e905d98b23177730fb370f90d981e5e29f Mon Sep 17 00:00:00 2001 From: Simran Mattu Date: Wed, 18 Sep 2024 15:37:37 +0000 Subject: [PATCH 4/4] test --- woudc_data_registry/processing.py | 8 ++++++-- woudc_data_registry/tests/test_report_generation.py | 3 --- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/woudc_data_registry/processing.py b/woudc_data_registry/processing.py index 7f15c36c..fafc87a1 100644 --- a/woudc_data_registry/processing.py +++ b/woudc_data_registry/processing.py @@ -939,7 +939,8 @@ def check_location(self): height_numeric = float(height) if height else None if not height or -50 <= height_numeric <= 5100: LOGGER.debug('Validated instrument height') - elif not self._add_to_report(326, valueline, lower=-50, upper=5100): + elif not self._add_to_report( + 326, valueline, lower=-50, upper=5100): success = False except ValueError: if not self._add_to_report(324, valueline): @@ -1147,7 +1148,10 @@ def check_time_series(self): success = False else: if other_date > dg_date: - err_code = 336 if table.startswith('TIMESTAMP') else 337 + err_code = ( + 336 if table.startswith('TIMESTAMP') + else 337 + ) if not self._add_to_report(err_code, line, table=table): success = False diff --git a/woudc_data_registry/tests/test_report_generation.py b/woudc_data_registry/tests/test_report_generation.py index 5690628a..2b5c70c5 100644 --- a/woudc_data_registry/tests/test_report_generation.py +++ b/woudc_data_registry/tests/test_report_generation.py @@ -136,7 +136,6 @@ def test_uses_error_definition(self): _, success = op_report.add_message(101) self.assertFalse(success) - def test_passing_operator_report(self): """Test that a passing file is written in the operator report""" @@ -938,7 +937,6 @@ def test_email_summary_single_fix(self): with open(output_path) as output: lines = output.read().splitlines() 
self.assertEqual(len(lines), 8) - self.assertEqual(lines[0], 'MSC (placeholder@mail.com)') self.assertEqual(lines[1], 'Total files received: 1') self.assertEqual(lines[2], 'Number of passed files: 0') @@ -1147,7 +1145,6 @@ def test_email_summary_multiple_causes(self): self.assertIn(lines[9], fail_group) self.assertNotIn('.csv', lines[10]) self.assertIn(lines[11], fail_group) - self.assertEqual(lines[12], 'Summary of Fixes:') self.assertNotIn('.csv', lines[13]) self.assertIn(lines[14], fix_group)