diff --git a/README.md b/README.md index 6248a6522..db8b29084 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,7 @@ Make sure, that you have defined the Github package Maven repository, in order f A Nanopore NGS measurement output is delivered to us as a nested folder structure, following this model: -![Nanopore Data Structure Model](./doc/figures/Nanopore_Data_Structure_Model.svg) +![Nanopore Data Structure Model](./doc/figures/Nanopore_Data_Structure_Model.png) #### Nanopore usage example diff --git a/doc/figures/Nanopore_Data_Structure_Model.png b/doc/figures/Nanopore_Data_Structure_Model.png new file mode 100644 index 000000000..d13f4297d Binary files /dev/null and b/doc/figures/Nanopore_Data_Structure_Model.png differ diff --git a/doc/figures/Nanopore_Data_Structure_Model.svg b/doc/figures/Nanopore_Data_Structure_Model.svg deleted file mode 100644 index b62c81d5a..000000000 --- a/doc/figures/Nanopore_Data_Structure_Model.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - -Root Folder(OxfordNanoporeExperiment)Root Folder...Measurement Folder(OxfordNanoporeMeasurement)Measurement Folder...111..n1..nFastQ Fail FolderFastQ Fail FolderFastQ Pass FolderFastQ Pass FolderFast5 Pass FolderFast5 Pass FolderFast5 Fail FolderFast5 Fail Folder1111111111111111Sequencing Summary LogSequencing Summary LogFinal Summary LogFinal Summary Log Report MD LogReport MD Log111111111111FastQ FolderFastQ FolderFastQ FileFastQ File110..n0..nBarcodedFolderBarcodedFolderExtendsExtendsData FileData FileExtendsExtendsDataFolderDataFolderExtendsExtendsDataFolderDataFolderExtendsExtendsUnclassified FolderUnclassified Folder110..n0..n110..n0..nFastQ FileFastQ File110..n0..n110..n0..nFastQ FolderFastQ FolderFastQ FileFastQ FileUnclassified FolderUnclassified Folder110..n0..nFastQ FileFastQ File110..n0..nFast5 FolderFast5 FolderFast5 FileFast5 FileUnclassified FolderUnclassified Folder110..n0..nFast5 FileFast5 File110..n0..nFast5 FolderFast5 FolderFast5 FileFast5 FileUnclassified FolderUnclassified 
Folder110..n0..nFast5 FileFast5 File110..n0..n110..n0..n110..n0..n110..n0..n110..n0..n110..n0..n110..n0..n110..n0..n110..n0..n110..n0..nOptional FileOptional File110..n0..nData FileData FileExtendsExtendsData FileData FileExtendsExtendsData FileData FileExtendsExtendsOther ReportsOther ReportsDataFolderDataFolderExtendsExtends110..10..1DataFolderDataFolderExtendsExtendsDataFolderDataFolderExtendsExtendsDataFolderDataFolderExtendsExtendsDuty Time LogDuty Time LogThroughput LogThroughput LogReport PDF LogReport PDF LogDrift Correction LogDrift Correction LogMux Scan Data LogMux Scan Data LogSequencing Summary LogSequencing Summary LogSequencing Telemetry LogSequencing Telemetry LogGuppy Basecalling Client LogGuppy Basecalling...Fastq Fail FolderFastq Fail Folder11111111Data FileData FileExtendsExtendsFastQ FolderFastQ FolderUnclassified FolderUnclassified FolderFastQ FileFastQ FileFastQ FileFastQ File110..n0..n110..n0..n0..n0..n0..n0..n110..n0..nFastq Pass FolderFastq Pass FolderFastQ FolderFastQ FolderUnclassified FolderUnclassified FolderFastQ FileFastQ FileFastQ FileFastQ File110..n0..n110..n0..n110..n0..n0..n0..n110..n0..nBasecallingBasecallingDataFolderDataFolderExtendsExtends11110..10..1111111111111111111DataFolderDataFolderExtendsExtendsDataFolderDataFolderExtendsExtends110..n0..n110..n0..n110..n0..n110..n0..n110..n0..nData FileData FileExtendsExtendsData FileData FileExtendsExtendsData FileData FileExtendsExtendsData FileData FileExtendsExtendsData FileData FileExtendsExtendsData FileData FileExtendsExtendsData FileData FileExtendsExtendsFastQ FolderFastQ FolderFast5 FolderFast5 FolderDataFolderDataFolderFastQ FileFastQ FileFast5 FileFast5 FileDataFileDataFileOptional FileOptional FileData FileData FileExtendsExtendsOptional FolderOptional FolderDataFolderDataFolder110..n0..nExtendsExtends110..n0..nOptional FolderOptional FolderDataFolderDataFolderExtendsExtends0..n0..n110..n0..nDrift Correction LogDrift Correction LogMux Scan Data LogMux Scan Data 
Log11111111Text is not SVG - cannot display \ No newline at end of file diff --git a/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy b/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy index 138c5292f..a8179982b 100644 --- a/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy +++ b/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy @@ -244,6 +244,7 @@ final class OxfordNanoporeExperiment implements ExperimentFolder { FAST5_FILE(FQDN_FILES + ".Fast5File"), FASTQ_FILE(FQDN_FILES + ".FastQFile"), FASTQ_ZIPPED_FILE(FQDN_FILES + ".FastQZippedFile"), + POD5_FILE(FQDN_FILES + ".Pod5File"), FINAL_SUMMARY_LOG(FQDN_FILES + ".FinalSummaryLog"), MUX_SCAN_DATA_LOG(FQDN_FILES + ".MuxScanDataLog"), REPORT_MD_LOG(FQDN_FILES + ".ReportMdLog"), @@ -299,10 +300,14 @@ final class OxfordNanoporeExperiment implements ExperimentFolder { FASTQ_FOLDER(FQDN_FOLDERS + ".FastQFolder"), FAST5_PASS_FOLDER(FQDN_FOLDERS + ".Fast5PassFolder"), FAST5_FAIL_FOLDER(FQDN_FOLDERS + ".Fast5FailFolder"), + FAST5_SKIP_FOLDER(FQDN_FOLDERS + ".Fast5SkipFolder"), FASTQ_PASS_FOLDER(FQDN_FOLDERS + ".FastQPassFolder"), FASTQ_FAIL_FOLDER(FQDN_FOLDERS + ".FastQFailFolder"), UNCLASSIFIED_FAST5_FOLDER(FQDN_FOLDERS + ".UnclassifiedFast5Folder"), UNCLASSIFIED_FASTQ_FOLDER(FQDN_FOLDERS + ".UnclassifiedFastQFolder"), + POD5_PASS_FOLDER(FQDN_FOLDERS + ".Pod5PassFolder"), + POD5_FAIL_FOLDER(FQDN_FOLDERS + ".Pod5FailFolder"), + POD5_SKIP_FOLDER(FQDN_FOLDERS + ".Pod5SkipFolder"), OTHER_REPORTS_FOLDER(FQDN_FOLDERS + ".OtherReportsFolder"), BASECALLING_FOLDER(FQDN_FOLDERS + ".BasecallingFolder"), diff --git a/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeMeasurement.groovy b/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeMeasurement.groovy index 131c1c401..463075248 100644 --- a/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeMeasurement.groovy +++ 
b/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeMeasurement.groovy @@ -57,13 +57,13 @@ final class OxfordNanoporeMeasurement { this.pooledSamplesMeasurement = containsAtLeastOneBarcodedFolder(folders["fast5pass"]) // There can be still pooled samples in the failed folder, worst case is all // samples failed, so we need to check there too - if (! pooledSamplesMeasurement) { + if (!pooledSamplesMeasurement) { this.pooledSamplesMeasurement = containsAtLeastOneBarcodedFolder(folders["fast5fail"]) } } private void assessBasecallingStatus() { - this.hasBasecallingData = folders["basecalling"]; + this.hasBasecallingData = folders["basecalling"] } private static boolean containsAtLeastOneBarcodedFolder(DataFolder folder) { @@ -84,12 +84,24 @@ final class OxfordNanoporeMeasurement { case Fast5FailFolder: folders["fast5fail"] = element as Fast5FailFolder break + case Fast5SkipFolder: + folders["fast5skip"] = element as Fast5SkipFolder + break case FastQPassFolder: folders["fastqpass"] = element as FastQPassFolder break case FastQFailFolder: folders["fastqfail"] = element as FastQFailFolder break + case Pod5PassFolder: + folders["pod5pass"] = element as Pod5PassFolder + break + case Pod5FailFolder: + folders["pod5fail"] = element as Pod5FailFolder + break + case Pod5SkipFolder: + folders["pod5skip"] = element as Pod5SkipFolder + break case DataFile: logFilesCollection.add(element as DataFile) break @@ -101,24 +113,38 @@ final class OxfordNanoporeMeasurement { } private void assessState() throws IllegalStateException { - // Condition one: Don't allow Fast5 pass and fail folder are empty - assessFast5Content() - // Condition two: Don't allow Fastq pass and fail folder are empty - assessFastQContent() + boolean isValid = false + // We need to ensure that fastq and fast5 information is provided if guppy basecaller was used + if (areFast5FoldersInMeasurement() && areFastQFoldersInMeasurement()) { + isValid = true + } + //// We need to ensure that pod5_skip and 
fast5_skip information is provided if dorado basecaller was used + if (arePod5FoldersInMeasurement()) { + isValid = true + } + if (isValid == false) { + throw new IllegalStateException("No valid data is contained in measurement") + } } - private void assessFast5Content() throws IllegalStateException { - if (folders["fast5pass"].getChildren().isEmpty() && folders["fast5fail"].getChildren() - .isEmpty()) { - throw new IllegalStateException("The fast5 pass folder and fail folder are empty.") - } + // Condition one: Don't allow empty Fast5 pass and fail folder + private boolean areFast5FoldersInMeasurement() { + return isDataFolderInMeasurement("fast5pass") || isDataFolderInMeasurement("fast5fail") + } + // Condition two: Don't allow empty Fastq pass and fail folder + private boolean areFastQFoldersInMeasurement() { + return isDataFolderInMeasurement("fastqpass") || isDataFolderInMeasurement("fastqfail") + } + // Condition three: Don't allow empty Pod5 skip and fast5 skip folder + private boolean arePod5FoldersInMeasurement() { + return isDataFolderInMeasurement("fast5skip") || isDataFolderInMeasurement("pod5skip") } - private void assessFastQContent() throws IllegalStateException { - if (folders["fastqpass"].getChildren().isEmpty() && folders["fastqfail"].getChildren() - .isEmpty()) { - throw new IllegalStateException("The fastq pass folder and fail folder are empty.") + private boolean isDataFolderInMeasurement(String string) { + if (folders[string] == null) { + return false } + return !folders[string].getChildren().isEmpty() } /** @@ -284,12 +310,25 @@ final class OxfordNanoporeMeasurement { private Map> prepareRawData(String sampleId) { final def result = new HashMap() final def dataFolders = [ - "fast5fail": (folders.get("fast5fail") as DataFolder), - "fast5pass": (folders.get("fast5pass") as DataFolder), - "fastqpass": (folders.get("fastqpass") as DataFolder), - "fastqfail": (folders.get("fastqfail") as DataFolder) + "fast5fail" : (folders.get("fast5fail") as 
DataFolder), + "fast5pass" : (folders.get("fast5pass") as DataFolder), + "fastqpass" : (folders.get("fastqpass") as DataFolder), + "fastqfail" : (folders.get("fastqfail") as DataFolder) ] - if(hasBasecallingData) dataFolders.put("basecalling", (folders.get("basecalling") as DataFolder)) + if (hasBasecallingData) dataFolders.put("basecalling", (folders.get("basecalling") as DataFolder)) + //Only add dorado based minimal required datafolders if present + if (folders.get("fast5skip") != null) { + dataFolders.put("fast5skip", (folders.get("fast5skip") as DataFolder)) + } + if (folders.get("pod5skip") != null) { + dataFolders.put("pod5skip", (folders.get("pod5skip") as DataFolder)) + } + if (folders.get("pod5fail") != null) { + dataFolders.put("pod5fail", (folders.get("pod5fail") as DataFolder)) + } + if (folders.get("pod5pass") != null) { + dataFolders.put("pod5pass", (folders.get("pod5pass") as DataFolder)) + } result.put(sampleId, dataFolders) return result } diff --git a/src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/Pod5File.groovy b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/Pod5File.groovy new file mode 100644 index 000000000..24b499380 --- /dev/null +++ b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/Pod5File.groovy @@ -0,0 +1,30 @@ +package life.qbic.datamodel.datasets.datastructure.files.nanopore + +import life.qbic.datamodel.datasets.datastructure.files.DataFile + +/** + * A specialisation of a DataFile, represents an Oxford Nanopore pod5 file + * + */ +class Pod5File extends DataFile { + + final private static String FILE_TYPE = "pod5" + + final private static String NAME_SCHEMA = /.*\.pod5$/ + + protected Pod5File(String name, String relativePath) { + super(name, relativePath, FILE_TYPE) + validateName() + } + + static Pod5File create(String name, String relativePath) { + return new Pod5File(name, relativePath) + } + + private void validateName() { + if (!(this.name =~ 
NAME_SCHEMA)) { + throw new IllegalArgumentException("Name must match the Nanopore Pod5 file schema!") + } + } + +} diff --git a/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/Fast5SkipFolder.groovy b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/Fast5SkipFolder.groovy new file mode 100644 index 000000000..9fe46e9cf --- /dev/null +++ b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/Fast5SkipFolder.groovy @@ -0,0 +1,38 @@ +package life.qbic.datamodel.datasets.datastructure.folders.nanopore + +import life.qbic.datamodel.datasets.datastructure.files.nanopore.Fast5File +import life.qbic.datamodel.datasets.datastructure.folders.DataFolder + +/** + * A special case of a DataFolder, its name is always fast5_skip. + * + * Its children field contains a list of type List + * + */ +class Fast5SkipFolder extends DataFolder { + + final private static String NAME_SCHEMA = /fast5_skip/ + + protected Fast5SkipFolder() {} + + protected Fast5SkipFolder(String name, String relativePath, List children) { + super(name, relativePath, children) + validateName() + } + + /** + * Creates a new instance of a Fast5SkipFolder object + * @param relativePath The relative path of the folder + * @param children A list with child elements of the folder + * @return A new instance of a Fast5SkipFolder object + */ + static Fast5SkipFolder create(String name, String relativePath, List children) { + return new Fast5SkipFolder(name, relativePath, children) + } + + private void validateName() { + if (!(this.name =~ NAME_SCHEMA)) { + throw new IllegalArgumentException("Name must match the Nanopore Fast5Skip directory schema!") + } + } +} diff --git a/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/Pod5FailFolder.groovy b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/Pod5FailFolder.groovy new file mode 100644 index 000000000..b1cbdc934 --- /dev/null +++ 
b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/Pod5FailFolder.groovy @@ -0,0 +1,39 @@ +package life.qbic.datamodel.datasets.datastructure.folders.nanopore + +import life.qbic.datamodel.datasets.datastructure.folders.DataFolder + +/** + * A special case of a DataFolder, its name is always pod5_fail. + * + * Its children field contains either a list of type List or List + * + */ +class Pod5FailFolder extends DataFolder { + + final private static String NAME_SCHEMA = /pod5_fail/ + + protected Pod5FailFolder() {} + + protected Pod5FailFolder(String name, String relativePath, List children) { + super(name, relativePath, children) + validateName() + } + + /** + * Creates a new instance of a Pod5FailFolder object + * + * @param name The folder name + * @param relativePath The relative path of the folder + * @param children A list with child elements of the folder + * @return A new instance of a Pod5FailFolder object + */ + static Pod5FailFolder create(String name, String relativePath, List children) { + new Pod5FailFolder(name, relativePath, children) + } + + private void validateName() { + if (!(this.name =~ NAME_SCHEMA)) { + throw new IllegalArgumentException("Name must match the Nanopore Pod5Fail directory schema!") + } + } +} diff --git a/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/Pod5PassFolder.groovy b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/Pod5PassFolder.groovy new file mode 100644 index 000000000..5de6adedf --- /dev/null +++ b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/Pod5PassFolder.groovy @@ -0,0 +1,39 @@ +package life.qbic.datamodel.datasets.datastructure.folders.nanopore + +import life.qbic.datamodel.datasets.datastructure.folders.DataFolder + +/** + * A special case of a DataFolder, its name is always pod5_pass. 
+ * + * Its children field contains either a list of type List or List + * + */ +class Pod5PassFolder extends DataFolder { + + final private static String NAME_SCHEMA = /pod5_pass/ + + protected Pod5PassFolder() {} + + protected Pod5PassFolder(String name, String relativePath, List> children) { + super(name, relativePath, children) + validateName() + } + + /** + * Creates a new instance of a Pod5PassFolder object + * + * @param name The folder name + * @param relativePath The relative path of the folder + * @param children A list with child elements of the folder + * @return A new instance of a Pod5PassFolder object + */ + static Pod5PassFolder create(String name, String relativePath, List> children) { + new Pod5PassFolder(name, relativePath, children) + } + + private void validateName() { + if (!(this.name =~ NAME_SCHEMA)) { + throw new IllegalArgumentException("Name must match the Nanopore Pod5Pass directory schema!") + } + } +} diff --git a/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/Pod5SkipFolder.groovy b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/Pod5SkipFolder.groovy new file mode 100644 index 000000000..fdd66ad0e --- /dev/null +++ b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/Pod5SkipFolder.groovy @@ -0,0 +1,38 @@ +package life.qbic.datamodel.datasets.datastructure.folders.nanopore + +import life.qbic.datamodel.datasets.datastructure.files.nanopore.Pod5File +import life.qbic.datamodel.datasets.datastructure.folders.DataFolder + +/** + * A special case of a DataFolder, its name is always pod5_skip. 
+ * + * Its children field contains a list of type List + * + */ +class Pod5SkipFolder extends DataFolder { + + final private static String NAME_SCHEMA = /pod5_skip/ + + protected Pod5SkipFolder() {} + + protected Pod5SkipFolder(String name, String relativePath, List children) { + super(name, relativePath, children) + validateName() + } + + /** + * Creates a new instance of a Pod5SkipFolder object + * @param relativePath The relative path of the folder + * @param children A list with child elements of the folder + * @return A new instance of a Pod5SkipFolder object + */ + static Pod5SkipFolder create(String name, String relativePath, List children) { + return new Pod5SkipFolder(name, relativePath, children) + } + + private void validateName() { + if (!(this.name =~ NAME_SCHEMA)) { + throw new IllegalArgumentException("Name must match the Nanopore Pod5Skip directory schema!") + } + } +} diff --git a/src/main/groovy/life/qbic/datamodel/instruments/OxfordNanoporeInstrumentOutputDoradoMinimal.groovy b/src/main/groovy/life/qbic/datamodel/instruments/OxfordNanoporeInstrumentOutputDoradoMinimal.groovy new file mode 100644 index 000000000..fcee2f0e8 --- /dev/null +++ b/src/main/groovy/life/qbic/datamodel/instruments/OxfordNanoporeInstrumentOutputDoradoMinimal.groovy @@ -0,0 +1,19 @@ +package life.qbic.datamodel.instruments + + +/** + * Represents the Nanopore instrument output data structure schema generated by employing the dorado basecaller with Pod5Files. + * + * The original schema is defined as a resource and is + * referenced here, wrapped in a Groovy class for reference + * in applications that want to validate the instrument + * output structure against the schema. 
+ */ +class OxfordNanoporeInstrumentOutputDoradoMinimal { + + private static final String SCHEMA_PATH = "/schemas/nanopore-instrument-output_minimal_dorado.schema.json" + + static InputStream getSchemaAsStream() { + return OxfordNanoporeInstrumentOutputDoradoMinimal.getResourceAsStream(SCHEMA_PATH) + } +} diff --git a/src/main/groovy/life/qbic/datamodel/instruments/OxfordNanoporeInstrumentOutputMinimal.groovy b/src/main/groovy/life/qbic/datamodel/instruments/OxfordNanoporeInstrumentOutputMinimal.groovy index b752f033e..6420b7c30 100644 --- a/src/main/groovy/life/qbic/datamodel/instruments/OxfordNanoporeInstrumentOutputMinimal.groovy +++ b/src/main/groovy/life/qbic/datamodel/instruments/OxfordNanoporeInstrumentOutputMinimal.groovy @@ -11,7 +11,7 @@ package life.qbic.datamodel.instruments */ class OxfordNanoporeInstrumentOutputMinimal { - private static final String SCHEMA_PATH = "/schemas/nanopore-instrument-output_minimal_schema.json" + private static final String SCHEMA_PATH = "/schemas/nanopore-instrument-output_minimal.schema.json" static InputStream getSchemaAsStream() { return OxfordNanoporeInstrumentOutputMinimal.getResourceAsStream(SCHEMA_PATH) diff --git a/src/main/resources/schemas/nanopore-instrument-output_minimal_schema.json b/src/main/resources/schemas/nanopore-instrument-output_minimal.schema.json similarity index 100% rename from src/main/resources/schemas/nanopore-instrument-output_minimal_schema.json rename to src/main/resources/schemas/nanopore-instrument-output_minimal.schema.json diff --git a/src/main/resources/schemas/nanopore-instrument-output_minimal_dorado.schema.json b/src/main/resources/schemas/nanopore-instrument-output_minimal_dorado.schema.json new file mode 100644 index 000000000..35fc42fce --- /dev/null +++ b/src/main/resources/schemas/nanopore-instrument-output_minimal_dorado.schema.json @@ -0,0 +1,661 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": 
"http://qbic.life/nanopore-instrument-output_minimal_dorado.schema.json", + "title": "Nanopore Instrument Output dorado basecalled minimal", + "description": "Describes in which form PromethION/MinION sequenced Nanopore data is received from the medical genetics lab. To be used if no other schema fits the description and ensure that the minimal necessary files are provided if the dorado basecaller was employed", + "definitions": { + "folder": { + "description": "Describes a folder", + "type": "object", + "required": [ + "name", + "path", + "children" + ], + "properties": { + "name": { + "description": "Folder name", + "type": "string", + "minLength": 1 + }, + "path": { + "description": "relative folderpath", + "type": "string", + "minLength": 1 + }, + "children": { + "description": "Describes files and/or sub-folders if existent", + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "$ref": "#/definitions/file" + } + ] + } + } + } + }, + "file": { + "description": "Describes a file", + "type": "object", + "required": [ + "name", + "path", + "file_type" + ], + "properties": { + "name": { + "type": "string", + "minLength": 1 + }, + "path": { + "type": "string", + "minLength": 1 + }, + "file_type": { + "type": "string", + "minLength": 1 + } + } + }, + "qbic_code": { + "description": "Describes a QBiC code used as a prefix", + "type": "string", + "pattern": "Q\\w{4}\\d{3}[A-X][A-X0-9].*" + }, + "barcoded_folder": { + "description": "folder starting with qbic barcode prefix", + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "$ref": "#/definitions/qbic_code" + } + } + } + ] + }, + "fast5_file": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "file_type": { + "pattern": "fast5" + } + } + } + ] + }, + "pod5_file": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "file_type": { + "pattern": "pod5" + } + } + } + ] + }, + 
"fastqgz_file": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "file_type": { + "pattern": "fastq.gz" + } + } + } + ] + }, + "fastq_file": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "file_type": { + "pattern": "fastq" + } + } + } + ] + }, + "unclassified_folder": { + "description": "folder containing unassigned read file(s)", + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "unclassified" + } + } + } + ] + }, + "fast5_unclassified_folder": { + "description": "folder containing fast5 data from a pooling experiment, that could not be assigned to one of the known samples", + "allOf": [ + { + "$ref": "#/definitions/unclassified_folder" + }, + { + "properties": { + "children": { + "items": { + "$ref": "#/definitions/fast5_file" + }, + "minItems": 0 + } + } + } + ] + }, + "pod5_unclassified_folder": { + "description": "folder containing pod5 data from a pooling experiment, that could not be assigned to one of the known samples", + "allOf": [ + { + "$ref": "#/definitions/unclassified_folder" + }, + { + "properties": { + "children": { + "items": { + "$ref": "#/definitions/pod5_file" + }, + "minItems": 0 + } + } + } + ] + }, + "fastq_unclassified_folder": { + "description": "folder containing fastq and/or fastq.gz data from a pooling experiment, that could not be assigned to one of the known samples", + "allOf": [ + { + "$ref": "#/definitions/unclassified_folder" + }, + { + "properties": { + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/fastqgz_file" + }, + { + "$ref": "#/definitions/fastq_file" + } + ] + }, + "minItems": 0 + } + } + } + ] + }, + "fast5_subfolder": { + "description": "folder containing fast5 data from a single sample (only when pooling is used)", + "allOf": [ + { + "$ref": "#/definitions/barcoded_folder" + }, + { + "properties": { + "children": { + "items": { + "$ref": "#/definitions/fast5_file" + }, + "minItems": 1 
+ } + } + } + ] + }, + "pod5_subfolder": { + "description": "folder containing pod5 data from a single sample (only when pooling is used)", + "allOf": [ + { + "$ref": "#/definitions/barcoded_folder" + }, + { + "properties": { + "children": { + "items": { + "$ref": "#/definitions/pod5_file" + }, + "minItems": 1 + } + } + } + ] + }, + "fast5_fail": { + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "fast5_fail" + }, + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/fast5_subfolder" + }, + { + "$ref": "#/definitions/fast5_unclassified_folder" + }, + { + "$ref": "#/definitions/fast5_file" + } + ] + } + } + } + } + ] + }, + "fast5_pass": { + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "fast5_pass" + }, + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/fast5_subfolder" + }, + { + "$ref": "#/definitions/fast5_unclassified_folder" + }, + { + "$ref": "#/definitions/fast5_file" + } + ] + } + } + } + } + ] + }, + "fast5_skip": { + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "fast5_skip" + }, + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/fast5_file" + } + ] + } + } + } + } + ] + }, + "pod5_skip": { + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "pod5_skip" + }, + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/pod5_file" + } + ] + } + } + } + } + ] + }, + "pod5_fail": { + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "pod5_fail" + }, + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/pod5_subfolder" + }, + { + "$ref": "#/definitions/pod5_unclassified_folder" + }, + { + "$ref": "#/definitions/pod5_file" + } + ] + } + } + } + } + ] + }, + "pod5_pass": { + "allOf": [ + { + "$ref": "#/definitions/folder" + 
}, + { + "properties": { + "name": { + "pattern": "pod5_pass" + }, + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/pod5_subfolder" + }, + { + "$ref": "#/definitions/pod5_unclassified_folder" + }, + { + "$ref": "#/definitions/pod5_file" + } + ] + } + } + } + } + ] + }, + "fastq_fail": { + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "fastq_fail" + }, + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/fastq_subfolder" + }, + { + "$ref": "#/definitions/fastq_unclassified_folder" + }, + { + "$ref": "#/definitions/fastqgz_file" + } + ] + } + } + } + } + ] + }, + "basecalling": { + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "basecalling" + }, + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/fastq_fail" + }, + { + "$ref": "#/definitions/fastq_pass" + }, + { + "$ref": "#/definitions/optional_file" + } + ] + } + } + } + } + ] + }, + "fastq_pass": { + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "fastq_pass" + }, + "children": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/fastq_subfolder" + }, + { + "$ref": "#/definitions/fastq_unclassified_folder" + }, + { + "$ref": "#/definitions/fastqgz_file" + } + ] + } + } + } + } + ] + }, + "fastq_subfolder": { + "description": "folder containing gzipped fastq data from a single sample (only when pooling is used)", + "allOf": [ + { + "$ref": "#/definitions/barcoded_folder" + }, + { + "properties": { + "children": { + "items": { + "$ref": "#/definitions/fastqgz_file" + }, + "minItems": 1 + } + } + } + ] + }, + "optional_folder": { + "description": "Folder not expected in the current schemas but not invalidating the minimal datastructure required", + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": {} + } + ] + }, + "measurements": { + "description": "Top folder generated by 
the facility, containing one or more timestamped measurements", + "allOf": [ + { + "$ref": "#/definitions/barcoded_folder" + }, + { + "properties": { + "children": { + "items": { + "allOf": [ + { + "$ref": "#/definitions/measurement" + } + ] + }, + "minItems": 1 + } + } + } + ] + }, + "measurement": { + "allOf": [ + { + "$ref": "#/definitions/folder" + }, + { + "properties": { + "name": { + "pattern": "\\d{4}(0?[1-9]|1[012])(0?[1-9]|[12][0-9]|3[01])_([01][0-9]|2[0-3])([0-5][0-9]).*", + "description": "Name of measurement subfolder. Starts with date and time of measurement e.g. 20200122_1217..." + }, + "children": { + "type": "array", + "minItems": 6, + "contains": { + "oneOf": [ + { + "$ref": "#/definitions/fast5_skip" + }, + { + "$ref": "#/definitions/pod5_skip" + }, + { + "$ref": "#/definitions/final_summary_log" + }, + { + "$ref": "#/definitions/report_md_log" + }, + { + "$ref": "#/definitions/sequencing_summary_log" + } + ] + }, + "minContains": 5, + "uniqueItems": true + } + } + } + ] + }, + "final_summary_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "final_summary_.*" + }, + "file_type": { + "pattern": "txt" + } + } + } + ] + }, + "report_md_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "report_.*" + }, + "file_type": { + "pattern": "md" + } + } + } + ] + }, + "sequencing_summary_log": { + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": { + "name": { + "pattern": "sequencing_summary_.*" + }, + "file_type": { + "pattern": "txt" + } + } + } + ] + }, + "optional_file": { + "description": "File not expected in the current schemas but not invalidating the minimal datastructure required", + "allOf": [ + { + "$ref": "#/definitions/file" + }, + { + "properties": {} + } + ] + } + }, + "allOf": [ + { + "$ref": "#/definitions/measurements" + } + ] +} diff --git 
a/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeExperimentSpec.groovy b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeExperimentSpec.groovy index 3b94c87a0..fc051ce6b 100644 --- a/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeExperimentSpec.groovy +++ b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeExperimentSpec.groovy @@ -61,6 +61,12 @@ class OxfordNanoporeExperimentSpec extends Specification { @Shared Map minimalDataStructurePooled + @Shared + Map minimalDoradoDataStructure + + @Shared + Map fullDoradoDataStructure + def setupSpec() { def folder = "nanopore/" InputStream stream = Thread.currentThread().getContextClassLoader().getResourceAsStream(folder+"valid-example.json") @@ -89,6 +95,13 @@ class OxfordNanoporeExperimentSpec extends Specification { // read in minimal required pooled example stream = Thread.currentThread().getContextClassLoader().getResourceAsStream(folder+"valid-minimal-structure-pooled.json") minimalDataStructurePooled = (Map) new JsonSlurper().parse(stream) + stream.close() + // read in minimal required and full examples with dorado based basecalling + stream = Thread.currentThread().getContextClassLoader().getResourceAsStream(folder+"valid-minimal-structure-dorado-basecaller.json") + minimalDoradoDataStructure = (Map) new JsonSlurper().parse(stream) + stream.close() + stream = Thread.currentThread().getContextClassLoader().getResourceAsStream(folder+"valid-example-dorado-basecaller.json") + fullDoradoDataStructure = (Map) new JsonSlurper().parse(stream) stream.close() } @@ -179,6 +192,8 @@ class OxfordNanoporeExperimentSpec extends Specification { assert experiment.sampleCode == "QABCD001AB" assert measurements.size() == 1 assert measurements[0].asicTemp == "32.631687" + assert !measurements[0].getRawDataPerSample(experiment).get("QABCD001AB").containsKey("pod5skip") + assert 
!measurements[0].getRawDataPerSample(experiment).get("QABCD001AB").containsKey("fast5skip") } def "Create sample Oxford Nanopore experiment successfully for the minimal required pooled structure"() { @@ -195,6 +210,42 @@ class OxfordNanoporeExperimentSpec extends Specification { assert measurements[0].asicTemp == "32.631687" } + def "Create sample Oxford Nanopore experiment successfully for dorado basecaller generated minimal structure"() { + given: + final def example = minimalDoradoDataStructure + + when: + final def experiment = OxfordNanoporeExperiment.create(example) + final def measurements = experiment.getMeasurements() + + then: + assert experiment.sampleCode == "QABCD001AB" + assert measurements.size() == 1 + assert measurements[0].asicTemp == "32.631687" + assert measurements[0].getRawDataPerSample(experiment).get("QABCD001AB").containsKey("pod5skip") + assert measurements[0].getRawDataPerSample(experiment).get("QABCD001AB").containsKey("fast5skip") + } + + def "Create sample Oxford Nanopore experiment successfully for dorado basecaller generated full structure"() { + given: + final def example = fullDoradoDataStructure + + when: + final def experiment = OxfordNanoporeExperiment.create(example) + final def measurements = experiment.getMeasurements() + + then: + assert experiment.sampleCode == "QABCD001AB" + assert measurements.size() == 1 + assert measurements[0].asicTemp == "32.631687" + assert measurements[0].getRawDataPerSample(experiment).get("QABCD001AB").containsKey("pod5skip") + assert measurements[0].getRawDataPerSample(experiment).get("QABCD001AB").containsKey("fast5skip") + assert measurements[0].getRawDataPerSample(experiment).get("QABCD001AB").containsKey("fast5pass") + assert measurements[0].getRawDataPerSample(experiment).get("QABCD001AB").containsKey("fast5fail") + assert measurements[0].getRawDataPerSample(experiment).get("QABCD001AB").containsKey("pod5pass") + assert 
measurements[0].getRawDataPerSample(experiment).get("QABCD001AB").containsKey("pod5fail") + } + def "Create a simple pooled Oxford Nanopore experiment successfully"() { given: final def example = pooledDataStructure @@ -213,7 +264,7 @@ class OxfordNanoporeExperimentSpec extends Specification { } - def "Create unclassified example Oxford Nanopore experiment sucessfully"() { + def "Create unclassified example Oxford Nanopore experiment successfully"() { given: final def example = unclassifiedWorkingDataStructure diff --git a/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeMeasurementSpec.groovy b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeMeasurementSpec.groovy index ec875a22e..dfc827757 100644 --- a/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeMeasurementSpec.groovy +++ b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeMeasurementSpec.groovy @@ -37,6 +37,11 @@ class OxfordNanoporeMeasurementSpec extends Specification { UnclassifiedFast5Folder unclassifiedFast5Folder @Shared UnclassifiedFastQFolder unclassifiedFastQFolder + @Shared + Pod5SkipFolder pod5SkipFolder + @Shared + Fast5SkipFolder fast5SkipFolder + @Shared Map metaData @@ -230,6 +235,23 @@ class OxfordNanoporeMeasurementSpec extends Specification { } + def "If both pod5 skip and fast5 skip folder are empty, an IllegalStateException shall be thrown"() { + given: + def emptyPod5SkipFolder = Pod5SkipFolder.create("pod5_skip","root/pod5_skip", []) + def emptyFast5SkipFolder = Fast5SkipFolder.create("fast5_skip","root/fast5_skip", []) + + when: + OxfordNanoporeMeasurement.create( + "20200219_1107_1-E3-H3_PAE26974_454b8dc6", + "path/20200219_1107_1-E3-H3_PAE26974_454b8dc6", + [emptyPod5SkipFolder, emptyFast5SkipFolder], + metaData) + + then: + thrown(IllegalStateException) + + } + def "If either fastq pass or fail folder is empty, no IllegalStateException shall be thrown"() { given: def 
emptyFastQFailedFolder = FastQFailFolder.create("fastq_fail","root/fastq_fail", []) diff --git a/src/test/resources/nanopore/valid-example-dorado-basecaller.json b/src/test/resources/nanopore/valid-example-dorado-basecaller.json new file mode 100644 index 000000000..411f70d87 --- /dev/null +++ b/src/test/resources/nanopore/valid-example-dorado-basecaller.json @@ -0,0 +1,269 @@ +{ + "name": "QABCD001AB_E12A345a01_PAE12345", + "path": "./", + "children": [ + { + "name": "20200122_1217_1-A1-B1-PAE12345_1234567a", + "metadata": { + "adapter": "flongle", + "asic_temp": "32.631687", + "base_caller": "", + "base_caller_version": "3.2.8+bd67289", + "device_type": "promethion", + "flow_cell_id": "PAE26306", + "flow_cell_product_code": "FLO-PRO002", + "flow_cell_position": "2-A3-D3", + "hostname": "PCT0094", + "protocol": "sequencing/sequencing_PRO002_DNA:FLO-PRO002:SQK-LSK109:True", + "started": "2020-02-11T15:52:10.465982+01:00" + }, + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a", + "children": [ + { + "name": "report_.md", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/report_.md", + "file_type": "md" + }, + { + "name": "final_summary_.txt", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/final_summary_.txt", + "file_type": "txt" + }, + { + "name": "sequencing_summary_.txt", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/sequencing_summary_.txt", + "file_type": "txt" + }, + { + "name": "additional_file_.new", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/additional_file_.new", + "file_type": "new" + }, + { + "name": "not_relevant_file_.wow", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/not_relevant_file_.wow", + "file_type": "wow" + }, + { + "name": "unknown_folder", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/unknown_folder", + "children": [ + { + "name": "unknown_child_folder", + "path": "20200122_1217_1-A1-B1-PAE12345_1234567a/unknown_folder/unknown_child_folder", + "children": [ + { + "name": "unknown_file_.new", + 
"path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/unknown_folder/unknown_child_folder/unknown_file_.new", + "file_type": "new" + } + ] + }, + { + "name": "unknown_file_.new", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/unknown_folder/unknown_file_.new", + "file_type": "new" + } + ] + }, + { + "name": "fast5_skip", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_skip/", + "children": [ + { + "name": "myfile2.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_skip/myfile2.fast5", + "file_type": "fast5" + }, + { + "name": "myfile4.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_skip/myfile4.fast5", + "file_type": "fast5" + }, + { + "name": "myfile3.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_skip/myfile3.fast5", + "file_type": "fast5" + }, + { + "name": "myfile5.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_skip/myfile5.fast5", + "file_type": "fast5" + }, + { + "name": "myfile.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_skip/myfile.fast5", + "file_type": "fast5" + } + ] + }, + { + "name": "pod5_skip", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_skip/", + "children": [ + { + "name": "myfile2.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_skip/myfile2.pod5", + "file_type": "pod5" + }, + { + "name": "myfile4.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_skip/myfile4.pod5", + "file_type": "pod5" + }, + { + "name": "myfile3.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_skip/myfile3.pod5", + "file_type": "pod5" + }, + { + "name": "myfile5.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_skip/myfile5.pod5", + "file_type": "pod5" + }, + { + "name": "myfile.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_skip/myfile.pod5", + "file_type": "pod5" + } + ] + }, + { + "name": "pod5_pass", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_pass", + 
"children": [ + { + "name": "myfile2.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_pass/myfile2.pod5", + "file_type": "pod5" + }, + { + "name": "myfile4.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_pass/myfile4.pod5", + "file_type": "pod5" + }, + { + "name": "myfile3.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_pass/myfile3.pod5", + "file_type": "pod5" + }, + { + "name": "myfile5.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_pass/myfile5.pod5", + "file_type": "pod5" + }, + { + "name": "myfile.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_pass/myfile.pod5", + "file_type": "pod5" + } + ] + }, + { + "name": "pod5_fail", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_fail/", + "children": [ + { + "name": "myfile2.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_fail/myfile2.pod5", + "file_type": "pod5" + }, + { + "name": "myfile4.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_fail/myfile4.pod5", + "file_type": "pod5" + }, + { + "name": "myfile3.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_fail/myfile3.pod5", + "file_type": "pod5" + }, + { + "name": "myfile5.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_fail/myfile5.pod5", + "file_type": "pod5" + }, + { + "name": "myfile.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_fail/myfile.pod5", + "file_type": "pod5" + } + ] + }, + { + "name": "basecalling", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling", + "children": [ + { + "name": "fastq_pass", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass", + "children": [ + { + "name": "myfile3.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile3.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile2.fastq.gz", + "path": 
"./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile2.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile4.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile4.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile5.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile5.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile1.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile1.fastq.gz", + "file_type": "fastq.gz" + } + ] + }, + { + "name": "fastq_fail", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/", + "children": [ + { + "name": "myfile3.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile3.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile2.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile2.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile4.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile4.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile5.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile5.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile.fastq.gz", + "file_type": "fastq.gz" + } + ] + }, + { + "name": "guppy_basecall_client_log*.log", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/guppy_basecall_client_log*.log", + "file_type": "log" + } + ] + } + ] + } + ] +} diff --git a/src/test/resources/nanopore/valid-minimal-structure-dorado-basecaller.json b/src/test/resources/nanopore/valid-minimal-structure-dorado-basecaller.json new file mode 100644 index 000000000..9a60ed1cd --- /dev/null +++ 
b/src/test/resources/nanopore/valid-minimal-structure-dorado-basecaller.json @@ -0,0 +1,207 @@ +{ + "name": "QABCD001AB_E12A345a01_PAE12345", + "path": "./", + "children": [ + { + "name": "20200122_1217_1-A1-B1-PAE12345_1234567a", + "metadata": { + "adapter": "flongle", + "asic_temp": "32.631687", + "base_caller": "", + "base_caller_version": "3.2.8+bd67289", + "device_type": "promethion", + "flow_cell_id": "PAE26306", + "flow_cell_product_code": "FLO-PRO002", + "flow_cell_position": "2-A3-D3", + "hostname": "PCT0094", + "protocol": "sequencing/sequencing_PRO002_DNA:FLO-PRO002:SQK-LSK109:True", + "started": "2020-02-11T15:52:10.465982+01:00" + }, + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a", + "children": [ + { + "name": "report_.md", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/report_.md", + "file_type": "md" + }, + { + "name": "final_summary_.txt", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/final_summary_.txt", + "file_type": "txt" + }, + { + "name": "sequencing_summary_.txt", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/sequencing_summary_.txt", + "file_type": "txt" + }, + { + "name": "additional_file_.new", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/additional_file_.new", + "file_type": "new" + }, + { + "name": "not_relevant_file_.wow", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/not_relevant_file_.wow", + "file_type": "wow" + }, + { + "name": "unknown_folder", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/unknown_folder", + "children": [ + { + "name": "unknown_child_folder", + "path": "20200122_1217_1-A1-B1-PAE12345_1234567a/unknown_folder/unknown_child_folder", + "children": [ + { + "name": "unknown_file_.new", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/unknown_folder/unknown_child_folder/unknown_file_.new", + "file_type": "new" + } + ] + }, + { + "name": "unknown_file_.new", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/unknown_folder/unknown_file_.new", + "file_type": "new" + } + 
] + }, + { + "name": "fast5_skip", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_skip/", + "children": [ + { + "name": "myfile2.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_skip/myfile2.fast5", + "file_type": "fast5" + }, + { + "name": "myfile4.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_skip/myfile4.fast5", + "file_type": "fast5" + }, + { + "name": "myfile3.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_skip/myfile3.fast5", + "file_type": "fast5" + }, + { + "name": "myfile5.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_skip/myfile5.fast5", + "file_type": "fast5" + }, + { + "name": "myfile.fast5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_skip/myfile.fast5", + "file_type": "fast5" + } + ] + }, + { + "name": "pod5_skip", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_skip/", + "children": [ + { + "name": "myfile2.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_skip/myfile2.pod5", + "file_type": "pod5" + }, + { + "name": "myfile4.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_skip/myfile4.pod5", + "file_type": "pod5" + }, + { + "name": "myfile3.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_skip/myfile3.pod5", + "file_type": "pod5" + }, + { + "name": "myfile5.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_skip/myfile5.pod5", + "file_type": "pod5" + }, + { + "name": "myfile.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_skip/myfile.pod5", + "file_type": "pod5" + } + ] + }, + { + "name": "basecalling", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling", + "children": [ + { + "name": "fastq_pass", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass", + "children": [ + { + "name": "myfile3.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile3.fastq.gz", + "file_type": "fastq.gz" + 
}, + { + "name": "myfile2.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile2.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile4.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile4.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile5.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile5.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile1.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile1.fastq.gz", + "file_type": "fastq.gz" + } + ] + }, + { + "name": "fastq_fail", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/", + "children": [ + { + "name": "myfile3.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile3.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile2.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile2.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile4.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile4.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile5.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile5.fastq.gz", + "file_type": "fastq.gz" + }, + { + "name": "myfile.fastq.gz", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile.fastq.gz", + "file_type": "fastq.gz" + } + ] + }, + { + "name": "guppy_basecall_client_log*.log", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/guppy_basecall_client_log*.log", + "file_type": "log" + } + ] + } + ] + } + ] +}