From d43dd4a5e656b9cbd3d392c4f19d346097c1a669 Mon Sep 17 00:00:00 2001 From: Steffengreiner Date: Thu, 24 Oct 2024 17:51:19 +0200 Subject: [PATCH 1/2] Provide Data Files and Folders for Bam and Pod5 registration --- .../datasets/OxfordNanoporeExperiment.groovy | 3 ++ .../datasets/OxfordNanoporeMeasurement.groovy | 48 +++++++++++-------- .../files/nanopore/BamFile.groovy | 30 ++++++++++++ .../folders/nanopore/BamFailFolder.groovy | 39 +++++++++++++++ .../folders/nanopore/BamPassFolder.groovy | 39 +++++++++++++++ 5 files changed, 140 insertions(+), 19 deletions(-) create mode 100644 src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/BamFile.groovy create mode 100644 src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BamFailFolder.groovy create mode 100644 src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BamPassFolder.groovy diff --git a/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy b/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy index a8179982b..5183fb5ad 100644 --- a/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy +++ b/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy @@ -244,6 +244,7 @@ final class OxfordNanoporeExperiment implements ExperimentFolder { FAST5_FILE(FQDN_FILES + ".Fast5File"), FASTQ_FILE(FQDN_FILES + ".FastQFile"), FASTQ_ZIPPED_FILE(FQDN_FILES + ".FastQZippedFile"), + BAM_FILE(FQDN_FILES + ".BamFile"), POD5_FILE(FQDN_FILES + ".Pod5File"), FINAL_SUMMARY_LOG(FQDN_FILES + ".FinalSummaryLog"), MUX_SCAN_DATA_LOG(FQDN_FILES + ".MuxScanDataLog"), @@ -308,6 +309,8 @@ final class OxfordNanoporeExperiment implements ExperimentFolder { POD5_PASS_FOLDER(FQDN_FOLDERS + ".Pod5PassFolder"), POD5_FAIL_FOLDER(FQDN_FOLDERS + ".Pod5FailFolder"), POD5_SKIP_FOLDER(FQDN_FOLDERS + ".Pod5SkipFolder"), + BAM_PASS_FOLDER(FQDN_FOLDERS + ".BamPassFolder"), + BAM_FAIL_FOLDER(FQDN_FOLDERS + ".BamFailFolder"), OTHER_REPORTS_FOLDER(FQDN_FOLDERS + ".OtherReportsFolder"), BASECALLING_FOLDER(FQDN_FOLDERS + ".BasecallingFolder"), diff --git a/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeMeasurement.groovy b/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeMeasurement.groovy index 463075248..df002c9ea 100644 --- a/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeMeasurement.groovy +++ b/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeMeasurement.groovy @@ -102,6 +102,12 @@ final class OxfordNanoporeMeasurement { case Pod5SkipFolder: folders["pod5skip"] = element as Pod5SkipFolder break + case BamPassFolder: + folders["bampass"] = element as BamPassFolder + break + case BamFailFolder: + folders["bamfail"] = element as BamFailFolder + break case DataFile: logFilesCollection.add(element as DataFile) break @@ -118,10 +124,13 @@ final class OxfordNanoporeMeasurement { if (areFast5FoldersInMeasurement() && areFastQFoldersInMeasurement()) { isValid = true } - //// We need to ensure that pod5_skip and fast5_skip information is provided if dorado basecaller was used + //// We need to ensure that pod5_pass and pod5_fail information is provided if dorado basecaller was used if (arePod5FoldersInMeasurement()) { isValid = true } + if (areBamFoldersInMeasurement()) { + isValid = true + } if (isValid == false) { throw new IllegalStateException("No valid data is contained in measurement") } @@ -137,7 +146,11 @@ final class OxfordNanoporeMeasurement { } // Condition three: Don't allow empty Pod5 skip and fast5 skip folder private boolean arePod5FoldersInMeasurement() { - return isDataFolderInMeasurement("fast5skip") || isDataFolderInMeasurement("pod5skip") + return isDataFolderInMeasurement("pod5pass") || isDataFolderInMeasurement("pod5fail") + } + + private boolean areBamFoldersInMeasurement() { + return isDataFolderInMeasurement("bampass") || isDataFolderInMeasurement("bamfail") } private boolean isDataFolderInMeasurement(String string) { @@ -159,6 +172,10 @@ final class OxfordNanoporeMeasurement { * "fast5pass": DataFolder * "fastqfail": DataFolder * "fastqpass": DataFolder + * "bamfail": DataFolder + * "bampass": DataFolder + * "pod5fail": DataFolder + * "pod5pass": DataFolder * "Other sample code": // In case of pooled samples * ... * @return nested Map with sample codes and data folders @@ -310,25 +327,18 @@ final class OxfordNanoporeMeasurement { private Map> prepareRawData(String sampleId) { final def result = new HashMap() final def dataFolders = [ - "fast5fail" : (folders.get("fast5fail") as DataFolder), - "fast5pass" : (folders.get("fast5pass") as DataFolder), - "fastqpass" : (folders.get("fastqpass") as DataFolder), - "fastqfail" : (folders.get("fastqfail") as DataFolder) + "fast5fail": (folders.get("fast5fail") as DataFolder), + "fast5pass": (folders.get("fast5pass") as DataFolder), + "fast5skip": (folders.get("fast5skip") as DataFolder), + "fastqpass": (folders.get("fastqpass") as DataFolder), + "fastqfail": (folders.get("fastqfail") as DataFolder), + "pod5pass": (folders.get("pod5pass") as DataFolder), + "pod5fail": (folders.get("pod5fail") as DataFolder), + "pod5skip": (folders.get("pod5skip") as DataFolder), + "bampass": (folders.get("bampass") as DataFolder), + "bamfail": (folders.get("bamfail") as DataFolder) ] if (hasBasecallingData) dataFolders.put("basecalling", (folders.get("basecalling") as DataFolder)) - //Only add dorado based minimal required datafolders if present - if (folders.get("fast5skip") != null) { - dataFolders.put("fast5skip", (folders.get("fast5skip") as DataFolder)) - } - if (folders.get("pod5skip") != null) { - dataFolders.put("pod5skip", (folders.get("pod5skip") as DataFolder)) - } - if (folders.get("pod5fail") != null) { - dataFolders.put("pod5fail", (folders.get("pod5fail") as DataFolder)) - } - if (folders.get("pod5pass") != null) { - dataFolders.put("pod5pass", (folders.get("pod5pass") as DataFolder)) - } result.put(sampleId, dataFolders) return result } diff --git a/src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/BamFile.groovy b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/BamFile.groovy new file mode 100644 index 000000000..49e82ff79 --- /dev/null +++ b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/BamFile.groovy @@ -0,0 +1,30 @@ +package life.qbic.datamodel.datasets.datastructure.files.nanopore + +import life.qbic.datamodel.datasets.datastructure.files.DataFile + +/** + * A specialisation of a DataFile, represents an Oxford Nanopore bam file + * + */ +class BamFile extends DataFile { + + final private static String FILE_TYPE = "bam" + + final private static String NAME_SCHEMA = /.*\.bam$/ + + protected BamFile(String name, String relativePath) { + super(name, relativePath, FILE_TYPE) + validateName() + } + + static BamFile create(String name, String relativePath) { + return new BamFile(name, relativePath) + } + + private void validateName() { + if (!(this.name =~ NAME_SCHEMA)) { + throw new IllegalArgumentException("Name must match the Nanopore summary schema!") + } + } + +} diff --git a/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BamFailFolder.groovy b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BamFailFolder.groovy new file mode 100644 index 000000000..a5adce73e --- /dev/null +++ b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BamFailFolder.groovy @@ -0,0 +1,39 @@ +package life.qbic.datamodel.datasets.datastructure.folders.nanopore + +import life.qbic.datamodel.datasets.datastructure.folders.DataFolder + +/** + * A special case of a DataFolder, its name is always bam_fail. + * + * Its children field contains either a list of type List or List + * + */ +class BamFailFolder extends DataFolder { + + final private static String NAME_SCHEMA = /bam_fail/ + + protected BamFailFolder() {} + + protected BamFailFolder(String name, String relativePath, List children) { + super(name, relativePath, children) + validateName() + } + + /** + * Creates a new instance of a BamFailFolder object + * + * @param name The folder name + * @param relativePath The relative path of the folder + * @param children A list with child elements of the folder + * @return A new instance of a BamFailFolder object + */ + static BamFailFolder create(String name, String relativePath, List children) { + new BamFailFolder(name, relativePath, children) + } + + private void validateName() { + if (!(this.name =~ NAME_SCHEMA)) { + throw new IllegalArgumentException("Name must match the Nanopore BamFail directory schema!") + } + } +} diff --git a/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BamPassFolder.groovy b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BamPassFolder.groovy new file mode 100644 index 000000000..e41136a30 --- /dev/null +++ b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BamPassFolder.groovy @@ -0,0 +1,39 @@ +package life.qbic.datamodel.datasets.datastructure.folders.nanopore + +import life.qbic.datamodel.datasets.datastructure.folders.DataFolder + +/** + * A special case of a DataFolder, its name is always bam_pass. + * + * Its children field contains either a list of type List or List + * + */ +class BamPassFolder extends DataFolder { + + final private static String NAME_SCHEMA = /bam_pass/ + + protected BamPassFolder() {} + + protected BamPassFolder(String name, String relativePath, List children) { + super(name, relativePath, children) + validateName() + } + + /** + * Creates a new instance of a BamPassFolder object + * + * @param name The folder name + * @param relativePath The relative path of the folder + * @param children A list with child elements of the folder + * @return A new instance of a BamPassFolder object + */ + static BamPassFolder create(String name, String relativePath, List children) { + new BamPassFolder(name, relativePath, children) + } + + private void validateName() { + if (!(this.name =~ NAME_SCHEMA)) { + throw new IllegalArgumentException("Name must match the Nanopore BamPass directory schema!") + } + } +} From a472f512310ea22e5e53f5a9a16db81f718bd6e5 Mon Sep 17 00:00:00 2001 From: Steffengreiner Date: Thu, 24 Oct 2024 17:51:38 +0200 Subject: [PATCH 2/2] Provide tests and data structure for bam and pod5 registration --- .../OxfordNanoporeExperimentSpec.groovy | 37 ++++- .../OxfordNanoporeMeasurementSpec.groovy | 81 +++++++++- .../files/nanopore/BamFileSpec.groovy | 37 +++++ .../files/nanopore/Pod5FileSpec.groovy | 37 +++++ ...nimal-structure-bam-dorado-basecaller.json | 139 ++++++++++++++++++ ...mal-structure-pod5-dorado-basecaller.json} | 122 ++++----------- 6 files changed, 345 insertions(+), 108 deletions(-) create mode 100644 src/test/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/BamFileSpec.groovy create mode 100644 src/test/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/Pod5FileSpec.groovy create mode 100644 src/test/resources/nanopore/valid-minimal-structure-bam-dorado-basecaller.json rename src/test/resources/nanopore/{valid-minimal-structure-dorado-basecaller.json => valid-minimal-structure-pod5-dorado-basecaller.json} (50%) diff --git a/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeExperimentSpec.groovy b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeExperimentSpec.groovy index fc051ce6b..5b01c3faf 100644 --- a/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeExperimentSpec.groovy +++ b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeExperimentSpec.groovy @@ -62,7 +62,10 @@ class OxfordNanoporeExperimentSpec extends Specification { Map minimalDataStructurePooled @Shared - Map minimalDoradoDataStructure + Map minimalPod5DoradoDataStructure + + @Shared + Map minimalBamDoradoDataStructure @Shared Map fullDoradoDataStructure @@ -96,8 +99,12 @@ class OxfordNanoporeExperimentSpec extends Specification { stream = Thread.currentThread().getContextClassLoader().getResourceAsStream(folder+"valid-minimal-structure-pooled.json") minimalDataStructurePooled = (Map) new JsonSlurper().parse(stream) // read in minimal required example with dorado based basecalling - stream = Thread.currentThread().getContextClassLoader().getResourceAsStream(folder+"valid-minimal-structure-dorado-basecaller.json") - minimalDoradoDataStructure = (Map) new JsonSlurper().parse(stream) + stream = Thread.currentThread().getContextClassLoader().getResourceAsStream(folder+"valid-minimal-structure-pod5-dorado-basecaller.json") + minimalPod5DoradoDataStructure = (Map) new JsonSlurper().parse(stream) + stream.close() + // read in minimal required example with dorado based basecalling + stream = Thread.currentThread().getContextClassLoader().getResourceAsStream(folder+"valid-minimal-structure-bam-dorado-basecaller.json") + minimalBamDoradoDataStructure = (Map) new JsonSlurper().parse(stream) stream.close() // read in minimal required example with dorado based basecalling stream = Thread.currentThread().getContextClassLoader().getResourceAsStream(folder+"valid-example-dorado-basecaller.json") @@ -210,9 +217,9 @@ class OxfordNanoporeExperimentSpec extends Specification { assert measurements[0].asicTemp == "32.631687" } - def "Create sample Oxford Nanopore experiment successfully for dorado basecaller generated minimal structure"() { + def "Create sample Oxford Nanopore experiment successfully for pod5 generated minimal structure"() { given: - final def example = minimalDoradoDataStructure + final def example = minimalPod5DoradoDataStructure when: final def experiment = OxfordNanoporeExperiment.create(example) @@ -222,8 +229,24 @@ class OxfordNanoporeExperimentSpec extends Specification { assert experiment.sampleCode == "QABCD001AB" assert measurements.size() == 1 assert measurements[0].asicTemp == "32.631687" - assert measurements[0].getRawDataPerSample(experiment).get("QABCD001AB").containsKey("pod5skip") - assert measurements[0].getRawDataPerSample(experiment).get("QABCD001AB").containsKey("fast5skip") + assert measurements[0].getRawDataPerSample(experiment).get("QABCD001AB").containsKey("pod5pass") + assert measurements[0].getRawDataPerSample(experiment).get("QABCD001AB").containsKey("pod5fail") + } + + def "Create sample Oxford Nanopore experiment successfully for bam generated minimal structure"() { + given: + final def example = minimalBamDoradoDataStructure + + when: + final def experiment = OxfordNanoporeExperiment.create(example) + final def measurements = experiment.getMeasurements() + + then: + assert experiment.sampleCode == "QABCD001AB" + assert measurements.size() == 1 + assert measurements[0].asicTemp == "32.631687" + assert measurements[0].getRawDataPerSample(experiment).get("QABCD001AB").containsKey("bampass") + assert measurements[0].getRawDataPerSample(experiment).get("QABCD001AB").containsKey("bamfail") } def "Create sample Oxford Nanopore experiment successfully for dorado basecaller generated full structure"() { diff --git a/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeMeasurementSpec.groovy b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeMeasurementSpec.groovy index dfc827757..2d6779f1c 100644 --- a/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeMeasurementSpec.groovy +++ b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/OxfordNanoporeMeasurementSpec.groovy @@ -2,8 +2,10 @@ package life.qbic.datamodel.datasets.datastructure import life.qbic.datamodel.datasets.ExperimentFolder import life.qbic.datamodel.datasets.OxfordNanoporeMeasurement +import life.qbic.datamodel.datasets.datastructure.files.nanopore.BamFile import life.qbic.datamodel.datasets.datastructure.files.nanopore.Fast5File import life.qbic.datamodel.datasets.datastructure.files.nanopore.FastQFile +import life.qbic.datamodel.datasets.datastructure.files.nanopore.Pod5File import life.qbic.datamodel.datasets.datastructure.folders.DataFolder import life.qbic.datamodel.datasets.datastructure.folders.nanopore.* import spock.lang.Shared @@ -38,9 +40,13 @@ class OxfordNanoporeMeasurementSpec extends Specification { @Shared UnclassifiedFastQFolder unclassifiedFastQFolder @Shared - Pod5SkipFolder pod5SkipFolder + Pod5PassFolder pod5PassedFolder @Shared - Fast5SkipFolder fast5SkipFolder + Pod5FailFolder pod5FailedFolder + @Shared + BamPassFolder bamPassedFolder + @Shared + BamFailFolder bamFailedFolder @Shared Map metaData @@ -62,6 +68,8 @@ class OxfordNanoporeMeasurementSpec extends Specification { ] def fast5File = Fast5File.create("test.fast5", "root/test.fast5") def fastQFile = FastQFile.create("test.fastq", "root/test.fastq") + def pod5File = Pod5File.create("test.pod5", "root/test.pod5") + def bamFile = BamFile.create("test.bam", "root/test.bam") fast5FailedFolder = Fast5FailFolder.create("fast5_fail","root/fast5_fail", [fast5File]) fast5PassedFolder = Fast5PassFolder.create("fast5_pass","root/fast5_pass", [fast5File]) fastQFailedFolder = FastQFailFolder.create("fastq_fail", "root/fastq_fail", [fastQFile]) @@ -74,6 +82,10 @@ class OxfordNanoporeMeasurementSpec extends Specification { // Content for the pooled samples including unclassified folders unclassifiedFast5Folder = UnclassifiedFast5Folder.create("unclassified", "fast5_fail/unclassified", [fast5File]) unclassifiedFastQFolder = UnclassifiedFastQFolder.create("unclassified", "fastq_pass/unclassified", [fastQFile]) + bamFailedFolder = BamFailFolder.create("bam_fail", "root/bam_fail", [bamFile]) + bamPassedFolder = BamPassFolder.create("bam_pass","root/bam_pass", [bamFile]) + pod5FailedFolder = Pod5FailFolder.create("pod5_fail", "root/pod5_fail", [pod5File]) + pod5PassedFolder = Pod5PassFolder.create("pod5_pass","root/pod5_pass", [pod5File]) } def "create simple measurement successfully"() { @@ -235,16 +247,33 @@ class OxfordNanoporeMeasurementSpec extends Specification { } - def "If both pod5 skip and fast5 skip folder are empty, an IllegalStateException shall be thrown"() { + def "If both pod5 pass and pod5 fail folder are empty, an IllegalStateException shall be thrown"() { given: - def emptyPod5SkipFolder = Pod5SkipFolder.create("pod5_skip","root/pod5_skip", []) - def emptyFast5SkipFolder = Fast5SkipFolder.create("fast5_skip","root/fast5_skip", []) + def emptyPod5PassFolder = Pod5PassFolder.create("pod5_pass","root/pod5_pass", []) + def emptyPod5FailFolder = Pod5FailFolder.create("pod5_fail","root/pod5_fail", []) when: OxfordNanoporeMeasurement.create( "20200219_1107_1-E3-H3_PAE26974_454b8dc6", "path/20200219_1107_1-E3-H3_PAE26974_454b8dc6", - [emptyPod5SkipFolder, emptyFast5SkipFolder], + [emptyPod5PassFolder, emptyPod5FailFolder], + metaData) + + then: + thrown(IllegalStateException) + + } + + def "If both bam pass and bam fail folder are empty, an IllegalStateException shall be thrown"() { + given: + def emptyBamPassFolder = BamPassFolder.create("bam_pass","root/bam_pass", []) + def emptyBamFailFolder = BamFailFolder.create("bam_fail","root/bam_fail", []) + + when: + OxfordNanoporeMeasurement.create( + "20200219_1107_1-E3-H3_PAE26974_454b8dc6", + "path/20200219_1107_1-E3-H3_PAE26974_454b8dc6", + [emptyBamPassFolder, emptyBamFailFolder], metaData) then: @@ -282,6 +311,46 @@ class OxfordNanoporeMeasurementSpec extends Specification { noExceptionThrown() } + def "If either bam pass or bam folder is empty, no IllegalStateException shall be thrown"() { + given: + def emptyBamFailFolder = BamFailFolder.create("bam_fail","root/bam_fail", []) + def emptyBamPassFolder = BamPassFolder.create("bam_pass","root/bam_pass", []) + when: + OxfordNanoporeMeasurement.create( + "20200219_1107_1-E3-H3_PAE26974_454b8dc6", + "path/20200219_1107_1-E3-H3_PAE26974_454b8dc6", + [bamPassedFolder, emptyBamFailFolder], + metaData) + OxfordNanoporeMeasurement.create( + "20200219_1107_1-E3-H3_PAE26974_454b8dc6", + "path/20200219_1107_1-E3-H3_PAE26974_454b8dc6", + [emptyBamPassFolder, bamFailedFolder], + metaData) + then: + noExceptionThrown() + } + + def "If either pod5 pass or pod5 folder is empty, no IllegalStateException shall be thrown"() { + given: + def emptyPod5FailedFolder = Pod5FailFolder.create("pod5_fail","root/pod5_fail", []) + def emptyPod5PassedFolder = Pod5PassFolder.create("pod5_pass","root/pod5_pass", []) + when: + OxfordNanoporeMeasurement.create( + "20200219_1107_1-E3-H3_PAE26974_454b8dc6", + "path/20200219_1107_1-E3-H3_PAE26974_454b8dc6", + [pod5PassedFolder, emptyPod5FailedFolder], + metaData) + + OxfordNanoporeMeasurement.create( + "20200219_1107_1-E3-H3_PAE26974_454b8dc6", + "path/20200219_1107_1-E3-H3_PAE26974_454b8dc6", + [emptyPod5PassedFolder, pod5FailedFolder], + metaData) + then: + noExceptionThrown() + } + + def "missing adapter metadata shall return an empty String and not be null"() { given: def metaData = [ diff --git a/src/test/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/BamFileSpec.groovy b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/BamFileSpec.groovy new file mode 100644 index 000000000..98133b5a0 --- /dev/null +++ b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/BamFileSpec.groovy @@ -0,0 +1,37 @@ +package life.qbic.datamodel.datasets.datastructure.files.nanopore + +import spock.lang.Specification + +/** + * + * + */ +class BamFileSpec extends Specification { + + def "shall create a BamFile instance"() { + given: + final name = "test_file.bam" + final relativePath = "root/test_file.bam" + + when: + def dataObject = BamFile.create(name, relativePath) + + then: + assert dataObject instanceof BamFile + assert dataObject.relativePath == relativePath + assert dataObject.name == name + } + + def "name not matching schema shall throw IllegalArgumentException"() { + given: + final name = "test_file.bum" + final relativePath = "root/test_file.bum" + + when: + def dataObject = BamFile.create(name, relativePath) + + then: + thrown(IllegalArgumentException) + } + +} diff --git a/src/test/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/Pod5FileSpec.groovy b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/Pod5FileSpec.groovy new file mode 100644 index 000000000..edc6334b9 --- /dev/null +++ b/src/test/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/Pod5FileSpec.groovy @@ -0,0 +1,37 @@ +package life.qbic.datamodel.datasets.datastructure.files.nanopore + +import spock.lang.Specification + +/** + * + * + */ +class Pod5FileSpec extends Specification { + + def "shall create a Pod5 instance"() { + given: + final name = "test_file.pod5" + final relativePath = "root/test_file.pod5" + + when: + def dataObject = Pod5File.create(name, relativePath) + + then: + assert dataObject instanceof Pod5File + assert dataObject.relativePath == relativePath + assert dataObject.name == name + } + + def "name not matching schema shall throw IllegalArgumentException"() { + given: + final name = "test_file.pad4" + final relativePath = "root/test_file.pad4" + + when: + def dataObject = Pod5File.create(name, relativePath) + + then: + thrown(IllegalArgumentException) + } + +} diff --git a/src/test/resources/nanopore/valid-minimal-structure-bam-dorado-basecaller.json b/src/test/resources/nanopore/valid-minimal-structure-bam-dorado-basecaller.json new file mode 100644 index 000000000..d1b8c671d --- /dev/null +++ b/src/test/resources/nanopore/valid-minimal-structure-bam-dorado-basecaller.json @@ -0,0 +1,139 @@ +{ + "name": "QABCD001AB_E12A345a01_PAE12345", + "path": "./", + "children": [ + { + "name": "20200122_1217_1-A1-B1-PAE12345_1234567a", + "metadata": { + "adapter": "flongle", + "asic_temp": "32.631687", + "base_caller": "", + "base_caller_version": "3.2.8+bd67289", + "device_type": "promethion", + "flow_cell_id": "PAE26306", + "flow_cell_product_code": "FLO-PRO002", + "flow_cell_position": "2-A3-D3", + "hostname": "PCT0094", + "protocol": "sequencing/sequencing_PRO002_DNA:FLO-PRO002:SQK-LSK109:True", + "started": "2020-02-11T15:52:10.465982+01:00" + }, + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a", + "children": [ + { + "name": "report_.md", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/report_.md", + "file_type": "md" + }, + { + "name": "final_summary_.txt", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/final_summary_.txt", + "file_type": "txt" + }, + { + "name": "sequencing_summary_.txt", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/sequencing_summary_.txt", + "file_type": "txt" + }, + { + "name": "additional_file_.new", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/additional_file_.new", + "file_type": "new" + }, + { + "name": "not_relevant_file_.wow", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/not_relevant_file_.wow", + "file_type": "wow" + }, + { + "name": "unknown_folder", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/unknown_folder", + "children": [ + { + "name": "unknown_child_folder", + "path": "20200122_1217_1-A1-B1-PAE12345_1234567a/unknown_folder/unknown_child_folder", + "children": [ + { + "name": "unknown_file_.new", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/unknown_folder/unknown_child_folder/unknown_file_.new", + "file_type": "new" + } + ] + }, + { + "name": "unknown_file_.new", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/unknown_folder/unknown_file_.new", + "file_type": "new" + } + ] + }, + { + "name": "bam_pass", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/bam_pass/", + "children": [ + { + "name": "myfile2.bam", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/bam/myfile2.bam", + "file_type": "bam" + }, + { + "name": "myfile4.bam", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/bam_pass/myfile4.bam", + "file_type": "bam" + }, + { + "name": "myfile3.bam", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/bam_pass/myfile3.bam", + "file_type": "bam" + }, + { + "name": "myfile5.bam", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/bam_pass/myfile5.bam", + "file_type": "bam" + }, + { + "name": "myfile.bam", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/bam_pass/myfile.bam", + "file_type": "bam" + } + ] + }, + { + "name": "bam_fail", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/bam_fail/", + "children": [ + { + "name": "myfile2.bam", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/bam_fail/myfile2.bam", + "file_type": "bam" + }, + { + "name": "myfile4.bam", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/bam_fail/myfile4.bam", + "file_type": "bam" + }, + { + "name": "myfile3.bam", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/bam_fail/myfile3.bam", + "file_type": "bam" + }, + { + "name": "myfile5.bam", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/bam_fail/myfile5.bam", + "file_type": "bam" + }, + { + "name": "myfile.bam", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/bam_fail/myfile.bam", + "file_type": "bam" + } + ] + }, + { + "name": "guppy_basecall_client_log*.log", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/guppy_basecall_client_log*.log", + "file_type": "log" + } + ] + } + ] +} diff --git a/src/test/resources/nanopore/valid-minimal-structure-dorado-basecaller.json b/src/test/resources/nanopore/valid-minimal-structure-pod5-dorado-basecaller.json similarity index 50% rename from src/test/resources/nanopore/valid-minimal-structure-dorado-basecaller.json rename to src/test/resources/nanopore/valid-minimal-structure-pod5-dorado-basecaller.json index 9a60ed1cd..1c063b2c2 100644 --- a/src/test/resources/nanopore/valid-minimal-structure-dorado-basecaller.json +++ b/src/test/resources/nanopore/valid-minimal-structure-pod5-dorado-basecaller.json @@ -67,139 +67,71 @@ ] }, { - "name": "fast5_skip", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_skip/", + "name": "pod5_pass", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_pass/", "children": [ { - "name": "myfile2.fast5", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_skip/myfile2.fast5", - "file_type": "fast5" + "name": "myfile2.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_pass/myfile2.pod5", + "file_type": "pod5" }, { - "name": "myfile4.fast5", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_skip/myfile4.fast5", - "file_type": "fast5" + "name": "myfile4.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_pass/myfile4.pod5", + "file_type": "pod5" }, { - "name": "myfile3.fast5", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_skip/myfile3.fast5", - "file_type": "fast5" + "name": "myfile3.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_pass/myfile3.pod5", + "file_type": "pod5" }, { - "name": "myfile5.fast5", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_skip/myfile5.fast5", - "file_type": "fast5" + "name": "myfile5.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_pass/myfile5.pod5", + "file_type": "pod5" }, { - "name": "myfile.fast5", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/fast5_skip/myfile.fast5", - "file_type": "fast5" + "name": "myfile.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_pass/myfile.pod5", + "file_type": "pod5" } ] }, { - "name": "pod5_skip", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_skip/", + "name": "pod5_fail", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_fail/", "children": [ { "name": "myfile2.pod5", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_skip/myfile2.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_fail/myfile2.pod5", "file_type": "pod5" }, { "name": "myfile4.pod5", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_skip/myfile4.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_fail/myfile4.pod5", "file_type": "pod5" }, { "name": "myfile3.pod5", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_skip/myfile3.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_fail/myfile3.pod5", "file_type": "pod5" }, { "name": "myfile5.pod5", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_skip/myfile5.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_fail/myfile5.pod5", "file_type": "pod5" }, { "name": "myfile.pod5", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_skip/myfile.pod5", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/pod5_fail/myfile.pod5", "file_type": "pod5" } ] }, { - "name": "basecalling", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling", - "children": [ - { - "name": "fastq_pass", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass", - "children": [ - { - "name": "myfile3.fastq.gz", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile3.fastq.gz", - "file_type": "fastq.gz" - }, - { - "name": "myfile2.fastq.gz", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile2.fastq.gz", - "file_type": "fastq.gz" - }, - { - "name": "myfile4.fastq.gz", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile4.fastq.gz", - "file_type": "fastq.gz" - }, - { - "name": "myfile5.fastq.gz", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile5.fastq.gz", - "file_type": "fastq.gz" - }, - { - "name": "myfile1.fastq.gz", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_pass/myfile1.fastq.gz", - "file_type": "fastq.gz" - } - ] - }, - { - "name": "fastq_fail", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/", - "children": [ - { - "name": "myfile3.fastq.gz", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile3.fastq.gz", - "file_type": "fastq.gz" - }, - { - "name": "myfile2.fastq.gz", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile2.fastq.gz", - "file_type": "fastq.gz" - }, - { - "name": "myfile4.fastq.gz", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile4.fastq.gz", - "file_type": "fastq.gz" - }, - { - "name": "myfile5.fastq.gz", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile5.fastq.gz", - "file_type": "fastq.gz" - }, - { - "name": "myfile.fastq.gz", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/fastq_fail/myfile.fastq.gz", - "file_type": "fastq.gz" - } - ] - }, - { - "name": "guppy_basecall_client_log*.log", - "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/guppy_basecall_client_log*.log", - "file_type": "log" - } - ] + "name": "guppy_basecall_client_log*.log", + "path": "./20200122_1217_1-A1-B1-PAE12345_1234567a/basecalling/guppy_basecall_client_log*.log", + "file_type": "log" } ] }