-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for pod5 files generated via dorado basecaller (#368)
* Add support for pod5 files generated via dorado basecaller
* remove unnecessary newline to trigger PR checks again
* Add support for full dorado basecaller based structure
* Update JD according to Code Review
- Loading branch information
1 parent
bc2d0d5
commit affe710
Showing
18 changed files
with
1,480 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
30 changes: 30 additions & 0 deletions
30
src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/Pod5File.groovy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
package life.qbic.datamodel.datasets.datastructure.files.nanopore | ||
|
||
import life.qbic.datamodel.datasets.datastructure.files.DataFile | ||
|
||
/**
 * A specialisation of a DataFile that represents an Oxford Nanopore pod5 file.
 *
 * Instances are obtained through {@link #create(String, String)}. The file name is
 * checked against NAME_SCHEMA during construction.
 */
class Pod5File extends DataFile {

    final private static String FILE_TYPE = "pod5"

    // Regular expression a valid file name has to satisfy: it must end in ".pod5".
    final private static String NAME_SCHEMA = /.*\.pod5$/

    /**
     * Passes name, relative path and the fixed file type "pod5" to the DataFile
     * constructor and validates the name afterwards.
     *
     * @param name The file name
     * @param relativePath The relative path of the file
     * @throws IllegalArgumentException if the name does not match NAME_SCHEMA
     */
    protected Pod5File(String name, String relativePath) {
        super(name, relativePath, FILE_TYPE)
        validateName()
    }

    /**
     * Creates a new instance of a Pod5File object
     *
     * @param name The file name
     * @param relativePath The relative path of the file
     * @return A new instance of a Pod5File object
     * @throws IllegalArgumentException if the name does not match NAME_SCHEMA
     */
    static Pod5File create(String name, String relativePath) {
        return new Pod5File(name, relativePath)
    }

    /**
     * Rejects names that do not match NAME_SCHEMA.
     * Reads this.name, which is assumed to be populated by the DataFile constructor.
     */
    private void validateName() {
        if (!(this.name =~ NAME_SCHEMA)) {
            // The message names the pod5 schema; it previously referred to the summary schema.
            throw new IllegalArgumentException("Name must match the Nanopore pod5 file schema!")
        }
    }

}
38 changes: 38 additions & 0 deletions
38
...groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/Fast5SkipFolder.groovy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
package life.qbic.datamodel.datasets.datastructure.folders.nanopore | ||
|
||
import life.qbic.datamodel.datasets.datastructure.files.nanopore.Fast5File | ||
import life.qbic.datamodel.datasets.datastructure.folders.DataFolder | ||
|
||
/**
 * A special case of a DataFolder for the Nanopore fast5_skip directory.
 *
 * Its children field contains a list of type {@code List<Fast5File>}.
 *
 * The folder name is checked with the regex find operator against NAME_SCHEMA,
 * so a name is accepted as soon as it contains "fast5_skip".
 */
class Fast5SkipFolder extends DataFolder {

    private static final String NAME_SCHEMA = /fast5_skip/

    protected Fast5SkipFolder() {}

    /**
     * Forwards all arguments to the DataFolder constructor and validates the name afterwards.
     *
     * @param name The folder name
     * @param relativePath The relative path of the folder
     * @param children A list with child elements of the folder
     * @throws IllegalArgumentException if the name does not match NAME_SCHEMA
     */
    protected Fast5SkipFolder(String name, String relativePath, List<Fast5File> children) {
        super(name, relativePath, children)
        validateName()
    }

    /**
     * Creates a new instance of a Fast5SkipFolder object
     *
     * @param name The folder name
     * @param relativePath The relative path of the folder
     * @param children A list with child elements of the folder
     * @return A new instance of a Fast5SkipFolder object
     * @throws IllegalArgumentException if the name does not match NAME_SCHEMA
     */
    static Fast5SkipFolder create(String name, String relativePath, List<Fast5File> children) {
        new Fast5SkipFolder(name, relativePath, children)
    }

    /**
     * Throws if NAME_SCHEMA cannot be found in the folder name.
     */
    private void validateName() {
        boolean nameMatchesSchema = (this.name =~ NAME_SCHEMA)
        if (nameMatchesSchema) {
            return
        }
        throw new IllegalArgumentException("Name must match the Nanopore Fast5Skip directory schema!")
    }
}
39 changes: 39 additions & 0 deletions
39
.../groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/Pod5FailFolder.groovy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
package life.qbic.datamodel.datasets.datastructure.folders.nanopore | ||
|
||
import life.qbic.datamodel.datasets.datastructure.folders.DataFolder | ||
|
||
/**
 * A special case of a DataFolder for the Nanopore pod5_fail directory.
 *
 * Its children field contains either a list of type {@code List<Pod5File>} or {@code List<Pod5Folder>}.
 *
 * The folder name is checked with the regex find operator against NAME_SCHEMA,
 * so a name is accepted as soon as it contains "pod5_fail".
 */
class Pod5FailFolder extends DataFolder {

    final private static String NAME_SCHEMA = /pod5_fail/

    protected Pod5FailFolder() {}

    /**
     * Forwards all arguments to the DataFolder constructor and validates the name afterwards.
     *
     * @param name The folder name
     * @param relativePath The relative path of the folder
     * @param children A list with child elements of the folder
     * @throws IllegalArgumentException if the name does not match NAME_SCHEMA
     */
    protected Pod5FailFolder(String name, String relativePath, List<?> children) {
        super(name, relativePath, children)
        validateName()
    }

    /**
     * Creates a new instance of a Pod5FailFolder object
     *
     * @param name The folder name
     * @param relativePath The relative path of the folder
     * @param children A list with child elements of the folder
     * @return A new instance of a Pod5FailFolder object
     * @throws IllegalArgumentException if the name does not match NAME_SCHEMA
     */
    static Pod5FailFolder create(String name, String relativePath, List<?> children) {
        new Pod5FailFolder(name, relativePath, children)
    }

    /**
     * Throws if NAME_SCHEMA cannot be found in the folder name.
     */
    private void validateName() {
        if (!(this.name =~ NAME_SCHEMA)) {
            throw new IllegalArgumentException("Name must match the Nanopore Pod5Fail directory schema!")
        }
    }
}
39 changes: 39 additions & 0 deletions
39
.../groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/Pod5PassFolder.groovy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
package life.qbic.datamodel.datasets.datastructure.folders.nanopore | ||
|
||
import life.qbic.datamodel.datasets.datastructure.folders.DataFolder | ||
|
||
/**
 * A special case of a DataFolder for the Nanopore pod5_pass directory.
 *
 * Its children field contains either a list of type {@code List<Pod5File>} or {@code List<Pod5Folder>}.
 *
 * The folder name is checked with the regex find operator against NAME_SCHEMA,
 * so a name is accepted as soon as it contains "pod5_pass".
 */
class Pod5PassFolder extends DataFolder {

    private static final String NAME_SCHEMA = /pod5_pass/

    protected Pod5PassFolder() {}

    /**
     * Forwards all arguments to the DataFolder constructor and validates the name afterwards.
     *
     * @param name The folder name
     * @param relativePath The relative path of the folder
     * @param children A list with child elements of the folder
     * @throws IllegalArgumentException if the name does not match NAME_SCHEMA
     */
    protected Pod5PassFolder(String name, String relativePath, List<?> children) {
        super(name, relativePath, children)
        validateName()
    }

    /**
     * Creates a new instance of a Pod5PassFolder object
     *
     * @param name The folder name
     * @param relativePath The relative path of the folder
     * @param children A list with child elements of the folder
     * @return A new instance of a Pod5PassFolder object
     * @throws IllegalArgumentException if the name does not match NAME_SCHEMA
     */
    static Pod5PassFolder create(String name, String relativePath, List<?> children) {
        return new Pod5PassFolder(name, relativePath, children)
    }

    /**
     * Throws if NAME_SCHEMA cannot be found in the folder name.
     */
    private void validateName() {
        boolean nameMatchesSchema = (this.name =~ NAME_SCHEMA)
        if (nameMatchesSchema) {
            return
        }
        throw new IllegalArgumentException("Name must match the Nanopore Pod5Pass directory schema!")
    }
}
38 changes: 38 additions & 0 deletions
38
.../groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/Pod5SkipFolder.groovy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
package life.qbic.datamodel.datasets.datastructure.folders.nanopore | ||
|
||
import life.qbic.datamodel.datasets.datastructure.files.nanopore.Pod5File | ||
import life.qbic.datamodel.datasets.datastructure.folders.DataFolder | ||
|
||
/**
 * A special case of a DataFolder for the Nanopore pod5_skip directory.
 *
 * Its children field contains a list of type {@code List<Pod5File>}.
 *
 * The folder name is checked with the regex find operator against NAME_SCHEMA,
 * so a name is accepted as soon as it contains "pod5_skip".
 */
class Pod5SkipFolder extends DataFolder {

    private static final String NAME_SCHEMA = /pod5_skip/

    protected Pod5SkipFolder() {}

    /**
     * Forwards all arguments to the DataFolder constructor and validates the name afterwards.
     *
     * @param name The folder name
     * @param relativePath The relative path of the folder
     * @param children A list with child elements of the folder
     * @throws IllegalArgumentException if the name does not match NAME_SCHEMA
     */
    protected Pod5SkipFolder(String name, String relativePath, List<Pod5File> children) {
        super(name, relativePath, children)
        validateName()
    }

    /**
     * Creates a new instance of a Pod5SkipFolder object
     *
     * @param name The folder name
     * @param relativePath The relative path of the folder
     * @param children A list with child elements of the folder
     * @return A new instance of a Pod5SkipFolder object
     * @throws IllegalArgumentException if the name does not match NAME_SCHEMA
     */
    static Pod5SkipFolder create(String name, String relativePath, List<Pod5File> children) {
        new Pod5SkipFolder(name, relativePath, children)
    }

    /**
     * Throws if NAME_SCHEMA cannot be found in the folder name.
     */
    private void validateName() {
        boolean nameMatchesSchema = (this.name =~ NAME_SCHEMA)
        if (nameMatchesSchema) {
            return
        }
        throw new IllegalArgumentException("Name must match the Nanopore Pod5Skip directory schema!")
    }
}
19 changes: 19 additions & 0 deletions
19
...groovy/life/qbic/datamodel/instruments/OxfordNanoporeInstrumentOutputDoradoMinimal.groovy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
package life.qbic.datamodel.instruments | ||
|
||
|
||
/**
 * Represents the Nanopore instrument output data structure schema that results from
 * employing the dorado basecaller with Pod5Files.
 *
 * The original schema is defined as a resource and is referenced here, wrapped in a
 * Groovy class, so that applications can validate the instrument output structure
 * against the schema.
 */
class OxfordNanoporeInstrumentOutputDoradoMinimal {

    // Resource path of the schema file that getSchemaAsStream() looks up.
    private static final String SCHEMA_PATH = "/schemas/nanopore-instrument-output_minimal_dorado.schema.json"

    /**
     * Looks up the schema resource via this class and opens it as a stream.
     *
     * @return The schema as an InputStream, exactly as returned by
     *         {@link Class#getResourceAsStream(String)} (which yields null when the
     *         resource cannot be found)
     */
    static InputStream getSchemaAsStream() {
        OxfordNanoporeInstrumentOutputDoradoMinimal.class.getResourceAsStream(SCHEMA_PATH)
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
Oops, something went wrong.