From 2aa2ec9a17fbb71509d5915b4d01211ca629dc60 Mon Sep 17 00:00:00 2001 From: Kai Zhang Date: Thu, 9 May 2024 18:22:21 +0800 Subject: [PATCH] v2.6.2 --- snapatac2-core/Cargo.toml | 6 +- .../src/preprocessing/bam/mark_duplicates.rs | 15 +++-- snapatac2-core/src/preprocessing/qc.rs | 28 +++++---- snapatac2-python/Cargo.lock | 62 +++---------------- snapatac2-python/Cargo.toml | 6 +- snapatac2-python/src/preprocessing.rs | 3 +- 6 files changed, 43 insertions(+), 77 deletions(-) diff --git a/snapatac2-core/Cargo.toml b/snapatac2-core/Cargo.toml index c22127f2..faf52315 100644 --- a/snapatac2-core/Cargo.toml +++ b/snapatac2-core/Cargo.toml @@ -14,8 +14,8 @@ anndata = "0.3.3" anyhow = "1.0" bigtools = { version = "0.4", features = ["read", "write"] } bincode = "1.3" -bed-utils = "0.2" -extsort = "0.4" +bed-utils = "0.3" +extsort = "0.5" flate2 = "1.0" tokio = "1.34" hora = "0.1" @@ -36,4 +36,4 @@ serde = "1.0" statrs = "0.16" smallvec = "1.13" tempfile = "3.3" -zstd = { version = "0.13", features = ["zstdmt"] } \ No newline at end of file +zstd = { version = "0.13", features = ["zstdmt"] } diff --git a/snapatac2-core/src/preprocessing/bam/mark_duplicates.rs b/snapatac2-core/src/preprocessing/bam/mark_duplicates.rs index 47f4ed01..42bd5ef3 100644 --- a/snapatac2-core/src/preprocessing/bam/mark_duplicates.rs +++ b/snapatac2-core/src/preprocessing/bam/mark_duplicates.rs @@ -28,7 +28,7 @@ use noodles::{ use bed_utils::bed::{BEDLike, Strand}; use std::collections::HashMap; use itertools::Itertools; -use extsort::{sorter::Sortable, ExternalSorter}; +use extsort::{Sortable, ExternalSorter}; use bincode; use log::warn; use rayon::prelude::ParallelSliceMut; @@ -165,12 +165,16 @@ impl AlignmentInfo { } impl Sortable for AlignmentInfo { - fn encode(&self, writer: &mut W) { - bincode::serialize_into(writer, self).unwrap(); + fn encode(&self, writer: &mut W) -> std::io::Result<()> { + bincode::serialize_into(writer, self).map_err(|e| + std::io::Error::new(std::io::ErrorKind::Other, e.to_string()) + ) } - fn decode(reader: &mut R) -> Option { - bincode::deserialize_from(reader).ok() + fn decode(reader: &mut R) -> std::io::Result { + bincode::deserialize_from(reader).map_err(|e| + std::io::Error::new(std::io::ErrorKind::Other, e.to_string()) + ) } } @@ -420,6 +424,7 @@ where .then_with(|| a.unclipped_start.cmp(&b.unclipped_start)) .then_with(|| a.unclipped_end.cmp(&b.unclipped_end)) ).unwrap() + .map(|x| x.unwrap()) } RecordGroups { diff --git a/snapatac2-core/src/preprocessing/qc.rs b/snapatac2-core/src/preprocessing/qc.rs index 1664b757..061b333e 100644 --- a/snapatac2-core/src/preprocessing/qc.rs +++ b/snapatac2-core/src/preprocessing/qc.rs @@ -3,7 +3,7 @@ use anndata::data::CsrNonCanonical; use bed_utils::bed::{GenomicRange, BEDLike, tree::BedTree, ParseError, Strand}; use anyhow::Result; use serde::{Serialize, Deserialize}; -use extsort::sorter::Sortable; +use extsort::Sortable; use bincode; use smallvec::{SmallVec, smallvec}; @@ -22,13 +22,16 @@ pub struct Fragment { } impl Sortable for Fragment { - fn encode(&self, writer: &mut W) { - bincode::serialize_into(writer, self) - .unwrap_or_else(|e| panic!("Failed to serialize fragment: {}", e)); + fn encode(&self, writer: &mut W) -> std::io::Result<()> { + bincode::serialize_into(writer, self).map_err(|e| + std::io::Error::new(std::io::ErrorKind::Other, e.to_string()) + ) } - fn decode(reader: &mut R) -> Option { - bincode::deserialize_from(reader).ok() + fn decode(reader: &mut R) -> std::io::Result { + bincode::deserialize_from(reader).map_err(|e| + std::io::Error::new(std::io::ErrorKind::Other, e.to_string()) + ) } } @@ -141,13 +144,16 @@ pub struct Contact { } impl Sortable for Contact { - fn encode(&self, writer: &mut W) { - bincode::serialize_into(writer, self) - .unwrap_or_else(|e| panic!("Failed to serialize fragment: {}", e)); + fn encode(&self, writer: &mut W) -> std::io::Result<()> { + bincode::serialize_into(writer, self).map_err(|e| + std::io::Error::new(std::io::ErrorKind::Other, e.to_string()) + ) } - fn decode(reader: &mut R) -> Option { - bincode::deserialize_from(reader).ok() + fn decode(reader: &mut R) -> std::io::Result { + bincode::deserialize_from(reader).map_err(|e| + std::io::Error::new(std::io::ErrorKind::Other, e.to_string()) + ) } } diff --git a/snapatac2-python/Cargo.lock b/snapatac2-python/Cargo.lock index a083c4e0..a224e634 100644 --- a/snapatac2-python/Cargo.lock +++ b/snapatac2-python/Cargo.lock @@ -253,15 +253,15 @@ checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "bed-utils" -version = "0.2.1" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c43420179bf4e3f57335f50092361dcc4229b8ccb79cb7c7bfe71aa1f5b8c105" +checksum = "f55747ca3b14034422f4a0fb4741ebb21d94c8a383f95fa44d138c1c1b05ae76" dependencies = [ "bincode", "bio", "extsort", "indexmap 2.2.3", - "itertools 0.8.2", + "itertools 0.12.1", "lexical", "num", "num-traits", @@ -306,9 +306,9 @@ dependencies = [ [[package]] name = "bio" -version = "1.5.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25dccfc5babf5a4f505ab5bdda0e18d4b5fc1600c222677c54992203632cbdf5" +checksum = "7a72cb93babf08c85b375c2938ac678cc637936b3ebb72266d433cec2577f6c2" dependencies = [ "anyhow", "approx 0.5.1", @@ -321,7 +321,6 @@ dependencies = [ "editdistancek", "enum-map", "fxhash", - "getset", "itertools 0.11.0", "itertools-num", "lazy_static", @@ -857,9 +856,9 @@ checksum = "b90ca2580b73ab6a1f724b76ca11ab632df820fd6040c336200d2c1df7b3c82c" [[package]] name = "extsort" -version = "0.4.2" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffc5bb6fbca3c5ce6a51f6857eab8c35c898b2fbcb62ff1b728243dd19ec0c9f" +checksum = "b55aeea941ed0cc991b26e54270a58fa2611708804d4b4f8a3f4c974ad161b2a" dependencies = [ "rayon", "skeptic", @@ -1040,18 +1039,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "getset" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e45727250e75cc04ff2846a66397da8ef2b3db8e40e0cef4df67950a07621eb9" -dependencies = [ - "proc-macro-error", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "gimli" version = "0.28.1" @@ -1291,15 +1278,6 @@ version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f958d3d68f4167080a18141e10381e7634563984a537f2a49a30fd8e53ac5767" -[[package]] -name = "itertools" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f56a2d0bc861f9165be4eb3442afd3c236d8a98afd426f65d92324ae1091a484" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.10.5" @@ -2620,30 +2598,6 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" -[[package]] -name = "proc-macro-error" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" -dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn 1.0.109", - "version_check", -] - -[[package]] -name = "proc-macro-error-attr" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" -dependencies = [ - "proc-macro2", - "quote", - "version_check", -] - [[package]] name = "proc-macro2" version = "1.0.78" @@ -3266,7 +3220,7 @@ dependencies = [ [[package]] name = "snapatac2" -version = "2.6.2-dev0" +version = "2.6.2" dependencies = [ "anndata", "anndata-hdf5", diff --git a/snapatac2-python/Cargo.toml b/snapatac2-python/Cargo.toml index 01c916c7..ba0fecd3 100644 --- a/snapatac2-python/Cargo.toml +++ b/snapatac2-python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snapatac2" -version = "2.6.2-dev0" +version = "2.6.2" edition = "2021" authors = ["Kai Zhang "] description = "Rust APIs" @@ -15,9 +15,9 @@ snapatac2-core = { path = "../snapatac2-core" } anndata = "0.3.3" anndata-hdf5 = "0.2" pyanndata = "0.3.3" -extsort = "0.4" +extsort = "0.5" anyhow = "1.0" -bed-utils = "0.2" +bed-utils = "0.3" flate2 = "1.0" itertools = "0.12" indicatif = "0.17" diff --git a/snapatac2-python/src/preprocessing.rs b/snapatac2-python/src/preprocessing.rs index 2d448b12..716f94df 100644 --- a/snapatac2-python/src/preprocessing.rs +++ b/snapatac2-python/src/preprocessing.rs @@ -109,7 +109,7 @@ pub(crate) fn import_fragments( f }); let sorted_fragments: Box> = if !fragment_is_sorted_by_name { - Box::new(bed::sort_bed_by_key(fragments, |x| x.barcode.clone(), tempdir)) + Box::new(bed::sort_bed_by_key(fragments, |x| x.barcode.clone(), tempdir).map(|x| x.unwrap())) } else { Box::new(fragments) }; @@ -164,6 +164,7 @@ pub(crate) fn import_contacts( .with_sort_dir(tmp.path().to_path_buf()) .with_parallel_sort() .sort_by_key(contacts, |x| x.barcode.clone()).unwrap() + .map(|x| x.unwrap()) ) } else { Box::new(contacts)