From f97216d2a74146f0eb0ee9141c48412657c87ed8 Mon Sep 17 00:00:00 2001 From: Kai Zhang Date: Wed, 1 May 2024 23:13:50 +0800 Subject: [PATCH] upgrade anndata-rs and pyo3 --- snapatac2-core/Cargo.toml | 10 +- .../src/preprocessing/bam/mark_duplicates.rs | 4 +- snapatac2-python/Cargo.lock | 250 +++++++++++++----- snapatac2-python/Cargo.toml | 18 +- .../python/snapatac2/preprocessing/_basic.py | 12 + snapatac2-python/src/call_peaks.rs | 41 ++- snapatac2-python/src/embedding.rs | 42 +-- snapatac2-python/src/export.rs | 24 +- snapatac2-python/src/lib.rs | 4 +- snapatac2-python/src/motif.rs | 12 +- snapatac2-python/src/preprocessing.rs | 14 +- snapatac2-python/src/utils.rs | 62 ++--- 12 files changed, 317 insertions(+), 176 deletions(-) diff --git a/snapatac2-core/Cargo.toml b/snapatac2-core/Cargo.toml index 7ec86b0f9..c22127f20 100644 --- a/snapatac2-core/Cargo.toml +++ b/snapatac2-core/Cargo.toml @@ -10,7 +10,7 @@ homepage = "https://github.com/" keywords = ["single-cell", "biology"] [dependencies] -anndata = "0.3.2" +anndata = "0.3.3" anyhow = "1.0" bigtools = { version = "0.4", features = ["read", "write"] } bincode = "1.3" @@ -21,16 +21,16 @@ tokio = "1.34" hora = "0.1" kdtree = "0.7" itertools = "0.12" -indexmap = "2.0" +indexmap = "2.2" indicatif = {version = "0.17", features = ["rayon"] } lexical = "6.1" log = "0.4" ndarray = { version = "0.15", features = ["rayon"] } num = "0.4" -noodles = { version = "0.64", features = ["core", "bam", "sam", "gff", "gtf"] } +noodles = { version = "0.70", features = ["core", "bam", "sam", "gff", "gtf"] } nalgebra-sparse = "0.9" -polars = { version = "0.37", features = ["ndarray", "dtype-categorical"] } -rayon = "1.8" +polars = { version = "0.39", features = ["ndarray", "dtype-categorical"] } +rayon = "1.10" regex = "1.6" serde = "1.0" statrs = "0.16" diff --git a/snapatac2-core/src/preprocessing/bam/mark_duplicates.rs b/snapatac2-core/src/preprocessing/bam/mark_duplicates.rs index a1ad7d1c8..47f4ed013 100644 --- 
a/snapatac2-core/src/preprocessing/bam/mark_duplicates.rs +++ b/snapatac2-core/src/preprocessing/bam/mark_duplicates.rs @@ -333,7 +333,7 @@ impl FlagStat { } if !flags.is_unmapped() { - if flags.is_properly_aligned() { + if flags.is_properly_segmented() { self.proper_pair += 1; } @@ -383,7 +383,7 @@ where flagstat.update(r); let flag = r.flags(); let is_properly_aligned = !flag.is_supplementary() && - (!is_paired || flag.is_properly_aligned()); + (!is_paired || flag.is_properly_segmented()); let flag_pass = !flag.intersects(flag_failed); let mapq_pass = mapq_filter.map_or(true, |min_q| { let q = r.mapping_quality().map_or(255, |x| x.get()); diff --git a/snapatac2-python/Cargo.lock b/snapatac2-python/Cargo.lock index 2155e405d..39d74c1ed 100644 --- a/snapatac2-python/Cargo.lock +++ b/snapatac2-python/Cargo.lock @@ -62,9 +62,9 @@ dependencies = [ [[package]] name = "anndata" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02958ada4a05b3b68abb3d60ab7a72576b8ff518573d5c88647606f824335538" +checksum = "bb25ab5522d849dda564d6807e7cd17dac43a0fd10d14770c8f8d9b39d4a12be" dependencies = [ "anyhow", "flate2", @@ -176,9 +176,9 @@ checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" [[package]] name = "argminmax" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "202108b46429b765ef483f8a24d5c46f48c14acfdacc086dd4ab6dddf6bcdbd2" +checksum = "52424b59d69d69d5056d508b260553afd91c57e21849579cd1f50ee8b8b88eaa" dependencies = [ "num-traits", ] @@ -866,6 +866,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fast-float" version = "0.2.0" @@ -1824,9 +1830,9 @@ dependencies = [ [[package]] name = "noodles" 
-version = "0.64.0" +version = "0.70.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fa9a3104049a2a6b31b8b3cf414774832f28cc6fe3fb5fca21ed28e77c7f95f" +checksum = "e5c7777c4301ec50202f778c15d73b88c30f9240a074f9b9a98fe7babfa5bfc8" dependencies = [ "noodles-bam", "noodles-core", @@ -1837,9 +1843,9 @@ dependencies = [ [[package]] name = "noodles-bam" -version = "0.56.0" +version = "0.60.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3189e8ecee801ab5c3f4ea9908c4196b429137d8d35d733f00f6681f9188be7" +checksum = "880f71c52dab49e073361e0427610e07eccea07ff48a2ecd8f133c648cb115d8" dependencies = [ "bit-vec 0.6.3", "bstr", @@ -1854,9 +1860,9 @@ dependencies = [ [[package]] name = "noodles-bgzf" -version = "0.26.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8970db2e84adb1007377dd3988258d7a64e3fc4c05602ebf94e1f8cba207c030" +checksum = "eff82b0fb78c11947b29ef50e8ddf0093813fa9e613af0e13dc53fc12b2dc3ea" dependencies = [ "byteorder", "bytes", @@ -1872,9 +1878,9 @@ checksum = "7336c3be652de4e05444c9b12a32331beb5ba3316e8872d92bfdd8ef3b06c282" [[package]] name = "noodles-csi" -version = "0.30.0" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a60dfe0919f7ecbd081a82eb1d32e8f89f9041932d035fe8309073c8c01277bf" +checksum = "938d7d865a3fbb079c7855e76eb1ef0be5d285dc039fa7776622225c7f708411" dependencies = [ "bit-vec 0.6.3", "byteorder", @@ -1885,9 +1891,9 @@ dependencies = [ [[package]] name = "noodles-gff" -version = "0.27.0" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14f8ec87fe3630f57d6d8ea24cbc2cbd0bfed1fe66238bda7a7c3fb6a36d3713" +checksum = "9216634517bf888abb425b10f3df7857ee3f584d4e46c8d6a2bb2c84acc4e10e" dependencies = [ "indexmap 2.2.3", "noodles-bgzf", @@ -1898,9 +1904,9 @@ dependencies = [ [[package]] name = "noodles-gtf" -version = "0.23.0" +version = 
"0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab70f06b15bbf2be2144fc6cbedd63666b23ba5fc2513a218973a01702457b2f" +checksum = "99d4fabc2e574e80c00341685e8f4df37ae0b5a00a6fecccfd7c99eb45d5a4cf" dependencies = [ "noodles-bgzf", "noodles-core", @@ -1909,9 +1915,9 @@ dependencies = [ [[package]] name = "noodles-sam" -version = "0.53.0" +version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f0d8e441368374f6e144989f823fd7c05e58cdaa3f97d22bb4d75b534327b87" +checksum = "6b0598e959a0e56fc60f11b3bc63bf11c332a530cb54883196c0eab1bd0d4b8a" dependencies = [ "bitflags 2.4.2", "bstr", @@ -2035,9 +2041,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "numpy" -version = "0.20.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef41cbb417ea83b30525259e30ccef6af39b31c240bda578889494c5392d331" +checksum = "ec170733ca37175f5d75a5bea5911d6ff45d2cd52849ce98b685394e4f2f37f4" dependencies = [ "libc", "ndarray", @@ -2132,6 +2138,12 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "parquet-format-safe" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1131c54b167dd4e4799ce762e1ab01549ebb94d5bdd13e6ec1b467491c378e1f" + [[package]] name = "parse-zoneinfo" version = "0.3.0" @@ -2246,25 +2258,29 @@ dependencies = [ [[package]] name = "polars" -version = "0.37.0" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e43795c49010cb851d45227caa17769e83760e21d260ba6285c563b754e1652f" +checksum = "0ea21b858b16b9c0e17a12db2800d11aa5b4bd182be6b3022eb537bbfc1f2db5" dependencies = [ "getrandom", + "polars-arrow", "polars-core", + "polars-error", "polars-io", "polars-lazy", "polars-ops", + "polars-parquet", "polars-sql", "polars-time", + "polars-utils", "version_check", ] [[package]] name = "polars-arrow" -version 
= "0.37.0" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "faacd21a2548fa6d50c72d6b8d4649a8e029a0f3c6c5545b7f436f0610e49b0f" +checksum = "725b09f2b5ef31279b66e27bbab63c58d49d8f6696b66b1f46c7eaab95e80f75" dependencies = [ "ahash", "atoi", @@ -2307,23 +2323,25 @@ dependencies = [ [[package]] name = "polars-compute" -version = "0.37.0" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32d9dc87f8003ae0edeef5ad9ac92b2a345480bbe17adad64496113ae84706dd" +checksum = "a796945b14b14fbb79b91ef0406e6fddca2be636e889f81ea5d6ee7d36efb4fe" dependencies = [ "bytemuck", + "either", "num-traits", "polars-arrow", "polars-error", "polars-utils", + "strength_reduce", "version_check", ] [[package]] name = "polars-core" -version = "0.37.0" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "befd4d280a82219a01035c4f901319ceba65998c594d0c64f9a439cdee1d7777" +checksum = "465f70d3e96b6d0b1a43c358ba451286b8c8bd56696feff020d65702aa33e35c" dependencies = [ "ahash", "bitflags 2.4.2", @@ -2354,9 +2372,9 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.37.0" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50f2435b02d1ba36d8c1f6a722cad04e4c0b2705a3112c5706e6960d405d7798" +checksum = "5224d5d05e6b8a6f78b75951ae1b5f82c8ab1979e11ffaf5fd41941e3d5b0757" dependencies = [ "polars-arrow-format", "regex", @@ -2366,9 +2384,9 @@ dependencies = [ [[package]] name = "polars-io" -version = "0.37.0" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b51fba2cf014cb39c2b38353d601540fb9db643be65abb9ca8ff44b9c4c4a88e" +checksum = "b2c8589e418cbe4a48228d64b2a8a40284a82ec3c98817c0c2bcc0267701338b" dependencies = [ "ahash", "atoi_simd", @@ -2398,9 +2416,9 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.37.0" +version = "0.39.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d83343e413346f048f3a5ad07c0ea4b5d0bada701a482878213142970b0ddff8" +checksum = "89b2632b1af668e2058d5f8f916d8fbde3cac63d03ae29a705f598e41dcfeb7f" dependencies = [ "ahash", "bitflags 2.4.2", @@ -2421,9 +2439,9 @@ dependencies = [ [[package]] name = "polars-ops" -version = "0.37.0" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6395f5fd5e1adf016fd6403c0a493181c1a349a7a145b2687cdf50a0d630310a" +checksum = "efdbdb4d9a92109bc2e0ce8e17af5ae8ab643bb5b7ee9d1d74f0aeffd1fbc95f" dependencies = [ "ahash", "argminmax", @@ -2449,11 +2467,30 @@ dependencies = [ "version_check", ] +[[package]] +name = "polars-parquet" +version = "0.39.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b421d2196f786fdfe162db614c8485f8308fe41575d4de634a39bbe460d1eb6a" +dependencies = [ + "ahash", + "base64", + "ethnum", + "num-traits", + "parquet-format-safe", + "polars-arrow", + "polars-error", + "polars-utils", + "seq-macro", + "simdutf8", + "streaming-decompression", +] + [[package]] name = "polars-pipe" -version = "0.37.0" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "390a831b864bc57a4cb260b0595030dfb6a4260a3723cf8ca17968ee2078b8ff" +checksum = "48700f1d5bd56a15451e581f465c09541492750360f18637b196f995470a015c" dependencies = [ "crossbeam-channel", "crossbeam-queue", @@ -2470,18 +2507,20 @@ dependencies = [ "polars-utils", "rayon", "smartstring", + "uuid", "version_check", ] [[package]] name = "polars-plan" -version = "0.37.0" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fb7d7527be2aa33baace9000f6772eb9df7cd57ec010a4b273435d2dc1349e8" +checksum = "2fb8e2302e20c44defd5be8cad9c96e75face63c3a5f609aced8c4ec3b3ac97d" dependencies = [ "ahash", "bytemuck", "chrono-tz", + "hashbrown 0.14.3", "once_cell", "percent-encoding", "polars-arrow", @@ -2491,6 
+2530,7 @@ dependencies = [ "polars-time", "polars-utils", "rayon", + "recursive", "regex", "smartstring", "strum_macros", @@ -2499,10 +2539,11 @@ dependencies = [ [[package]] name = "polars-row" -version = "0.37.0" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4984d97aad3d0db92afe76ebcab10b5e37a1216618b5703ae0d2917ccd6168c" +checksum = "a515bdc68c2ae3702e3de70d89601f3b71ca8137e282a226dddb53ee4bacfa2e" dependencies = [ + "bytemuck", "polars-arrow", "polars-error", "polars-utils", @@ -2510,9 +2551,9 @@ dependencies = [ [[package]] name = "polars-sql" -version = "0.37.0" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77f62a8b8f93146ec1eb2ef340d77eeb174e8010035e449bfdd424d2b1fd944a" +checksum = "7b4bb7cc1c04c3023d1953b2f1dec50515e8fd8169a5a2bf4967b3b082232db7" dependencies = [ "hex", "polars-arrow", @@ -2528,9 +2569,9 @@ dependencies = [ [[package]] name = "polars-time" -version = "0.37.0" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d75348a51d0c97f3b83df860ecb35a6ac6c5dafc6278cac4e1ac101d96dc753" +checksum = "efc18e3ad92eec55db89d88f16c22d436559ba7030cf76f86f6ed7a754b673f1" dependencies = [ "atoi", "chrono", @@ -2548,9 +2589,9 @@ dependencies = [ [[package]] name = "polars-utils" -version = "0.37.0" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38f9c955bb1e9b55d835aeb7fe4e4e8826e01abe5f0ada979ceb7d2b9af7b569" +checksum = "c760b6c698cfe2fbbbd93d6cfb408db14ececfe1d92445dae2229ce1b5b21ae8" dependencies = [ "ahash", "bytemuck", @@ -2559,8 +2600,10 @@ dependencies = [ "num-traits", "once_cell", "polars-error", + "raw-cpuid", "rayon", "smartstring", + "stacker", "sysinfo", "version_check", ] @@ -2610,6 +2653,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "psm" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" +dependencies = [ + "cc", +] + [[package]] name = "pulldown-cmark" version = "0.9.6" @@ -2623,9 +2675,9 @@ dependencies = [ [[package]] name = "pyanndata" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efbae630c991b958a0b50114f123d2d5987ed71a6e154f726a992b1b47455e27" +checksum = "0c30551f41c9b8c2d04ad34ee5029c3db6c321a5a0297901c7bb700d3100e91f" dependencies = [ "anndata", "anndata-hdf5", @@ -2650,9 +2702,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53bdbb96d49157e65d45cc287af5f32ffadd5f4761438b527b055fb0d4bb8233" +checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8" dependencies = [ "anyhow", "cfg-if", @@ -2670,9 +2722,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "deaa5745de3f5231ce10517a1f5dd97d53e5a2fd77aa6b5842292085831d48d7" +checksum = "7883df5835fafdad87c0d888b266c8ec0f4c9ca48a5bed6bbb592e8dedee1b50" dependencies = [ "once_cell", "target-lexicon", @@ -2680,9 +2732,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b42531d03e08d4ef1f6e85a2ed422eb678b8cd62b762e53891c05faf0d4afa" +checksum = "01be5843dc60b916ab4dad1dca6d20b9b4e6ddc8e15f50c47fe6d85f1fb97403" dependencies = [ "libc", "pyo3-build-config", @@ -2690,9 +2742,9 @@ dependencies = [ [[package]] name = "pyo3-log" -version = "0.9.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c10808ee7250403bedb24bc30c32493e93875fef7ba3e4292226fe924f398bd" +checksum = 
"2af49834b8d2ecd555177e63b273b708dea75150abc6f5341d0a6e1a9623976c" dependencies = [ "arc-swap", "log", @@ -2701,9 +2753,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7305c720fa01b8055ec95e484a6eca7a83c841267f0dd5280f0c8b8551d2c158" +checksum = "77b34069fc0682e11b31dbd10321cbf94808394c56fd996796ce45217dfac53c" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2713,9 +2765,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c7e9b68bb9c3149c5b0cade5d07f953d6d125eb4337723c4ccdb665f1f96185" +checksum = "08260721f32db5e1a5beae69a55553f56b99bd0e1c3e6e0a5e8851a9d0f5a85c" dependencies = [ "heck", "proc-macro2", @@ -2726,9 +2778,9 @@ dependencies = [ [[package]] name = "pyo3-polars" -version = "0.11.3" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fa311764163c831c75f9ca49499abacbf7ece676cad0b059d962a384aa18224" +checksum = "469bd1d378fb3a34c1b182383e84741d9e7c5451a5d29a3f9c557aac161876cd" dependencies = [ "polars", "polars-core", @@ -2803,6 +2855,15 @@ dependencies = [ "rand_core", ] +[[package]] +name = "raw-cpuid" +version = "11.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e29830cbb1290e404f24c73af91c5d8d631ce7e128691e9477556b540cd01ecd" +dependencies = [ + "bitflags 2.4.2", +] + [[package]] name = "rawpointer" version = "0.2.1" @@ -2811,9 +2872,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" [[package]] name = "rayon" -version = "1.8.1" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" 
dependencies = [ "either", "rayon-core", @@ -2829,6 +2890,26 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.50", +] + [[package]] name = "redox_syscall" version = "0.2.16" @@ -3067,6 +3148,12 @@ dependencies = [ "serde", ] +[[package]] +name = "seq-macro" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" + [[package]] name = "serde" version = "1.0.197" @@ -3268,6 +3355,19 @@ dependencies = [ "log", ] +[[package]] +name = "stacker" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "winapi", +] + [[package]] name = "static_assertions" version = "1.1.0" @@ -3287,6 +3387,15 @@ dependencies = [ "rand", ] +[[package]] +name = "streaming-decompression" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf6cc3b19bfb128a8ad11026086e31d3ce9ad23f8ea37354b31383a187c44cf3" +dependencies = [ + "fallible-streaming-iterator", +] + [[package]] name = "streaming-iterator" version = "0.1.9" @@ -3569,6 +3678,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "uuid" +version = "1.8.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" +dependencies = [ + "getrandom", +] + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/snapatac2-python/Cargo.toml b/snapatac2-python/Cargo.toml index a85a8e3d4..50b2943c8 100644 --- a/snapatac2-python/Cargo.toml +++ b/snapatac2-python/Cargo.toml @@ -12,9 +12,9 @@ keywords = ["single-cell", "biology"] [dependencies] snapatac2-core = { path = "../snapatac2-core" } -anndata = "0.3.2" +anndata = "0.3.3" anndata-hdf5 = "0.2" -pyanndata = "0.3.2" +pyanndata = "0.3.3" extsort = "0.4" anyhow = "1.0" bed-utils = "0.2" @@ -25,18 +25,18 @@ linreg = "0.2" log = "0.4" linfa = "0.6" linfa-clustering = "0.6" -noodles = { version = "0.64", features = ["bam", "sam"] } -numpy = "0.20.0" +noodles = { version = "0.70", features = ["bam", "sam"] } +numpy = "0.21.0" nalgebra-sparse = "0.9" nalgebra = "0.32" ndarray = "0.15" -polars = { version = "0.37", features = ["ndarray", "dtype-categorical"] } -pyo3-log = "0.9" -pyo3-polars = "0.11.3" +polars = { version = "0.39", features = ["ndarray", "dtype-categorical"] } +pyo3-log = "0.10" +pyo3-polars = "0.13" rand_isaac = "0.3" rand_core = "0.6" rand = "0.8" -rayon = "1.8" +rayon = "1.10" statrs = "0.16" tempfile = "3.3" zstd = { version = "0.13", features = ["zstdmt"] } @@ -45,7 +45,7 @@ zstd = { version = "0.13", features = ["zstdmt"] } tikv-jemallocator = {version = "0.5", features = ["disable_initial_exec_tls"]} [dependencies.pyo3] -version = "0.20.2" +version = "0.21.2" features = ["extension-module", "anyhow"] [lib] diff --git a/snapatac2-python/python/snapatac2/preprocessing/_basic.py b/snapatac2-python/python/snapatac2/preprocessing/_basic.py index de3badab4..82d199c47 100644 --- a/snapatac2-python/python/snapatac2/preprocessing/_basic.py +++ b/snapatac2-python/python/snapatac2/preprocessing/_basic.py @@ -182,6 +182,18 @@ def import_data( For large fragment files, it is recommended to set 
`tempdir` to a location with sufficient space in order to avoid running out of disk space. + Warning + ------- + When the input to the function is a list of files, it employs multiprocessing + to process these files concurrently. In this case, however, it is crucial to + safeguard the entry point of the program by encapsulating the function call + within `if __name__ == '__main__':`. This condition ensures that the module + is being run as the main program and not being loaded as a module from + another script. Without this protection, each subprocess might attempt to + spawn its own subprocesses, leading to a cascade of process spawns—a situation + that can cause the program to hang or crash due to infinite recursion. + You don't need to do this in Jupyter notebook as it automatically does that. + Parameters ---------- fragment_file diff --git a/snapatac2-python/src/call_peaks.rs b/snapatac2-python/src/call_peaks.rs index d60ee92b6..d60796663 100644 --- a/snapatac2-python/src/call_peaks.rs +++ b/snapatac2-python/src/call_peaks.rs @@ -18,7 +18,7 @@ use polars::{ prelude::{DataFrame, NamedFrom}, series::Series, }; -use pyo3::prelude::*; +use pyo3::{prelude::*, pybacked::PyBackedStr}; use pyo3_polars::PyDataFrame; use rayon::iter::{ParallelBridge, ParallelIterator}; use std::collections::HashSet; @@ -67,8 +67,8 @@ pub fn py_merge_peaks<'py>( #[pyfunction] pub fn find_reproducible_peaks<'py>( - peaks: &'py PyAny, - replicates: Vec<&'py PyAny>, + peaks: &Bound<'py, PyAny>, + replicates: Vec>, blacklist: Option, ) -> Result { let black: BedTree<_> = if let Some(black) = blacklist { @@ -86,7 +86,7 @@ pub fn find_reproducible_peaks<'py>( .collect::>(); let replicates = replicates .into_iter() - .map(|x| BedTree::from_iter(get_peaks(x).unwrap().into_iter().map(|x| (x, ())))) + .map(|x| BedTree::from_iter(get_peaks(&x).unwrap().into_iter().map(|x| (x, ())))) .collect::>(); let peaks: Vec<_> = peaks .into_iter() @@ -97,7 +97,7 @@ pub fn find_reproducible_peaks<'py>( 
#[pyfunction] pub fn fetch_peaks<'py>( - peaks: HashMap, + peaks: HashMap>, blacklist: Option, ) -> Result> { let black: BedTree<_> = if let Some(black) = blacklist { @@ -111,7 +111,7 @@ pub fn fetch_peaks<'py>( peaks .into_iter() .map(|(key, peaks)| { - let ps = get_peaks(peaks)? + let ps = get_peaks(&peaks)? .into_iter() .filter(|x| !black.is_overlapped(x)) .collect::>(); @@ -209,7 +209,7 @@ fn narrow_peak_to_dataframe>( Ok(df) } -fn get_peaks<'py>(peak_io_obj: &'py PyAny) -> Result> { +fn get_peaks<'py>(peak_io_obj: &Bound<'py, PyAny>) -> Result> { peak_io_obj .getattr("peaks")? .downcast::() @@ -251,21 +251,20 @@ fn get_peaks<'py>(peak_io_obj: &'py PyAny) -> Result> { pub fn create_fwtrack_obj<'py>( py: Python<'py>, files: Vec, -) -> Result<(&'py PyAny, Vec<&'py PyAny>)> { - let macs = py.import("MACS3.Signal.FixWidthTrack")?; +) -> Result<(Bound<'py, PyAny>, Vec>)> { + let macs = py.import_bound("MACS3.Signal.FixWidthTrack")?; let merged = macs.getattr("FWTrack")?.call1((1000000,))?; let has_replicate = files.len() > 1; let replicates = files .into_iter() .map(|fl| { - let kwargs = pyo3::types::PyDict::new(py); + let kwargs = pyo3::types::PyDict::new_bound(py); kwargs.set_item("buffer_size", 100000)?; - let fwt = macs.getattr("FWTrack")?.call((), Some(kwargs))?; + let fwt = macs.getattr("FWTrack")?.call((), Some(&kwargs))?; let reader = utils::open_file_for_read(&fl); bed_utils::bed::io::Reader::new(reader, None) .into_records::() .try_for_each(|x| { - let _pool = unsafe { py.new_pool() }; // This is necessary to release memory of objects created in the loop let x = x?; let chr = x.chrom().as_bytes(); match x.strand() { @@ -300,7 +299,7 @@ pub fn create_fwtrack_obj<'py>( merged.call_method0("finalize")?; Ok((merged, replicates)) } else { - Ok((replicates[0], Vec::new())) + Ok((replicates.into_iter().next().unwrap(), Vec::new())) } } @@ -308,10 +307,10 @@ pub fn create_fwtrack_obj<'py>( pub fn export_tags( anndata: AnnDataLike, dir: PathBuf, - group_by: 
Vec<&str>, - replicates: Option>, + group_by: Vec, + replicates: Option>, max_frag_size: Option, - selections: Option>, + selections: Option>, ) -> Result>> { macro_rules! run { ($data:expr) => { @@ -332,10 +331,10 @@ pub fn export_tags( fn _export_tags>( data: &D, dir: P, - group_by: &Vec<&str>, - replicates: Option<&Vec<&str>>, + group_by: &Vec, + replicates: Option<&Vec>, max_frag_size: Option, - selections: Option>, + selections: Option>, ) -> Result>> { // Get keys ensure!(data.n_obs() == group_by.len(), "lengths differ"); @@ -343,9 +342,9 @@ fn _export_tags>( Some(rep) => group_by .iter() .zip(rep.iter()) - .map(|(x, y)| (*x, *y)) + .map(|(x, y)| (x.as_ref(), y.as_ref())) .collect(), - None => group_by.iter().map(|x| (*x, "")).collect(), + None => group_by.iter().map(|x| (x.as_ref(), "")).collect(), }; let mut unique_keys: HashSet<(&str, &str)> = keys.iter().cloned().unique().collect(); diff --git a/snapatac2-python/src/embedding.rs b/snapatac2-python/src/embedding.rs index 5c39d7aec..d193ba94e 100644 --- a/snapatac2-python/src/embedding.rs +++ b/snapatac2-python/src/embedding.rs @@ -23,11 +23,11 @@ use std::ops::Deref; pub(crate) fn spectral_embedding<'py>( py: Python<'py>, anndata: AnnDataLike, - selected_features: &PyAny, + selected_features: &Bound<'_, PyAny>, n_components: usize, random_state: i64, feature_weights: Option>, -) -> Result<(&'py PyArray1, &'py PyArray2)> { +) -> Result<(Bound<'py, PyArray1>, Bound<'py, PyArray2>)> { macro_rules! 
run { ($data:expr) => {{ let slice = pyanndata::data::to_select_elem(selected_features, $data.n_vars())?; @@ -46,8 +46,8 @@ pub(crate) fn spectral_embedding<'py>( let (evals, evecs) = crate::with_anndata!(&anndata, run)?; Ok(( - PyArray1::from_owned_array(py, evals), - PyArray2::from_owned_array(py, evecs), + PyArray1::from_owned_array_bound(py, evals), + PyArray2::from_owned_array_bound(py, evecs), )) } @@ -55,13 +55,13 @@ pub(crate) fn spectral_embedding<'py>( pub(crate) fn spectral_embedding_nystrom<'py>( py: Python<'py>, anndata: AnnDataLike, - selected_features: &PyAny, + selected_features: &Bound<'_, PyAny>, n_components: usize, sample_size: usize, weighted_by_degree: bool, chunk_size: usize, feature_weights: Option>, -) -> Result<(&'py PyArray1, &'py PyArray2)> { +) -> Result<(Bound<'py, PyArray1>, Bound<'py, PyArray2>)> { macro_rules! run { ($data:expr) => {{ let selected_features = @@ -115,8 +115,8 @@ pub(crate) fn spectral_embedding_nystrom<'py>( let (evals, evecs) = crate::with_anndata!(&anndata, run)?; Ok(( - PyArray1::from_owned_array(py, evals), - PyArray2::from_owned_array(py, evecs), + PyArray1::from_owned_array_bound(py, evals), + PyArray2::from_owned_array_bound(py, evecs), )) } @@ -145,7 +145,7 @@ fn spectral_mf( // Compute eigenvalues and eigenvectors let (v, u) = Python::with_gil(|py| { - let fun: Py = PyModule::from_code( + let fun: Py = PyModule::from_code_bound( py, "def eigen(X, D, k, seed): from scipy.sparse.linalg import LinearOperator, eigsh @@ -168,7 +168,7 @@ fn spectral_mf( .into(); let args = ( PyArrayData::from(ArrayData::from(input)), - PyArray1::from_iter(py, degree_inv.into_iter().copied()), + PyArray1::from_iter_bound(py, degree_inv.into_iter().copied()), n_components, random_state, ); @@ -204,11 +204,11 @@ where .axis_iter_mut(Axis(1)) .zip(evals.iter()) .for_each(|(mut col, v)| col *= v.recip()); - let evecs_py = PyArray2::from_array(py, evecs); - let evals_py = PyArray1::from_array(py, evals); + let evecs_py = 
PyArray2::from_array_bound(py, evecs); + let evals_py = PyArray1::from_array_bound(py, evals); let seed_matrix_py = PyArrayData::from(ArrayData::from(seed_matrix)).into_py(py); - let nystrom_py: Py = PyModule::from_code( + let nystrom_py: Py = PyModule::from_code_bound( py, "def nystrom(seed, sample, evecs, evals): import numpy as np @@ -234,8 +234,8 @@ where let args = ( &seed_matrix_py, PyArrayData::from(ArrayData::from(sample)), - evecs_py, - evals_py, + &evecs_py, + &evals_py, ); nystrom_py .call1(py, args) @@ -380,11 +380,11 @@ fn hstack(m1: CsrMatrix, m2: CsrMatrix) -> CsrMatrix { pub(crate) fn multi_spectral_embedding<'py>( py: Python<'py>, anndata: Vec, - selected_features: Vec<&PyAny>, + selected_features: Vec>, weights: Vec, n_components: usize, random_state: i64, -) -> Result<(&'py PyArray1, &'py PyArray2)> { +) -> Result<(Bound<'py, PyArray1>, Bound<'py, PyArray2>)> { info!("Compute normalized views..."); let mats = anndata .into_iter() @@ -392,7 +392,7 @@ pub(crate) fn multi_spectral_embedding<'py>( .map(|(a, s)| { macro_rules! 
get_mat { ($data:expr) => {{ - let slice = pyanndata::data::to_select_elem(s, $data.n_vars()) + let slice = pyanndata::data::to_select_elem(&s, $data.n_vars()) .expect("Invalid feature selection"); let mut mat: CsrMatrix = $data.x().slice_axis(1, slice).unwrap().expect("X is None"); @@ -432,14 +432,14 @@ pub(crate) fn multi_spectral_embedding<'py>( info!("Compute embedding..."); let (evals, evecs, _) = spectral_mf(mat, n_components, random_state)?; Ok(( - PyArray1::from_owned_array(py, evals), - PyArray2::from_owned_array(py, evecs), + PyArray1::from_owned_array_bound(py, evals), + PyArray2::from_owned_array_bound(py, evecs), )) } fn frobenius_norm(x: &CsrMatrix) -> f64 { let sum: f64 = Python::with_gil(|py| { - let fun: Py = PyModule::from_code( + let fun: Py = PyModule::from_code_bound( py, "def f(X): import numpy as np diff --git a/snapatac2-python/src/export.rs b/snapatac2-python/src/export.rs index a441c6b16..c639b07e3 100644 --- a/snapatac2-python/src/export.rs +++ b/snapatac2-python/src/export.rs @@ -1,10 +1,10 @@ use crate::utils::AnnDataLike; use snapatac2_core::{export::{Exporter, Normalization, CoverageOutputFormat}, utils}; +use pyo3::{prelude::*, pybacked::PyBackedStr}; use std::ops::Deref; use anndata::Backend; use anndata_hdf5::H5; -use pyo3::prelude::*; use std::{collections::{HashSet, HashMap}, path::PathBuf}; use anyhow::Result; use bed_utils::bed::{GenomicRange, io::Reader, tree::BedTree}; @@ -13,17 +13,21 @@ use std::str::FromStr; #[pyfunction] pub fn export_fragments( anndata: AnnDataLike, - barcodes: Vec<&str>, - group_by: Vec<&str>, + barcodes: Vec, + group_by: Vec, dir: PathBuf, prefix: &str, suffix: &str, - selections: Option>, + selections: Option>, min_frag_length: Option, max_frag_length: Option, compression: Option<&str>, compression_level: Option, ) -> Result> { + let barcodes = barcodes.iter().map(|x| x.as_ref()).collect(); + let group_by = group_by.iter().map(|x| x.as_ref()).collect(); + let selections = selections.as_ref() + 
.map(|s| s.iter().map(|x| x.as_ref()).collect()); macro_rules! run { ($data:expr) => { $data.export_fragments( @@ -38,16 +42,16 @@ pub fn export_fragments( #[pyfunction] pub fn export_coverage( anndata: AnnDataLike, - group_by: Vec<&str>, + group_by: Vec, resolution: usize, dir: PathBuf, prefix: &str, suffix:&str, output_format: &str, - selections: Option>, + selections: Option>, blacklist: Option, normalization: Option<&str>, - ignore_for_norm: Option>, + ignore_for_norm: Option>, min_frag_length: Option, max_frag_length: Option, compression: Option<&str>, @@ -55,6 +59,12 @@ pub fn export_coverage( temp_dir: Option, num_threads: Option, ) -> Result> { + let group_by = group_by.iter().map(|x| x.as_ref()).collect(); + let selections = selections.as_ref() + .map(|s| s.iter().map(|x| x.as_ref()).collect()); + let ignore_for_norm = ignore_for_norm.as_ref() + .map(|s| s.iter().map(|x| x.as_ref()).collect()); + let black: Option> = blacklist.map(|black| { Reader::new(utils::open_file_for_read(black), None) .into_records::() diff --git a/snapatac2-python/src/lib.rs b/snapatac2-python/src/lib.rs index c1455704a..bc58c72cc 100644 --- a/snapatac2-python/src/lib.rs +++ b/snapatac2-python/src/lib.rs @@ -7,7 +7,7 @@ mod network; mod motif; mod knn; -use pyo3::{prelude::*, PyResult, Python}; +use pyo3::{prelude::*, PyResult}; use pyanndata; #[cfg(not(target_env = "msvc"))] @@ -18,7 +18,7 @@ use tikv_jemallocator::Jemalloc; static GLOBAL: Jemalloc = Jemalloc; #[pymodule] -fn _snapatac2(_py: Python, m: &PyModule) -> PyResult<()> { +fn _snapatac2(m: &Bound<'_, PyModule>) -> PyResult<()> { pyo3_log::init(); // AnnData related functions diff --git a/snapatac2-python/src/motif.rs b/snapatac2-python/src/motif.rs index e3ac1fee8..6f43f5268 100644 --- a/snapatac2-python/src/motif.rs +++ b/snapatac2-python/src/motif.rs @@ -1,4 +1,4 @@ -use pyo3::prelude::*; +use pyo3::{prelude::*, pybacked::PyBackedStr}; use numpy::{Ix2, PyReadonlyArray}; use std::fs::File; use std::path::Path; @@ -18,7 
+18,7 @@ impl PyDNAMotif { #[new] fn new<'py>( id: &str, - matrix: &'py PyAny, + matrix: Bound<'py, PyAny>, ) -> Self { let pwm: PyReadonlyArray = matrix.extract().unwrap(); let motif = motif::DNAMotif { @@ -91,12 +91,12 @@ impl PyDNAMotifScanner { } #[pyo3(signature = (seqs, pvalue=1e-5, rc=true))] - fn exists(&self, seqs: Vec<&str>, pvalue: f64, rc: bool) -> Vec { - seqs.into_par_iter().map(|x| self.exist(x, pvalue, rc)).collect() + fn exists(&self, seqs: Vec, pvalue: f64, rc: bool) -> Vec { + seqs.into_par_iter().map(|x| self.exist(x.as_ref(), pvalue, rc)).collect() } #[pyo3(signature = (seqs, pvalue=1e-5))] - fn with_background(&self, seqs: Vec<&str>, pvalue: f64) -> PyDNAMotifTest { + fn with_background(&self, seqs: Vec, pvalue: f64) -> PyDNAMotifTest { let n = seqs.len(); PyDNAMotifTest { scanner: self.clone(), @@ -133,7 +133,7 @@ impl PyDNAMotifTest { #[getter] fn name(&self) -> Option { self.scanner.name() } - fn test(&self, seqs: Vec<&str>) -> (f64, f64) { + fn test(&self, seqs: Vec) -> (f64, f64) { let n = seqs.len().try_into().unwrap(); let occurrence: u64 = seqs.into_par_iter() .filter(|x| self.scanner.exist(x, self.pvalue, true)).count() diff --git a/snapatac2-python/src/preprocessing.rs b/snapatac2-python/src/preprocessing.rs index 0e7d737f1..7040deae3 100644 --- a/snapatac2-python/src/preprocessing.rs +++ b/snapatac2-python/src/preprocessing.rs @@ -1,5 +1,6 @@ use crate::utils::*; +use pyo3::{prelude::*, pybacked::PyBackedStr}; use anndata::Backend; use anndata_hdf5::H5; use snapatac2_core::preprocessing::count_data::TranscriptParserOptions; @@ -7,7 +8,6 @@ use snapatac2_core::preprocessing::count_data::CountingStrategy; use std::io::{BufRead, BufReader}; use std::path::PathBuf; use std::{str::FromStr, collections::BTreeMap, ops::Deref, collections::HashSet}; -use pyo3::prelude::*; use bed_utils::{bed, bed::GenomicRange}; use pyanndata::PyAnnData; use anyhow::Result; @@ -72,7 +72,7 @@ pub(crate) fn make_fragment_file( pub(crate) fn import_fragments( 
anndata: AnnDataLike, fragment_file: PathBuf, - chrom_size: BTreeMap<&str, u64>, + chrom_size: BTreeMap, mitochondrial_dna: Vec, min_num_fragment: u64, fragment_is_sorted_by_name: bool, @@ -142,7 +142,7 @@ fn shift_fragment(fragment: &mut Fragment, shift_left: i64, shift_right: i64) { pub(crate) fn import_contacts( anndata: AnnDataLike, contact_file: PathBuf, - chrom_size: BTreeMap<&str, u64>, + chrom_size: BTreeMap, fragment_is_sorted_by_name: bool, chunk_size: usize, tempdir: Option, @@ -194,12 +194,14 @@ pub(crate) fn mk_tile_matrix( bin_size: usize, chunk_size: usize, strategy: &str, - exclude_chroms: Option>, + exclude_chroms: Option>, min_fragment_size: Option, max_fragment_size: Option, out: Option ) -> Result<()> { + let exclude_chroms = exclude_chroms.as_ref() + .map(|s| s.iter().map(|x| x.as_ref()).collect::>()); macro_rules! run { ($data:expr) => { if let Some(out) = out { @@ -240,7 +242,7 @@ pub(crate) fn mk_tile_matrix( #[pyfunction] pub(crate) fn mk_peak_matrix( anndata: AnnDataLike, - peaks: &PyAny, + peaks: Bound<'_, PyAny>, chunk_size: usize, use_x: bool, strategy: &str, @@ -339,7 +341,7 @@ pub(crate) fn tss_enrichment( #[pyfunction] pub(crate) fn add_frip( anndata: AnnDataLike, - regions: BTreeMap>, + regions: BTreeMap>, ) -> Result>> { let trees: Vec<_> = regions.values().map(|x| diff --git a/snapatac2-python/src/utils.rs b/snapatac2-python/src/utils.rs index 0e08c6a8f..9d5ad6b5e 100644 --- a/snapatac2-python/src/utils.rs +++ b/snapatac2-python/src/utils.rs @@ -7,7 +7,7 @@ use pyo3::{ types::PyIterator, PyResult, Python, }; -use numpy::{Element, PyReadonlyArrayDyn, PyReadonlyArray, Ix1, Ix2, PyArray, IntoPyArray}; +use numpy::{Element, PyReadonlyArrayDyn, PyReadonlyArray, Ix1, Ix2, PyArray, IntoPyArray, PyArrayMethods}; use snapatac2_core::preprocessing::count_data::TranscriptParserOptions; use snapatac2_core::preprocessing::{Transcript, read_transcripts_from_gff, read_transcripts_from_gtf}; use snapatac2_core::utils; @@ -45,10 +45,10 @@ 
macro_rules! with_sparsity_pattern { #[pyfunction] pub(crate) fn jaccard_similarity<'py>( py: Python<'py>, - mat: &'py PyAny, - other: Option<&'py PyAny>, + mat: &Bound<'py, PyAny>, + other: Option<&Bound<'py, PyAny>>, weights: Option>, -) -> PyResult<&'py PyArray> { +) -> PyResult>> { let weights_ = match weights { None => None, Some(ref ws) => Some(ws.as_slice().unwrap()), @@ -57,10 +57,10 @@ pub(crate) fn jaccard_similarity<'py>( macro_rules! with_csr { ($mat:expr) => { match other { - None => Ok(utils::similarity::jaccard($mat, weights_).into_pyarray(py)), + None => Ok(utils::similarity::jaccard($mat, weights_).into_pyarray_bound(py)), Some(mat2) => { macro_rules! xxx { - ($m:expr) => { Ok(utils::similarity::jaccard2($mat, $m, weights_).into_pyarray(py)) }; + ($m:expr) => { Ok(utils::similarity::jaccard2($mat, $m, weights_).into_pyarray_bound(py)) }; } let shape: Vec = mat2.getattr("shape")?.extract()?; with_sparsity_pattern!( @@ -101,22 +101,22 @@ where #[pyfunction] pub(crate) fn cosine_similarity<'py>( py: Python<'py>, - mat: &'py PyAny, - other: Option<&'py PyAny>, + mat: &Bound<'py, PyAny>, + other: Option<&Bound<'py, PyAny>>, weights: Option>, -) -> PyResult<&'py PyArray> { +) -> PyResult>> { let weights_ = match weights { None => None, Some(ref ws) => Some(ws.as_slice().unwrap()), }; match other { - None => Ok(utils::similarity::cosine(csr_to_rust(mat)?, weights_).into_pyarray(py)), + None => Ok(utils::similarity::cosine(csr_to_rust(mat)?, weights_).into_pyarray_bound(py)), Some(mat2) => Ok( utils::similarity::cosine2( csr_to_rust(mat)?, csr_to_rust(mat2)?, weights_, - ).into_pyarray(py) + ).into_pyarray_bound(py) ), } } @@ -124,19 +124,19 @@ pub(crate) fn cosine_similarity<'py>( #[pyfunction] pub(crate) fn pearson<'py>( py: Python<'py>, - mat: &'py PyAny, - other: &'py PyAny, + mat: &Bound<'py, PyAny>, + other: &Bound<'py, PyAny>, ) -> PyResult { match mat.getattr("dtype")?.getattr("name")?.extract()? 
{ "float32" => { let mat_ = mat.extract::>()?.to_owned_array(); let other_ = other.extract::>()?.to_owned_array(); - Ok(utils::similarity::pearson2(mat_, other_).into_pyarray(py).to_object(py)) + Ok(utils::similarity::pearson2(mat_, other_).into_pyarray_bound(py).to_object(py)) }, "float64" => { let mat_ = mat.extract::>()?.to_owned_array(); let other_ = other.extract::>()?.to_owned_array(); - Ok(utils::similarity::pearson2(mat_, other_).into_pyarray(py).to_object(py)) + Ok(utils::similarity::pearson2(mat_, other_).into_pyarray_bound(py).to_object(py)) }, ty => panic!("Cannot compute correlation for type {}", ty), } @@ -145,8 +145,8 @@ pub(crate) fn pearson<'py>( #[pyfunction] pub(crate) fn spearman<'py>( py: Python<'py>, - mat: &'py PyAny, - other: &'py PyAny, + mat: &Bound<'py, PyAny>, + other: &Bound<'py, PyAny>, ) -> PyResult { match mat.getattr("dtype")?.getattr("name")?.extract()? { "float32" => { @@ -154,11 +154,11 @@ pub(crate) fn spearman<'py>( match other.getattr("dtype")?.getattr("name")?.extract()? { "float32" => { let other_ = other.extract::>()?.to_owned_array(); - Ok(utils::similarity::spearman2(mat_, other_).into_pyarray(py).to_object(py)) + Ok(utils::similarity::spearman2(mat_, other_).into_pyarray_bound(py).to_object(py)) }, "float64" => { let other_ = other.extract::>()?.to_owned_array(); - Ok(utils::similarity::spearman2(mat_, other_).into_pyarray(py).to_object(py)) + Ok(utils::similarity::spearman2(mat_, other_).into_pyarray_bound(py).to_object(py)) }, ty => panic!("Cannot compute correlation for type {}", ty), } @@ -168,11 +168,11 @@ pub(crate) fn spearman<'py>( match other.getattr("dtype")?.getattr("name")?.extract()? 
{ "float32" => { let other_ = other.extract::>()?.to_owned_array(); - Ok(utils::similarity::spearman2(mat_, other_).into_pyarray(py).to_object(py)) + Ok(utils::similarity::spearman2(mat_, other_).into_pyarray_bound(py).to_object(py)) }, "float64" => { let other_ = other.extract::>()?.to_owned_array(); - Ok(utils::similarity::spearman2(mat_, other_).into_pyarray(py).to_object(py)) + Ok(utils::similarity::spearman2(mat_, other_).into_pyarray_bound(py).to_object(py)) }, ty => panic!("Cannot compute correlation for type {}", ty), } @@ -181,17 +181,17 @@ pub(crate) fn spearman<'py>( } } -fn csr_to_rust<'py>(csr: &'py PyAny) -> PyResult> { +fn csr_to_rust<'py>(csr: &Bound<'py, PyAny>) -> PyResult> { let shape: Vec = csr.getattr("shape")?.extract()?; - let indices = cast_pyarray(csr.getattr("indices")?)?; - let indptr = cast_pyarray(csr.getattr("indptr")?)?; - let data = cast_pyarray(csr.getattr("data")?)?; + let indices = cast_pyarray(&csr.getattr("indices")?)?; + let indptr = cast_pyarray(&csr.getattr("indptr")?)?; + let data = cast_pyarray(&csr.getattr("data")?)?; Ok(CsrMatrix::try_from_csr_data( shape[0], shape[1], indptr, indices, data, ).unwrap()) } -fn cast_pyarray<'py, T: Element>(arr: &'py PyAny) -> PyResult> { +fn cast_pyarray<'py, T: Element>(arr: &Bound<'py, PyAny>) -> PyResult> { let vec = match arr.getattr("dtype")?.getattr("name")?.extract()? 
{ "uint32" => arr.extract::>()?.cast(false)?.to_vec().unwrap(), "int32" => arr.extract::>()?.cast(false)?.to_vec().unwrap(), @@ -206,7 +206,7 @@ fn cast_pyarray<'py, T: Element>(arr: &'py PyAny) -> PyResult> { /// Simple linear regression #[pyfunction] -pub(crate) fn simple_lin_reg(py_iter: &PyIterator) -> PyResult<(f64, f64)> { +pub(crate) fn simple_lin_reg(py_iter: Bound<'_, PyIterator>) -> PyResult<(f64, f64)> { Ok(lin_reg_imprecise(py_iter.map(|x| x.unwrap().extract().unwrap())).unwrap()) } @@ -234,10 +234,10 @@ pub(crate) fn read_regions(file: PathBuf) -> Vec { } #[pyfunction] -pub(crate) fn intersect_bed<'py>(regions: &'py PyAny, bed_file: &str) -> PyResult> { +pub(crate) fn intersect_bed<'py>(regions: Bound<'py, PyAny>, bed_file: &str) -> PyResult> { let bed_tree: bed::tree::BedTree<()> = bed::io::Reader::new(utils::open_file_for_read(bed_file), None) .into_records().map(|x: Result, _>| (x.unwrap(), ())).collect(); - let res = PyIterator::from_object(regions)? + let res = PyIterator::from_bound_object(&regions)? .map(|x| bed_tree.is_overlapped(&GenomicRange::from_str(x.unwrap().extract().unwrap()).unwrap())) .collect(); Ok(res) @@ -248,14 +248,14 @@ pub(crate) fn kmeans<'py>( py: Python<'py>, n_clusters: usize, observations_: PyReadonlyArray<'_, f64, Ix2>, -) -> PyResult<&'py PyArray> { +) -> PyResult>> { let seed = 42; let rng: Isaac64Rng = SeedableRng::seed_from_u64(seed); let observations = DatasetBase::from(observations_.as_array()); let model = KMeans::params_with_rng(n_clusters, rng) .fit(&observations) .expect("KMeans fitted"); - Ok(model.predict(observations).targets.into_pyarray(py)) + Ok(model.predict(observations).targets.into_pyarray_bound(py)) } pub fn read_transcripts>(file_path: P, options: &TranscriptParserOptions) -> Vec {