Skip to content

Commit

Permalink
Merge branch 'dev' of https://github.com/opentargets/gentropy into il…
Browse files Browse the repository at this point in the history
…-optimise-l2g-coloc
  • Loading branch information
ireneisdoomed committed Mar 21, 2024
2 parents a729928 + 512a80a commit 7ecd223
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 152 deletions.
Binary file not shown.
233 changes: 81 additions & 152 deletions tests/gentropy/method/test_colocalisation_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,177 +2,106 @@

from __future__ import annotations

from typing import Any

import pytest
from gentropy.dataset.colocalisation import Colocalisation
from gentropy.dataset.study_locus_overlap import StudyLocusOverlap
from gentropy.method.colocalisation import Coloc, ECaviar
from pandas.testing import assert_frame_equal
from pyspark.sql import SparkSession
from pyspark.sql import functions as f


def test_coloc(mock_study_locus_overlap: StudyLocusOverlap) -> None:
    """Check that COLOC run on the mock overlap returns a Colocalisation dataset."""
    coloc_result = Coloc.colocalise(mock_study_locus_overlap)
    assert isinstance(coloc_result, Colocalisation)


def test_coloc_colocalise(
    spark: SparkSession,
    threshold: float = 1e-5,
) -> None:
    """Compare COLOC results with R implementation, using provided sample dataset from R package (StudyLocusOverlap).

    Args:
        spark (SparkSession): Spark session used to read the sample overlap and build the expected frame.
        threshold (float): Maximum absolute difference tolerated between observed and expected posteriors.
    """
    posterior_cols = ["h0", "h1", "h2", "h3", "h4"]

    # Parquet files carry their own schema; the CSV-only `header` option is not needed here.
    test_overlap_df = spark.read.parquet(
        "tests/gentropy/data_samples/coloc_test_data.snappy.parquet"
    )
    test_overlap = StudyLocusOverlap(test_overlap_df, StudyLocusOverlap.get_schema())
    test_result = Coloc.colocalise(test_overlap)

    observed_pdf = test_result.df.select(*posterior_cols).toPandas()
    expected_pdf = (
        spark.createDataFrame(
            [
                {
                    "h0": 1.3769995397857477e-18,
                    "h1": 2.937336451601565e-10,
                    "h2": 8.593226431647826e-12,
                    "h3": 8.338916748775843e-4,
                    "h4": 0.9991661080227981,
                }
            ]
        )
        .select(*posterior_cols)
        .toPandas()
    )

    # DataFrame.subtract is a set difference (EXCEPT DISTINCT), not an arithmetic
    # one, so it cannot apply a tolerance. Compare the values numerically instead.
    assert_frame_equal(
        observed_pdf,
        expected_pdf,
        check_exact=False,
        rtol=0.0,
        atol=threshold,
    )


def test_single_snp_coloc(
spark: SparkSession,
threshold: float = 1e-5,
) -> None:
"""Test edge case of coloc where only one causal SNP is present in the StudyLocusOverlap."""
test_overlap_df = spark.createDataFrame(
[
{
"leftStudyLocusId": 1,
"rightStudyLocusId": 2,
"chromosome": "1",
"tagVariantId": "snp",
"left_logBF": 10.3,
"right_logBF": 10.5,
}
]
)
test_overlap = StudyLocusOverlap(
test_overlap_df.select(
"leftStudyLocusId",
"rightStudyLocusId",
"chromosome",
"tagVariantId",
f.struct(f.col("left_logBF"), f.col("right_logBF")).alias("statistics"),
@pytest.mark.parametrize(
"observed_data, expected_data",
[
# associations with a single overlapping SNP
(
# observed overlap
[
{
"leftStudyLocusId": 1,
"rightStudyLocusId": 2,
"chromosome": "1",
"tagVariantId": "snp",
"statistics": {"left_logBF": 10.3, "right_logBF": 10.5},
},
],
# expected coloc
[
{
"h0": 9.254841951638903e-5,
"h1": 2.7517068829182966e-4,
"h2": 3.3609423764447284e-4,
"h3": 9.254841952564387e-13,
"h4": 0.9992961866536217,
},
],
),
StudyLocusOverlap.get_schema(),
)
test_result = Coloc.colocalise(test_overlap)

expected = spark.createDataFrame(
[
{
"h0": 9.254841951638903e-5,
"h1": 2.7517068829182966e-4,
"h2": 3.3609423764447284e-4,
"h3": 9.254841952564387e-13,
"h4": 0.9992961866536217,
}
]
)
difference = test_result.df.select("h0", "h1", "h2", "h3", "h4").subtract(expected)
for col in difference.columns:
assert difference.filter(f.abs(f.col(col)) > threshold).count() == 0


def test_single_snp_coloc_one_negative(
# associations with multiple overlapping SNPs
(
# observed overlap
[
{
"leftStudyLocusId": 1,
"rightStudyLocusId": 2,
"chromosome": "1",
"tagVariantId": "snp1",
"statistics": {"left_logBF": 10.3, "right_logBF": 10.5},
},
{
"leftStudyLocusId": 1,
"rightStudyLocusId": 2,
"chromosome": "1",
"tagVariantId": "snp2",
"statistics": {"left_logBF": 10.3, "right_logBF": 10.5},
},
],
# expected coloc
[
{
"h0": 4.6230151407950416e-5,
"h1": 2.749086942648107e-4,
"h2": 3.357742374172504e-4,
"h3": 9.983447421747411e-4,
"h4": 0.9983447421747356,
},
],
),
],
)
def test_coloc_semantic(
spark: SparkSession,
threshold: float = 1e-5,
observed_data: list[Any],
expected_data: list[Any],
) -> None:
"""Test edge case of coloc where only one causal SNP is present (On one side!) in the StudyLocusOverlap."""
test_overlap_df = spark.createDataFrame(
[
{
"leftStudyLocusId": 1,
"rightStudyLocusId": 2,
"chromosome": "1",
"tagVariantId": "snp",
"left_logBF": 18.3,
"right_logBF": 0.01,
}
]
"""Test our COLOC with the implementation in R."""
observed_overlap = StudyLocusOverlap(
_df=spark.createDataFrame(observed_data, schema=StudyLocusOverlap.get_schema()),
_schema=StudyLocusOverlap.get_schema(),
)
test_overlap = StudyLocusOverlap(
test_overlap_df.select(
"leftStudyLocusId",
"rightStudyLocusId",
"chromosome",
"tagVariantId",
f.struct(f.col("left_logBF"), f.col("right_logBF")).alias("statistics"),
),
StudyLocusOverlap.get_schema(),
observed_coloc_pdf = (
Coloc.colocalise(observed_overlap)
.df.select("h0", "h1", "h2", "h3", "h4")
.toPandas()
)
test_result = Coloc.colocalise(test_overlap)
test_result.df.show(1, False)
expected = spark.createDataFrame(
[
{
"h0": 1.0246538505087709e-4,
"h1": 0.9081680002273896,
"h2": 1.0349517929098209e-8,
"h3": 1.0246538506112363e-12,
"h4": 0.09172952403701702,
}
]
expected_coloc_pdf = (
spark.createDataFrame(expected_data)
.select("h0", "h1", "h2", "h3", "h4")
.toPandas()
)
difference = test_result.df.select("h0", "h1", "h2", "h3", "h4").subtract(expected)
for col in difference.columns:
assert difference.filter(f.abs(f.col(col)) > threshold).count() == 0


def test_single_snp_coloc_both_negative(
spark: SparkSession,
threshold: float = 1e-5,
) -> None:
"""Test edge case of coloc where only one non-causal SNP overlaps in the StudyLocusOverlap."""
test_overlap_df = spark.createDataFrame(
[
{
"leftStudyLocusId": 1,
"rightStudyLocusId": 2,
"chromosome": "1",
"tagVariantId": "snp",
"left_logBF": 0.03,
"right_logBF": 0.01,
}
]
)
test_overlap = StudyLocusOverlap(
test_overlap_df.select(
"leftStudyLocusId",
"rightStudyLocusId",
"chromosome",
"tagVariantId",
f.struct(f.col("left_logBF"), f.col("right_logBF")).alias("statistics"),
),
StudyLocusOverlap.get_schema(),
)
test_result = Coloc.colocalise(test_overlap)
expected = spark.createDataFrame(
[
{
"h0": 0.9997855774090624,
"h1": 1.0302335812225042e-4,
"h2": 1.0098335895103664e-4,
"h3": 9.9978557750904e-9,
"h4": 1.0405876008495098e-5,
}
]
assert_frame_equal(
observed_coloc_pdf,
expected_coloc_pdf,
check_exact=False,
check_dtype=True,
)
difference = test_result.df.select("h0", "h1", "h2", "h3", "h4").subtract(expected)
for col in difference.columns:
assert difference.filter(f.abs(f.col(col)) > threshold).count() == 0


def test_ecaviar(mock_study_locus_overlap: StudyLocusOverlap) -> None:
Expand Down

0 comments on commit 7ecd223

Please sign in to comment.