Skip to content

Commit

Permalink
spark: add IBD Transformer to python API #1
Browse files Browse the repository at this point in the history
  • Loading branch information
roldanx committed Jan 17, 2019
1 parent 35e0bea commit d6ea5b5
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 0 deletions.
40 changes: 40 additions & 0 deletions oskar-spark/src/main/python/pyoskar/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,46 @@ def setStudyId(self, value):
return self._set(studyId=value)


class IBDTransformer(AbstractTransformer):
    """
    Python wrapper for the Java transformer
    ``org.opencb.oskar.spark.variant.analysis.IBDTransformer``.

    Declares four params (``samples``, ``skipMultiAllelic``, ``skipReference``
    and ``numPairs``) and exposes one getter/setter pair for each of them.
    """

    # Sample names, converted to a list of strings.
    samples = Param(
        Params._dummy(),
        "samples",
        "List of samples to use for calculating the IBS",
        typeConverter=TypeConverters.toListString)

    # Boolean flag; see the description string for its meaning.
    skipMultiAllelic = Param(
        Params._dummy(),
        "skipMultiAllelic",
        "Skip variants where any of the samples has a secondary alternate",
        typeConverter=TypeConverters.toBoolean)

    # Boolean flag; see the description string for its meaning.
    skipReference = Param(
        Params._dummy(),
        "skipReference",
        "Skip variants where both samples of the pair are HOM_REF",
        typeConverter=TypeConverters.toBoolean)

    # Integer param; the definition carries no description.
    numPairs = Param(
        Params._dummy(),
        "numPairs",
        "",
        typeConverter=TypeConverters.toInt)

    @keyword_only
    def __init__(self, samples=None, skipMultiAllelic=None, skipReference=None, numPairs=None):
        """
        Create the backing Java object and apply the keyword arguments as params.

        :type samples: list<str>
        :param samples: Value for the ``samples`` param
        :type skipMultiAllelic: bool
        :param skipMultiAllelic: Value for the ``skipMultiAllelic`` param
        :type skipReference: bool
        :param skipReference: Value for the ``skipReference`` param
        :type numPairs: int
        :param numPairs: Value for the ``numPairs`` param
        """
        super(IBDTransformer, self).__init__()
        java_class = "org.opencb.oskar.spark.variant.analysis.IBDTransformer"
        self._java_obj = self._new_java_obj(java_class, self.uid)
        # _input_kwargs is expected to be filled in by @keyword_only with the
        # keyword arguments of this call.
        given_kwargs = self._input_kwargs
        self.setParams(**given_kwargs)

    def getSamples(self):
        """Return the value of ``samples``, or its default."""
        return self.getOrDefault(self.samples)

    def setSamples(self, value):
        """Set ``samples`` and return the result of ``_set``."""
        return self._set(samples=value)

    def getSkipMultiAllelic(self):
        """Return the value of ``skipMultiAllelic``, or its default."""
        return self.getOrDefault(self.skipMultiAllelic)

    def setSkipMultiAllelic(self, value):
        """Set ``skipMultiAllelic`` and return the result of ``_set``."""
        return self._set(skipMultiAllelic=value)

    def getSkipReference(self):
        """Return the value of ``skipReference``, or its default."""
        return self.getOrDefault(self.skipReference)

    def setSkipReference(self, value):
        """Set ``skipReference`` and return the result of ``_set``."""
        return self._set(skipReference=value)

    def getNumPairs(self):
        """Return the value of ``numPairs``, or its default."""
        return self.getOrDefault(self.numPairs)

    def setNumPairs(self, value):
        """Set ``numPairs`` and return the result of ``_set``."""
        return self._set(numPairs=value)


class IBSTransformer(AbstractTransformer):
samples = Param(Params._dummy(), "samples", "List of samples to use for calculating the IBS",
typeConverter=TypeConverters.toListString)
Expand Down
25 changes: 25 additions & 0 deletions oskar-spark/src/main/python/pyoskar/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,31 @@ def histogram(self, df, inputCol, step=None):
"""
return HistogramTransformer(inputCol=inputCol, step=step).transform(df)

def ibd(self, df, samples=None, skipMultiAllelic=None, skipReference=None, numPairs=None):
    """
    Calculates the Identity By Descent (IBD) by applying an IBDTransformer to the dataframe.

    :type df: DataFrame
    :param df: Original dataframe
    :type samples: list<str>
    :param samples: List of samples to use for the calculation
    :type skipMultiAllelic: bool
    :param skipMultiAllelic: Skip variants where any of the samples has a secondary alternate
    :type skipReference: bool
    :param skipReference: Skip variants where both samples of the pair are HOM_REF
    :type numPairs: int
    :param numPairs: Forwarded unchanged to the transformer's ``numPairs`` param
    :rtype: DataFrame
    :return: Transformed dataframe
    """
    # All arguments are forwarded as-is, including any that are None.
    transformer = IBDTransformer(
        samples=samples,
        skipReference=skipReference,
        skipMultiAllelic=skipMultiAllelic,
        numPairs=numPairs)
    return transformer.transform(df)

def ibs(self, df, samples=None, skipMultiAllelic=None, skipReference=None, numPairs=None):
"""
Calculates the Identity By State.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ def test_hardy_weinberg(self):
def test_histogram(self):
    """Smoke test: build a histogram over the "start" column with step 1000000 and show it."""
    histogram_df = self.oskar.histogram(self.df, "start", 1000000)
    histogram_df.show(LIMIT)

def test_ibd(self):
    """Smoke test: run ``ibd`` with only the dataframe argument and show the result."""
    ibd_df = self.oskar.ibd(self.df)
    ibd_df.show(LIMIT)

def test_ibd_full(self):
    """
    Smoke test: run ``ibd`` with an explicit sample list and both skip flags enabled.

    The previous version called ``self.oskar.ibs`` here, so the IBD code path
    with explicit arguments was never exercised by this test.
    """
    ibd_df = self.oskar.ibd(
        self.df,
        samples=["NA12877", "NA12878", "NA12879"],
        skipMultiAllelic=True,
        skipReference=True)
    ibd_df.show(LIMIT)

def test_ibs(self):
    """Smoke test: run ``ibs`` with only the dataframe argument and show the result."""
    ibs_df = self.oskar.ibs(self.df)
    ibs_df.show(LIMIT)

Expand Down

0 comments on commit d6ea5b5

Please sign in to comment.