From b842bcfd05d9fd955c3465f0fecc00962b460cf5 Mon Sep 17 00:00:00 2001
From: Daniel Considine
Date: Thu, 21 Mar 2024 11:28:34 +0000
Subject: [PATCH] fix: changes to get_major_population() function

---
 src/gentropy/dataset/study_index.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/gentropy/dataset/study_index.py b/src/gentropy/dataset/study_index.py
index 9a7c6a2aa..b9e31b9a9 100644
--- a/src/gentropy/dataset/study_index.py
+++ b/src/gentropy/dataset/study_index.py
@@ -184,15 +184,14 @@ def has_summarystats(self: StudyIndex) -> Column:
         """
         return self.df.hasSumstats
 
-    def get_major_population(self: StudyIndex) -> DataFrame:
+    def get_major_population(self: StudyIndex) -> Column:
         """Extract major population from ldPopulationStructure rows with multiple ancestries.
 
         Returns:
-            DataFrame: Columns studyId and the extracted major population from ldPopulationStructure.
+            Column: The major population extracted from ldPopulationStructure, aliased as majorPopulation.
         """
-        return self.df.select(
-            "studyId",
-            f.array_max(f.col("ldPopulationStructure"))
+        return (
+            f.array_max(self.df.ldPopulationStructure)
             .getItem("ldPopulation")
-            .alias("majorPopulation"),
+            .alias("majorPopulation")
         )