Skip to content

Commit

Permalink
GWAS and zygosity information
Browse files Browse the repository at this point in the history
* Add "zygosity information" command: Similar to allele frequency,
but emits the individual-based stats of how many AA, Aa, and aa
genotypes there are for each variant.
* Better way to emit sorted calculations on mutations. New API
method getMutationsToNodeOrdered() emits nodes and mutation
IDs ordered by mutation position and allele.
* If you don't specify a phenotype file it will now use a random
vector.
* New flag --beta-only only calculates beta (the dot product), not
the additional statistics surrounding it (like plink does).
* GWAS can compute just beta (to mimic the dot-product calculation
  of XSI, which we compare against) or all of the fields necessary.
* Zygosity info breaks down how many individuals are heterozygous or
  homozygous for a particular variant.
* We optionally link in GNU Scientific Library (GSL) for computing
  p-values for GWAS. Use -DENABLE_GSL=ON to enable this; otherwise
  the p-value will just be NaN (with a warning).
  • Loading branch information
Proteios1998 authored and dcdehaas committed Jul 22, 2024
1 parent 0698f63 commit e0bb4e1
Show file tree
Hide file tree
Showing 12 changed files with 421 additions and 307 deletions.
30 changes: 27 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
cmake_minimum_required(VERSION 3.14)
project(grgl)
include(ExternalProject)

option(PYTHON_SUPPORT "Build Python loadable module" OFF)
option(ENABLE_CHECKERS "Enable external tools like clang-tidy" OFF)
option(ENABLE_TESTS "Enable automated test execution" ON)
option(ENABLE_BGEN "Enable BGEN support" ON)
option(ENABLE_BGEN "Enable BGEN support" OFF)
option(ENABLE_GSL "Enable GSL support for stat calculations" OFF)

if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release")
Expand Down Expand Up @@ -84,7 +86,6 @@ set(GRGL_PUBLIC_HEADERS
)

set(GRGL_CORE_SOURCES
${CMAKE_CURRENT_LIST_DIR}/src/calculations.cpp
${CMAKE_CURRENT_LIST_DIR}/src/build_shape.cpp
${CMAKE_CURRENT_LIST_DIR}/src/hap_index.cpp
${CMAKE_CURRENT_LIST_DIR}/src/grg.cpp
Expand All @@ -111,6 +112,25 @@ if (${ENABLE_BGEN})
add_compile_options(-DBGEN_ENABLED=1)
endif()

# We don't need AMPL bindings from gsl.
# Optional GSL support: when ENABLE_GSL is on, GSL is fetched and built as an
# external project, and GRGP_LIBS / GRGP_DEPS are set so that a target can link
# against it and depend on its build.
if (${ENABLE_GSL})
ExternalProject_Add(
gsl
# Source and build directories are the same path, so GSL is configured and
# built in-tree.
SOURCE_DIR ${CMAKE_BINARY_DIR}/third-party/gsl
BINARY_DIR ${CMAKE_BINARY_DIR}/third-party/gsl
# NOTE(review): "gsl-latest" is an unpinned archive and no URL_HASH is given,
# so the downloaded version may change between builds -- consider pinning.
URL "https://mirror.ibcp.fr/pub/gnu/gsl/gsl-latest.tar.gz"
CMAKE_ARGS ""
# GSL is configured via its own configure script rather than through CMake.
CONFIGURE_COMMAND ${CMAKE_BINARY_DIR}/third-party/gsl/configure --prefix=<INSTALL_DIR>
# NOTE(review): ${MAKE} is not defined in the visible part of this file;
# confirm it is set elsewhere (ExternalProject examples typically use $(MAKE)).
BUILD_COMMAND ${MAKE}
# Install and test steps are disabled; the library is used from the build tree.
INSTALL_COMMAND ""
TEST_COMMAND "")

# Because nothing is installed, reference the static library where the GSL
# build leaves it (.libs/) and use the build directory as the include root.
set(GRGP_LIBS ${CMAKE_BINARY_DIR}/third-party/gsl/.libs/libgsl.a)
set(GRGP_DEPS gsl)
include_directories(${CMAKE_BINARY_DIR}/third-party/gsl/)
# Defines GSL_ENABLED=1 for subsequently-declared targets.
add_compile_options(-DGSL_ENABLED=1)
endif()

# Library libgrgl.a for C++ projects that want to use GRGL.
add_library(grgl STATIC ${GRGL_CORE_SOURCES})
target_compile_options(grgl PRIVATE
Expand All @@ -126,9 +146,13 @@ target_link_libraries(grgl_tool tskit grgl ${BGEN_LIBS} z)
# Tool for processing GRG files (stats, etc).
add_executable(grgp_tool
${CMAKE_CURRENT_LIST_DIR}/src/grgp.cpp
${CMAKE_CURRENT_LIST_DIR}/src/calculations.cpp
)
set_target_properties(grgp_tool PROPERTIES OUTPUT_NAME "grgp")
target_link_libraries(grgp_tool grgl)
target_link_libraries(grgp_tool grgl ${GRGP_LIBS})
if (${ENABLE_GSL})
add_dependencies(grgp_tool ${GRGP_DEPS})
endif()


# Tool for merging GRGs.
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ RUN apt update && \
COPY . /grgl_src

# Install GRGL python API.
RUN cd /grgl_src && pip3 install wheel && python3 setup.py bdist_wheel
RUN cd /grgl_src && pip3 install wheel && python3 setup.py bdist_wheel --bgen --gsl
# Install GRGL command line tools and scripts. Installing the above python package
# will also build this, but there are some extra tools we might want.
RUN cd /grgl_src && mkdir cpp_build && cd cpp_build && mkdir /grgl_inst && \
cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/grgl_inst && \
cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_BGEN=ON -DENABLE_GSL=ON -DCMAKE_INSTALL_PREFIX=/grgl_inst && \
make -j && \
make install

Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ source /path/to/MyEnv/bin/activate
pip install --install-option="--copy-bins" -v -e .
```

BGEN support may not build easily on all platforms (such as MacOS). In that case you can use the `--no-bgen` option, such as:
* `python setup.py bdist_wheel --no-bgen`
* or `pip install --install-option="--no-bgen" --install-option="--copy-bins" -v -e .`
BGEN support is disabled by default. If you want to enable it:
* `python setup.py bdist_wheel --bgen`
* or `pip install --install-option="--bgen" --install-option="--copy-bins" -v -e .`

Build and installation should take at most a few minutes on the typical computer.

Expand Down
25 changes: 14 additions & 11 deletions include/grgl/grg.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ class GRG : public std::enable_shared_from_this<GRG> {
};

explicit GRG(size_t numSamples, uint16_t ploidy)
: m_numSamples(numSamples)
, m_ploidy(ploidy) {}
: m_numSamples(numSamples),
m_ploidy(ploidy) {}

virtual ~GRG() = default;
GRG(const GRG&) = delete;
Expand All @@ -78,7 +78,7 @@ class GRG : public std::enable_shared_from_this<GRG> {

size_t numSamples() const { return m_numSamples; }

/**
/**
* How many haploid samples are there per individual?
*
* @return The ploidy, usually 1 or 2. Individual coalescence support only works when ploidy==2.
Expand Down Expand Up @@ -138,6 +138,13 @@ class GRG : public std::enable_shared_from_this<GRG> {
return findIt != m_nodeToMutations.end();
}

/**
* Get pairs of mutation IDs and node IDs, ordered by the mutation position + allele (ascending).
*
* @return A vector of pairs, MutationID and NodeID (in that order).
*/
std::vector<std::pair<MutationId, NodeID>> getMutationsToNodeOrdered() const;

/**
* Visit nodes breadth-first, starting at the given nodes and following up or
* down edges.
Expand All @@ -149,10 +156,8 @@ class GRG : public std::enable_shared_from_this<GRG> {
* @param[in] maxQueueWidth The maximum width of the queue; restricts the number of
* end-to-end paths that will be visited.
*/
void visitBfs(GRGVisitor& visitor,
TraversalDirection direction,
const NodeIDList& seedList,
ssize_t maxQueueWidth = -1);
void
visitBfs(GRGVisitor& visitor, TraversalDirection direction, const NodeIDList& seedList, ssize_t maxQueueWidth = -1);

/**
* Visit nodes depth-first, starting at the given nodes and following up or
Expand All @@ -171,10 +176,8 @@ class GRG : public std::enable_shared_from_this<GRG> {
* forwardOnly will only visit nodes in the forward direction. It also causes
* nodes to be visited an arbitrary number of times.
*/
void visitDfs(GRGVisitor& visitor,
TraversalDirection direction,
const NodeIDList& seedList,
bool forwardOnly = false);
void
visitDfs(GRGVisitor& visitor, TraversalDirection direction, const NodeIDList& seedList, bool forwardOnly = false);

virtual std::vector<NodeIDSizeT> topologicalSort(TraversalDirection direction) = 0;

Expand Down
12 changes: 8 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@

C_MODULE_NAME = "_grgl"
ARG_DEBUG = "--debug-build"
ARG_NOBGEN = "--no-bgen"
ARG_BGEN = "--bgen"
ARG_COPYBINS = "--copy-bins"
ARG_GSL = "--gsl"

THISDIR = os.path.realpath(os.path.dirname(__file__))

Expand All @@ -20,12 +21,15 @@
if arg == ARG_DEBUG:
build_type = "Debug"
sys.argv.remove(ARG_DEBUG)
elif arg == ARG_NOBGEN:
extra_cmake_args.append("-DENABLE_BGEN=OFF")
sys.argv.remove(ARG_NOBGEN)
elif arg == ARG_BGEN:
extra_cmake_args.append("-DENABLE_BGEN=ON")
sys.argv.remove(ARG_BGEN)
elif arg == ARG_COPYBINS:
copy_bins = True
sys.argv.remove(ARG_COPYBINS)
elif arg == ARG_GSL:
extra_cmake_args.append("-DENABLE_GSL=ON")
sys.argv.remove(ARG_GSL)

class CMakeExtension(Extension):
def __init__(self, name, cmake_lists_dir=".", sources=[], extra_executables=[], **kwa):
Expand Down
Loading

0 comments on commit e0bb4e1

Please sign in to comment.