Skip to content

Commit

Permalink
Added bool remove_empty param to get_pseudobulk
Browse files Browse the repository at this point in the history
  • Loading branch information
PauBadiaM committed Nov 9, 2023
1 parent 75860c6 commit feabe4e
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 3 deletions.
11 changes: 11 additions & 0 deletions decoupler/tests/test_utilsanndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,17 @@ def test_get_pseudobulk():
assert pdata.layers['median'] is not None
assert pdata.layers['mean'] is not None

m = np.array([[6, 0, 1, 0], [2, 0, 2, 0], [1, 3, 3, 0], [0, 1, 1, 0], [1, 0, 1, 0], [0, 0, 0, 0]])
r = np.array(['B1', 'B2', 'B3', 'B4', 'B5', 'B6'])
c = np.array(['G1', 'G2', 'G3', 'G4'])
df = pd.DataFrame(m, index=r, columns=c)
smples = np.array(['S1', 'S1', 'S2', 'S2', 'S1', 'S1'])
groups = np.array(['C1', 'C1', 'C1', 'C1', 'C2', 'C2'])
obs = pd.DataFrame([smples, groups], columns=r, index=[sample_col, groups_col]).T
adata = AnnData(df.astype(np.float32), obs=obs)
pdata = get_pseudobulk(adata, sample_col, groups_col, min_cells=0, min_counts=0, min_prop=None, min_smpls=None, remove_empty=False)
assert (pdata.shape[0] == 4) & (pdata.shape[1] == 4)


def test_get_unq_dict():
col = pd.Series(['C1', 'C1', 'C2', 'C3'], index=['S1', 'S2', 'S3', 'S4'])
Expand Down
9 changes: 6 additions & 3 deletions decoupler/utils_anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def compute_psbulk(n_rows, n_cols, X, sample_col, groups_col, smples, groups, ob


def get_pseudobulk(adata, sample_col, groups_col, obs=None, layer=None, use_raw=False, mode='sum', min_cells=10,
min_counts=1000, dtype=np.float32, skip_checks=False, min_prop=None, min_smpls=None):
min_counts=1000, dtype=np.float32, skip_checks=False, min_prop=None, min_smpls=None, remove_empty=True):
"""
Summarizes expression profiles across cells per sample and group.
Expand Down Expand Up @@ -337,6 +337,8 @@ def get_pseudobulk(adata, sample_col, groups_col, obs=None, layer=None, use_raw=
min_smpls : int
Filter to remove genes by a minimum number of samples with non-zero values. Deprecated parameter,
check ``decoupler.filter_by_prop``.
remove_empty : bool
Whether to remove empty observations (rows) and features (columns), i.e. those whose values are all zero.
Returns
-------
Expand Down Expand Up @@ -386,8 +388,9 @@ def get_pseudobulk(adata, sample_col, groups_col, obs=None, layer=None, use_raw=
psbulk = AnnData(psbulk.astype(dtype), obs=new_obs, var=var, layers=layers)

# Remove empty samples and features
msk = psbulk.X == 0
psbulk = psbulk[~np.all(msk, axis=1), ~np.all(msk, axis=0)].copy()
if remove_empty:
msk = psbulk.X == 0
psbulk = psbulk[~np.all(msk, axis=1), ~np.all(msk, axis=0)].copy()

# Place first element of mode dict as X
if type(mode) is dict:
Expand Down

0 comments on commit feabe4e

Please sign in to comment.