Source code for mito.tl.bootstrap

"""
Bootstrap utils.
"""

import numpy as np
from scipy.sparse import issparse, csr_matrix
from anndata import AnnData


##


def bootstrap_allele_tables(afm, layer='AD', frac_char_resampling=.8):
    """
    Bootstrap of an Allele Frequency Matrix (AFM) layer.
    Both sparse matrices and dense .layers can be passed.
    """

    # Get layer
    if layer in afm.layers:
        X = afm.layers[layer]
        X = X if not issparse(X) else X.toarray()
    else:
        raise KeyError(f'{layer} not present in afm! Check your inputs...')

    # Resample afm.var index
    n = X.shape[1]
    if frac_char_resampling == 1:
        resampled_idx = np.random.choice(np.arange(n), n, replace=True) 
    else:    
        resampled_idx = np.random.choice(np.arange(n), round(n*frac_char_resampling), replace=False)   

    return X[:,resampled_idx], resampled_idx
    


##


def jackknife_allele_tables(afm, layer='AD'):
    """
    Jackknife of an Allele Frequency Matrix (AFM) layer.
    Both sparse matrices and dense .layers can be passed.
    """

    # Get layer
    if layer in afm.layers:
        X = afm.layers[layer]
        X = X if not issparse(X) else X.toarray()
    else:
        raise KeyError(f'{layer} not present in afm! Check your inputs...')

    # Resample afm.var index
    n = X.shape[1]
    to_exclude = np.random.choice(np.arange(n), 1)[0]
    resampled_idx = [ x for x in np.arange(n) if x != to_exclude ]

    return X[:,resampled_idx], resampled_idx


##


[docs] def bootstrap_MiTo( afm: AnnData, boot_replicate: str = 'observed', boot_strategy: str ='feature_resampling', frac_char_resampling: float = .8 ) -> AnnData: """ Bootstrap MAESTER/RedeeM Allele Frequency matrices. """ if boot_replicate != 'observed': cov_layer = 'site_coverage' if 'site_coverage' in afm.layers else 'DP' if boot_strategy == 'jacknife': AD, _ = jackknife_allele_tables(afm, layer='AD') cov, idx = jackknife_allele_tables(afm, layer=cov_layer) # USE SITE, NBBB elif boot_strategy == 'feature_resampling': AD, _ = bootstrap_allele_tables(afm, layer='AD', frac_char_resampling=frac_char_resampling) cov, idx = bootstrap_allele_tables(afm, layer=cov_layer, frac_char_resampling=frac_char_resampling) # USE SITE, NBBB elif boot_strategy == 'counts_resampling': raise ValueError(f'#TODO: {boot_strategy} boot_strategy. This strategy is not supported yet.') else: raise ValueError(f'{boot_strategy} boot_strategy is not supported...') AF = csr_matrix(np.divide(AD, (cov+.0000001))) AD = csr_matrix(AD) cov = csr_matrix(cov) afm_new = AnnData(X=AF, obs=afm.obs, var=afm.var.iloc[idx,:], uns=afm.uns, layers={'AD':AD, cov_layer:cov}) else: afm_new = afm.copy() return afm_new
## def bootstrap_bin( afm: AnnData, boot_replicate: str = 'observed', boot_strategy: str ='feature_resampling', frac_char_resampling: float = .8 ) -> AnnData: """ Bootstrap scWGS/Cas9 AFMs. """ if boot_replicate != 'observed': if boot_strategy == 'jacknife': X_new, idx = jackknife_allele_tables(afm, layer='bin') elif boot_strategy == 'feature_resampling': X_new, idx = bootstrap_allele_tables(afm, layer='bin', frac_char_resampling=frac_char_resampling) else: raise ValueError(f'{boot_strategy} boot_strategy is not supported...') X_new = csr_matrix(X_new) afm_new = AnnData( obs=afm.obs, var=afm.var.iloc[idx,:], uns=afm.uns, layers={'bin':X_new} ) if 'priors' in afm.varm: afm_new.varm['priors'] = afm.varm['priors'][idx,:] else: afm_new = afm.copy() return afm_new ##