Source code for mito.tl.annotate

"""
Main MiTo class for MT-SNVs single-cell phylogenies annotation.
"""

import logging
import numpy as np
import pandas as pd
import cassiopeia as cs
from tqdm import tqdm
from itertools import product
from typing import Iterable, Tuple
from joblib import Parallel, delayed, parallel_backend, cpu_count
from cassiopeia.data import CassiopeiaTree
from scipy.stats import fisher_exact
from statsmodels.sandbox.stats.multicomp import multipletests
from sklearn.metrics import silhouette_score
from cassiopeia.tools.fitness_estimator._lbi_jungle import LBIJungle
from ..pp.distances import weighted_jaccard
from ..ut.phylo_utils import get_clades, get_internal_node_stats
from ..ut.utils import Timer, rescale
from .phylo import _get_leaves_order


##


def get_ranges_and_gaps(s: pd.Series, label):

    vals = s.values
    m = (vals == label)

    def _get_runs(bool_arr):
        padded = np.concatenate(([False], bool_arr, [False]))
        d = np.diff(padded.astype(int))
        starts = np.where(d == 1)[0]
        ends   = np.where(d == -1)[0]
        return list(zip(starts, ends))

    present = _get_runs(m)
    gaps    = _get_runs(~m)
    return present, gaps


##


def find_longest_stretch(ranges, series):

    n = len(ranges)
    if n == 0:
        return (), ()
    
    non_nan = ~series.isna().values
    prefix = np.concatenate([[0], non_nan.cumsum()])
    
    best_start = 0
    best_len = 1
    curr_start = 0

    for i in range(1, n):
        prev_end = ranges[i-1][1]
        curr_begin = ranges[i][0]
        if prefix[curr_begin] - prefix[prev_end] == 0:
            pass
        else:
            length = i - curr_start
            if length > best_len:
                best_len = length
                best_start = curr_start
                best_end = i - 1
            curr_start = i

    length = n - curr_start
    if length > best_len:
        best_len = length
        best_start = curr_start
        best_end = n - 1
    else:
        best_end = best_start + best_len - 1

    best_seq = tuple(ranges[best_start:best_end+1])
    others = tuple(r for idx, r in enumerate(ranges) if idx < best_start or idx > best_end)

    return best_seq, others


##


def _get_muts_order(tree):

    tree_ = tree.copy()
    model = MiToTreeAnnotator(tree_)
    model.get_T()
    model.get_M()
    model.extract_mut_order()

    return model.ordered_muts


##


def _compute_mutation_enrichment_batch(start_pos, end_pos, T_columns, tree_values, T_values, muts_size, alpha):
    """
    Compute mutation enrichment for a batch of tree nodes using joblib.
    
    Parameters:
    - start_pos: start index for node batch
    - end_pos: end index for node batch  
    - T_columns: column names (nodes) to process
    - tree_values: tree mutation matrix values
    - T_values: T matrix values (cell assignment matrix)
    - muts_size: number of mutations
    - alpha: significance level for FDR correction
    
    Returns:
    - List of tuples: [(node_name, pvals), ...]
    """
    results = []
    for i in range(start_pos, end_pos):
        node_name = T_columns[i]
        node_T_values = T_values[:,i]
        target_ratio_array = np.zeros(muts_size)
        oddsratio_array = np.zeros(muts_size)
        pvals = np.zeros(muts_size)
        test_lineage = node_T_values == 1
        
        for j in range(muts_size):
            test_mut = tree_values[:,j] == 1
            n_mut_lineage = np.sum(test_mut & test_lineage)
            n_mut_no_lineage = np.sum(test_mut & ~test_lineage)
            n_no_mut_lineage = np.sum(~test_mut & test_lineage)
            n_no_mut_no_lineage = np.sum(~test_mut & ~test_lineage)
            target_ratio_array[j] = n_mut_lineage / (np.sum(test_mut) + 1e-8)
            
            # Fisher's exact test
            oddsratio, pvalue = fisher_exact(
                [
                    [n_mut_lineage, n_mut_no_lineage],
                    [n_no_mut_lineage, n_no_mut_no_lineage],
                ],
                alternative='greater',
            )
            oddsratio_array[j] = oddsratio
            pvals[j] = pvalue
        
        # Adjust p-values (FDR correction)
        pvals = multipletests(pvals, alpha=alpha, method="fdr_bh")[1]
        results.append((node_name, pvals))
    
    return results


##


[docs] class MiToTreeAnnotator(): """ MiTo tree annotation class. Performs clonal inference from an arbitrary MT-SNVs-based phylogeny. """ def __init__(self, tree: CassiopeiaTree): """ Initialize class slots from input CassiopeiaTree. """ # Slots self.tree = tree # .copy() self.T = None self.M = None self.solutions = None self.clone_df = None self.internal_nodes_df = None self.clonal_nodes = None self.mut_df = None self.ordered_muts = None self.S_aggregate = None self.params = {} ##
[docs] def get_T(self, with_root: bool = True): """ Compute the "cell assignment" matrix, T. T is a cell x clade (internal node) binary matrix mapping each cell i to every clade j. """ logging.info('Retrieve cell assignment to tree clades') clades = get_clades(self.tree, with_root=with_root, with_singletons=True) T = np.array([[x in clades[clade] for x in self.tree.leaves] for clade in clades]).astype(int) T = ( pd.DataFrame(T, index=list(clades.keys()), columns=self.tree.leaves) .T.loc[self.tree.layers['transformed'].index] ) self.T = T
##
[docs] def get_M(self, alpha: float = 0.05, n_cores: int = None): """ Compute the "mutation enrichment" matrix, M. M is a mut x clade matrix storing for each mutation i and clade j the enrichment value defined as -log10(pval) from a Fisher's Exact test. Uses joblib for parallel processing. """ muts = self.tree.layers['transformed'].columns logging.info(f'Compute mutations enrichment: n muts={len(muts)}') # Set number of cores if n_cores is None: n_cores = max(1, cpu_count()-1) # Cache matrices once to avoid multiple copies across workers tree_values = self.tree.layers['transformed'].values T_values = self.T.values T_columns = self.T.columns.tolist() # Prepare batches n_nodes = len(T_columns) quotient = n_nodes // n_cores residue = n_nodes % n_cores intervals = [] start = end = 0 for i in range(n_cores): end = start + quotient + (i < residue) if end == start: break intervals.append((start, end)) start = end # Parallel computation using joblib like the filter pattern with parallel_backend("loky", inner_max_num_threads=1): result_list = Parallel(n_jobs=len(intervals))( delayed(_compute_mutation_enrichment_batch)( start_pos, end_pos, T_columns, tree_values, T_values, muts.size, alpha ) for start_pos, end_pos in intervals ) # Flatten results from all batches P = {} for batch_results in result_list: for node_name, pvals in batch_results: P[node_name] = pvals M = pd.DataFrame(P, index=muts) M = -np.log10(M) self.M = M
## def _get_disjoint_clones(self, ordered_clones): cats = ordered_clones.value_counts().index resolve_list = [] for cat in cats: clone_ranges, gaps = get_ranges_and_gaps(ordered_clones, cat) for r in clone_ranges: assert (ordered_clones.iloc[r[0]:r[1]]==cat).all() for g in gaps: assert (ordered_clones.iloc[g[0]:g[1]]!=cat).all() gaps = [ g for g in gaps if g[0]!=0 and g[1]!=len(ordered_clones) ] all_gaps_nans = True for g in gaps: if not ordered_clones.iloc[g[0]:g[1]].isna().all(): all_gaps_nans = False if not all_gaps_nans: resolve_list.append([cat, clone_ranges]) return resolve_list ## def _check_inference(self): ordered_clones = self.tree.cell_meta.loc[_get_leaves_order(self.tree), 'MiTo clone'] clones = ordered_clones.value_counts().index assert len(self._get_disjoint_clones(ordered_clones))==0 assert ( (self.tree.cell_meta .groupby('lca')['MiTo clone'] .nunique()==1) .all() ) for clone in clones: nodes = ( self.tree.cell_meta .loc[self.tree.cell_meta['MiTo clone']==clone, 'lca'] .unique() ) assert len(nodes) <= 1 if len(nodes)==1: lca = nodes[0] cells = ( self.tree.cell_meta .loc[self.tree.cell_meta['MiTo clone']==clone] .index.to_list() ) lca_ = self.tree.find_lca(*cells) assert(lca==lca_) ##
[docs] def resolve_ambiguous_clones( self, df_predict: pd.DataFrame, merging_treshold: float = .7, af_treshold: float = .0, add_to_meta: bool = False, ) -> Tuple[pd.Series,pd.Series]: """ Final clonal resolution process. Tries to merge similar clones, iteratively. First, the (raw) AF matrix is aggregated at the clonal level using MiTo clones. Then, clone-clone similarities are computed using (1-) weighted jaccard distances among these aggregated MT-SNVs clonal profiles. At each round, the tiniest "ambiguous" clone is selected for merging with its smallest "interacting clone". If the merge is successfull, the clonal assignment table is updated and the process go through other merging rounds, until no ambiguous clones remain. Unresolved clones (if any) are annotated as NaNs in the final MiTo clone column which is appended to `self.tree.cell_meta`. """ self.params['merging_treshold'] = merging_treshold remained_unresolved = [] try_merge = True n_trials = 0 # Here we go while try_merge: # Aggregate X_agg = ( self.tree.layers['raw'] .join(df_predict[['MiTo clone']]) .groupby('MiTo clone') .median() ) X_agg = X_agg.loc[:,np.any(X_agg>af_treshold, axis=0)] # Compute similarity of putative clones (i.e., aggregated profiles) w = np.nanmedian(np.where(X_agg>af_treshold, X_agg, np.nan), axis=0) S_agg = 1-weighted_jaccard((X_agg>af_treshold).astype(int), w=w) self.S_aggregate = pd.DataFrame(S_agg, index=X_agg.index, columns=X_agg.index) # Spot interacting clones S_agg_long = ( pd.DataFrame( np.triu(S_agg, k=1), index=X_agg.index.to_list(), columns=X_agg.index.to_list() ) .melt(ignore_index=False) .reset_index() ) S_agg_long.columns = ['clone1', 'clone2', 'similarity'] S_agg_long = ( S_agg_long .query('clone1!=clone2 and similarity>=@merging_treshold') .reset_index(drop=True).drop_duplicates() ) if S_agg_long.shape[0] == 0: try_merge = False # Attempt merging, starting from the tiniest ambiguous clone # Gather clonal stats clone_df = ( df_predict .loc[:,['MiTo clone', 'lca', 'n cells', 'muts']] .reset_index(drop=True).drop_duplicates() .sort_values('n cells') .dropna() .rename(columns={'MiTo clone':'old'}) .set_index('old') ) self.clone_df = clone_df # Find tiniest ambiguous clone if try_merge: ambiguous_clones = set(S_agg_long['clone1']) | set(S_agg_long['clone2']) for clone in clone_df.index: if clone in ambiguous_clones and not clone in remained_unresolved: try_merge = True break else: try_merge = False if try_merge: # Find the tiniest ambiguous clone interactor interactions = S_agg_long.query('clone1==@clone or clone2==@clone') other_clones = [] for i in range(interactions.shape[0]): int_clones = interactions.iloc[i,:].values[:2] other_clones.append(int_clones[int_clones!=clone][0]) int_clone = clone_df.loc[other_clones].sort_values('n cells').index[0] # Find lca new clone lca_clone = clone_df.loc[clone, 'lca'] lca_int_clone = clone_df.loc[int_clone, 'lca'] if lca_clone == lca_int_clone: new_lca = lca_clone else: new_lca = self.tree.find_lca(*[lca_clone, lca_int_clone]) # Merge clones mutations clone_muts = set(clone_df.loc[clone, 'muts'].split(';')) int_clone_muts = set(clone_df.loc[int_clone, 'muts'].split(';')) new_clone_muts = ';'.join(list(clone_muts | int_clone_muts)) # Check merging possibility cells_merged_clone = ( df_predict .loc[ lambda x: (x['MiTo clone'] == clone) | (x['MiTo clone'] == int_clone)] .index ) cells_clone = df_predict.loc[lambda x: x['MiTo clone'] == clone].index cells_int_clone = df_predict.loc[lambda x: x['MiTo clone'] == int_clone].index test = len(set(cells_merged_clone) - (set(cells_clone) | set(cells_int_clone)) ) == 0 if test: df_predict.loc[cells_merged_clone, 'MiTo clone'] = int_clone df_predict.loc[cells_merged_clone, 'median cell similarity'] = ( self.tree.get_attribute(new_lca, 'similarity') ) df_predict.loc[cells_merged_clone, 'n cells'] = cells_merged_clone.size df_predict.loc[cells_merged_clone, 'lca'] = new_lca df_predict.loc[cells_merged_clone, 'muts'] = new_clone_muts else: remained_unresolved.append(clone) n_trials += 1 # Put unresolved_clones as NaNs df_predict.loc[df_predict['MiTo clone'].isin(remained_unresolved), 'MiTo clone'] = np.nan # Last conflict resolution: solve disjoint clones, if any ordered_clones = df_predict.loc[_get_leaves_order(self.tree), 'MiTo clone'] resolve_list = self._get_disjoint_clones(ordered_clones) if resolve_list: for i in range(len(resolve_list)): clone, clone_ranges = resolve_list[i] accepted, discarded = find_longest_stretch(clone_ranges, ordered_clones) accepted_range = (accepted[0][0], accepted[-1][1]) final_clone_cells = ordered_clones.index[accepted_range[0]:accepted_range[1]] df_predict.loc[final_clone_cells, 'lca'] = ( self.tree.find_lca(*final_clone_cells) ) final_na_set = [] for r in discarded: final_na_set.extend( ordered_clones.index[r[0]:r[1]] ) df_predict.loc[final_na_set, 'MiTo clone'] = np.nan df_predict.loc[final_na_set, 'lca'] = np.nan df_predict.loc[final_na_set, 'muts'] = np.nan # Rename MiTo clones according to their lca size sizes = df_predict['lca'].value_counts() mapping = { x : f'MT-{i}' for i,x in enumerate(sizes.index) } df_predict['MiTo clone'] = df_predict['lca'].map(mapping) self.clonal_nodes = df_predict.loc[lambda x: ~x['lca'].isna(), 'lca'].unique().tolist() # Check if the df_predict info should be added to self.tree.cell_meta if add_to_meta: df_predict['MiTo clone'] = pd.Categorical(df_predict['MiTo clone']) df_predict['lca'] = pd.Categorical(df_predict['lca']) df_predict['muts'] = pd.Categorical(df_predict['muts']) cols = [ x for x in self.tree.cell_meta.columns \ if x not in df_predict.columns ] self.tree.cell_meta = self.tree.cell_meta[cols] self.tree.cell_meta = self.tree.cell_meta.join(df_predict) self._check_inference() return df_predict['MiTo clone'], df_predict['median cell similarity']
##
[docs] def compute_cell_fitness(self): """ LBI method (Neher et al., 2014) from Cassiopeia. """ logging.info('Estimate cell fitness scores') model = LBIJungle(random_seed=1234) model.estimate_fitness(self.tree) self.tree.set_attribute(self.tree.root, 'fitness', 0) d = {} for node in self.tree.nodes: d[node] = self.tree.get_attribute(node, 'fitness') # Rescale s = pd.Series(d) s = (s-s.mean()) / s.std(ddof=0) # Add cell values to tree metadata self.tree.cell_meta['fitness'] = s.loc[self.tree.cell_meta.index]
##
[docs] def compute_expansions(self): """ Call cassiopeia.tools.compute_expansion_pvalues. Compute clonal expansion pvalues as descrived in Yang, Jones et al, BioRxiv (2021). """ logging.info('Compute expansion pvalues') cs.tl.compute_expansion_pvalues(self.tree)
##
[docs] def extract_mut_order(self, pval_tresh: float = .01): """ Extract diagonal-order of MT-SNVs using mutation assignments, to create a ordered list of MT-SNVs for plotting. """ assert (self.M.index == self.tree.layers['transformed'].columns).all() # Define a MT-SNVs treshold mut_df = (10**(-self.M.max(axis=1))<=pval_tresh).to_frame('assignment') mut_df['prevalence'] = self.tree.layers['transformed'].sum(axis=0) / len(self.tree.leaves) mut_df['top_node'] = self.M.columns[self.M.values.argmax(axis=1)] top_nodes = mut_df.loc[mut_df['assignment'], 'top_node'].to_list() mutation_order = [] for node in self.tree.depth_first_traverse_nodes(): if node in top_nodes: mut = mut_df.loc[mut_df['top_node']==node].index[0] mutation_order.append(mut) unassigned_muts = mut_df.loc[~mut_df['assignment']].sort_values('prevalence').index.to_list() mutation_order += unassigned_muts self.ordered_muts = mutation_order self.mut_df = mut_df
##
[docs] def infer_clones(self, similarity_percentile: float = 85, mut_enrichment_treshold: int = 5) -> pd.DataFrame: """ A MT-SNVs-specific re-adaptation of the recursive approach described in the MethylTree paper (... et al., 2025). """ # Prep lists for recursion tree_list = [] df_list = [] # Get tree topology and mutation_enrichment tables if self.T is None or self.M is None: self.get_T() self.get_M() # Set usable_mutations is_enriched = (self.M.values>=mut_enrichment_treshold).any(axis=1) mut_enrichment = self.M.loc[is_enriched,:] usable_mutations = set(mut_enrichment.index) self.params['mut_enrichment_treshold'] = mut_enrichment_treshold self.params['n total mutations'] = self.tree.layers['raw'].shape[1] self.params['n usable_mutations'] = len(usable_mutations) # Convert the dissimilarity into similarity matrix D = self.tree.get_dissimilarity_map() S = 1-D # Calculate similarity treshold similarity_treshold = np.percentile(S.values, similarity_percentile) self.params['similarity_treshold'] = similarity_treshold # Set median similarity among clade cells as tree node attributes clades = get_clades(self.tree, with_singletons=True) for node in clades: cells = list(clades[node]) s = np.median(S.loc[cells,cells].values) self.tree.set_attribute(node, 'similarity', s) ## # Internal, recursive functions =========================================================== # def _find_clones(tree, node, usable_mutations): if not usable_mutations: _collect_clones(tree, node) return if tree.is_leaf(node): _collect_clones(tree, node) elif ( ((mut_enrichment.loc[list(usable_mutations), node] >= mut_enrichment_treshold).any()) & \ (tree.get_attribute(node, 'similarity') >= similarity_treshold) & \ (not tree.is_leaf(node)) ): triggered = { mut for mut in usable_mutations \ if mut_enrichment.loc[mut,node] >= mut_enrichment_treshold } tree.set_attribute(node, 'muts', list(triggered)) new_usable = usable_mutations - triggered _refine_clones(tree, node, new_usable) else: for child in tree.children(node): _find_clones(tree, child, usable_mutations) ## def _collect_clones(tree, node_tmp): tree_list.append(node_tmp) leaves = tree.leaves_in_subtree(node_tmp) df_tmp = pd.DataFrame({"cell": leaves}) df_tmp["MiTo clone"] = f"MT-{len(df_list)}" df_tmp["median cell similarity"] = tree.get_attribute(node_tmp, 'similarity') df_tmp["n cells"] = len(leaves) df_tmp['lca'] = tree.find_lca(*leaves) if len(leaves)>1 else np.nan try: df_tmp['muts'] = ';'.join(tree.get_attribute(node_tmp, 'muts')) except: pass df_list.append(df_tmp) ## def _collect_clade_info(tree, node_tmp, usable_mutations, level=0, data_list=[], name="0"): valid_tmp = ( ((mut_enrichment.loc[list(usable_mutations), node_tmp] >= mut_enrichment_treshold).any()) & \ (tree.get_attribute(node_tmp, 'similarity') >= similarity_treshold) & \ (not tree.is_leaf(node_tmp)) ) data_list.append([level, int(valid_tmp), len(tree.leaves_in_subtree(node_tmp)), name]) for j0, child_tmp in enumerate(tree.children(node_tmp)): _collect_clade_info( tree, child_tmp, usable_mutations, level=level+1, data_list=data_list, name=f"{name},{j0}" ) ## def _add_clade_info(tree, node_tmp, target_level, target_name_list, level=0, name="0"): valid_list = [] invalid_list = [] for target_name in target_name_list: condition_1 = target_name == name # this sub-tree matches our target names condition_2 = target_name.startswith(name) # this sub-tree is along the branch that leads to the target sub-tree, therefore acceptable condition_3 = name.startswith(target_name) # this sub-tree is contained within one of the target sub-trees valid_list.append(condition_1) invalid_list.append(condition_2 or condition_3) valid = np.sum(valid_list) > 0 if not valid: valid = np.sum(invalid_list) == 0 if valid: _collect_clones(tree, node_tmp) if name not in target_name_list: target_name_list.append(name) if level < target_level: for j0, child_tmp in enumerate(tree.children(node_tmp)): _add_clade_info( tree, child_tmp, target_level, target_name_list, level=level + 1, name=f"{name},{j0}", ) else: return ## def _refine_clones(tree, node, usable_mutations): if not usable_mutations: _collect_clones(tree, node) return data_list = [] _collect_clade_info(tree, node, usable_mutations, level=0, data_list=data_list) df_tmp_orig = pd.DataFrame( np.array(data_list), columns=["level", "score", "cell_N", "name"] ) df_tmp_orig["level"] = df_tmp_orig["level"].astype(int) df_tmp_orig["score"] = df_tmp_orig["score"].astype(float) df_tmp_orig["cell_N"] = df_tmp_orig["cell_N"].astype(int) df_tmp = ( df_tmp_orig.groupby("level") .agg({"score": "mean", "cell_N": "sum"}) .reset_index() ) df_tmp = df_tmp[(df_tmp["score"] == 1)] target_level = df_tmp["level"].max() target_name_list = df_tmp_orig[df_tmp_orig["level"] == target_level]["name"].to_list() _add_clade_info(tree, node, target_level, target_name_list) # ============================================================================================ # # Fire recursion! tree_ = self.tree.copy() _find_clones(tree_, tree_.root, usable_mutations) # Get results df_predict = pd.concat(df_list, ignore_index=True) df_predict = df_predict.set_index('cell') df_predict.loc[df_predict['muts'].isna(), 'MiTo clone'] = np.nan return df_predict
##
[docs] def clonal_inference( self, similarity_tresholds: Iterable[int] = [ 85, 90, 95, 99 ], mut_enrichment_tresholds: Iterable[int] = [ 3, 5, 10 ], merging_treshold: Iterable[float] = [ .25, .5, .75 ], af_treshold: float = .0, weight_silhouette: float =.3, weight_n_clones: float = .4, weight_similarity: float = .3, max_fraction_unassigned: float = .05, n_cores: int = None, ): """ Optimize tresholds for `self.infer_clones` and pick clonal labels with best silhouette scores across the attempted splits. """ T = Timer() T.start() # Get tree topology and mutation_enrichment tables if self.T is None or self.M is None: self.get_T() self.get_M(n_cores=n_cores) # Define combos combos = list(product(similarity_tresholds, mut_enrichment_tresholds, merging_treshold)) logging.info(f'Start Grid Search. n hyper-parameter combinations to explore: {len(combos)}') silhouettes = [] unassigned = [] n_clones = [] similarities = [] # Grid search for i, (s, m, j) in enumerate(tqdm(combos, total=len(combos), desc="Grid Search")): try: df_predict = self.infer_clones(similarity_percentile=s, mut_enrichment_treshold=m) labels, sim = self.resolve_ambiguous_clones( df_predict, merging_treshold=j, af_treshold=af_treshold, add_to_meta=False ) test = labels.isna() labels = labels.loc[~test] D = self.tree.get_dissimilarity_map().loc[labels.index,labels.index] assert (D.index == labels.index).all() sil = silhouette_score(X=D.values, labels=labels.values, metric='precomputed') if labels.unique().size>=2 else 0 silhouettes.append(sil) unassigned.append(test.sum()/labels.size) n_clones.append(labels.unique().size) similarities.append(sim.mean()) except: # Some error occurred with this hyper-parameter combo pass # Pick optimal combination, and perform final splitting self.solutions = ( pd.DataFrame({'silhouette':silhouettes, 'unassigned':unassigned, 'n_clones':n_clones, 'similarity':similarities}) .assign( sil_rescaled = lambda x: rescale(x['silhouette']), sim_rescaled = lambda x: rescale(x['similarity']), n_clones_rescaled = lambda x: rescale(-x['n_clones']), score = lambda x: weight_silhouette * x['sil_rescaled'] + \ weight_n_clones * x['n_clones_rescaled'] + \ weight_similarity * x['similarity'] ) .sort_values('score', ascending=False) ) ranked_solutions = ( self.solutions .query('unassigned<=@max_fraction_unassigned') .sort_values('score', ascending=False) ) if ranked_solutions.shape[0]>0: s, m, j = combos[ranked_solutions.index[0]] else: raise ValueError( f''' None of the solution tested falls below the max_fraction_unassigned treshold: {max_fraction_unassigned} ''' ) # Final round logging.info(f'Hyper-params chosen: similarity_percentile={s}, mut_enrichment_treshold={m}, merging_treshold={j}') df_predict = self.infer_clones(similarity_percentile=s, mut_enrichment_treshold=m) _, _ = self.resolve_ambiguous_clones( df_predict, merging_treshold=j, af_treshold=af_treshold, add_to_meta=True ) # Calculate clonal expansions self.compute_expansions() # Estimate single cell fitness self.compute_cell_fitness() # Retrieve mutation order for plotting self.extract_mut_order() # Retrieve internal node stats self.internal_nodes_df = get_internal_node_stats(self.tree) logging.info(f'MiTo clonal inference finished. {T.stop()}')
##