"""
Main MiTo class for MT-SNVs single-cell phylogenies annotation.
"""
import logging
import numpy as np
import pandas as pd
import cassiopeia as cs
from tqdm import tqdm
from itertools import product
from typing import Iterable, Tuple
from joblib import Parallel, delayed, parallel_backend, cpu_count
from cassiopeia.data import CassiopeiaTree
from scipy.stats import fisher_exact
from statsmodels.sandbox.stats.multicomp import multipletests
from sklearn.metrics import silhouette_score
from cassiopeia.tools.fitness_estimator._lbi_jungle import LBIJungle
from ..pp.distances import weighted_jaccard
from ..ut.phylo_utils import get_clades, get_internal_node_stats
from ..ut.utils import Timer, rescale
from .phylo import _get_leaves_order
##
def get_ranges_and_gaps(s: pd.Series, label):
vals = s.values
m = (vals == label)
def _get_runs(bool_arr):
padded = np.concatenate(([False], bool_arr, [False]))
d = np.diff(padded.astype(int))
starts = np.where(d == 1)[0]
ends = np.where(d == -1)[0]
return list(zip(starts, ends))
present = _get_runs(m)
gaps = _get_runs(~m)
return present, gaps
##
def find_longest_stretch(ranges, series):
n = len(ranges)
if n == 0:
return (), ()
non_nan = ~series.isna().values
prefix = np.concatenate([[0], non_nan.cumsum()])
best_start = 0
best_len = 1
curr_start = 0
for i in range(1, n):
prev_end = ranges[i-1][1]
curr_begin = ranges[i][0]
if prefix[curr_begin] - prefix[prev_end] == 0:
pass
else:
length = i - curr_start
if length > best_len:
best_len = length
best_start = curr_start
best_end = i - 1
curr_start = i
length = n - curr_start
if length > best_len:
best_len = length
best_start = curr_start
best_end = n - 1
else:
best_end = best_start + best_len - 1
best_seq = tuple(ranges[best_start:best_end+1])
others = tuple(r for idx, r in enumerate(ranges) if idx < best_start or idx > best_end)
return best_seq, others
##
def _get_muts_order(tree):
tree_ = tree.copy()
model = MiToTreeAnnotator(tree_)
model.get_T()
model.get_M()
model.extract_mut_order()
return model.ordered_muts
##
def _compute_mutation_enrichment_batch(start_pos, end_pos, T_columns, tree_values, T_values, muts_size, alpha):
"""
Compute mutation enrichment for a batch of tree nodes using joblib.
Parameters:
- start_pos: start index for node batch
- end_pos: end index for node batch
- T_columns: column names (nodes) to process
- tree_values: tree mutation matrix values
- T_values: T matrix values (cell assignment matrix)
- muts_size: number of mutations
- alpha: significance level for FDR correction
Returns:
- List of tuples: [(node_name, pvals), ...]
"""
results = []
for i in range(start_pos, end_pos):
node_name = T_columns[i]
node_T_values = T_values[:,i]
target_ratio_array = np.zeros(muts_size)
oddsratio_array = np.zeros(muts_size)
pvals = np.zeros(muts_size)
test_lineage = node_T_values == 1
for j in range(muts_size):
test_mut = tree_values[:,j] == 1
n_mut_lineage = np.sum(test_mut & test_lineage)
n_mut_no_lineage = np.sum(test_mut & ~test_lineage)
n_no_mut_lineage = np.sum(~test_mut & test_lineage)
n_no_mut_no_lineage = np.sum(~test_mut & ~test_lineage)
target_ratio_array[j] = n_mut_lineage / (np.sum(test_mut) + 1e-8)
# Fisher's exact test
oddsratio, pvalue = fisher_exact(
[
[n_mut_lineage, n_mut_no_lineage],
[n_no_mut_lineage, n_no_mut_no_lineage],
],
alternative='greater',
)
oddsratio_array[j] = oddsratio
pvals[j] = pvalue
# Adjust p-values (FDR correction)
pvals = multipletests(pvals, alpha=alpha, method="fdr_bh")[1]
results.append((node_name, pvals))
return results
##
[docs]
class MiToTreeAnnotator():
"""
MiTo tree annotation class. Performs clonal inference from an arbitrary
MT-SNVs-based phylogeny.
"""
def __init__(self, tree: CassiopeiaTree):
"""
Initialize class slots from input CassiopeiaTree.
"""
# Slots
self.tree = tree # .copy()
self.T = None
self.M = None
self.solutions = None
self.clone_df = None
self.internal_nodes_df = None
self.clonal_nodes = None
self.mut_df = None
self.ordered_muts = None
self.S_aggregate = None
self.params = {}
##
[docs]
def get_T(self, with_root: bool = True):
"""
Compute the "cell assignment" matrix, T.
T is a cell x clade (internal node) binary matrix mapping each cell i to every clade j.
"""
logging.info('Retrieve cell assignment to tree clades')
clades = get_clades(self.tree, with_root=with_root, with_singletons=True)
T = np.array([[x in clades[clade] for x in self.tree.leaves] for clade in clades]).astype(int)
T = (
pd.DataFrame(T, index=list(clades.keys()), columns=self.tree.leaves)
.T.loc[self.tree.layers['transformed'].index]
)
self.T = T
##
[docs]
def get_M(self, alpha: float = 0.05, n_cores: int = None):
"""
Compute the "mutation enrichment" matrix, M.
M is a mut x clade matrix storing for each mutation i and clade j the enrichment
value defined as -log10(pval) from a Fisher's Exact test.
Uses joblib for parallel processing.
"""
muts = self.tree.layers['transformed'].columns
logging.info(f'Compute mutations enrichment: n muts={len(muts)}')
# Set number of cores
if n_cores is None:
n_cores = max(1, cpu_count()-1)
# Cache matrices once to avoid multiple copies across workers
tree_values = self.tree.layers['transformed'].values
T_values = self.T.values
T_columns = self.T.columns.tolist()
# Prepare batches
n_nodes = len(T_columns)
quotient = n_nodes // n_cores
residue = n_nodes % n_cores
intervals = []
start = end = 0
for i in range(n_cores):
end = start + quotient + (i < residue)
if end == start:
break
intervals.append((start, end))
start = end
# Parallel computation using joblib like the filter pattern
with parallel_backend("loky", inner_max_num_threads=1):
result_list = Parallel(n_jobs=len(intervals))(
delayed(_compute_mutation_enrichment_batch)(
start_pos,
end_pos,
T_columns,
tree_values,
T_values,
muts.size,
alpha
)
for start_pos, end_pos in intervals
)
# Flatten results from all batches
P = {}
for batch_results in result_list:
for node_name, pvals in batch_results:
P[node_name] = pvals
M = pd.DataFrame(P, index=muts)
M = -np.log10(M)
self.M = M
##
def _get_disjoint_clones(self, ordered_clones):
cats = ordered_clones.value_counts().index
resolve_list = []
for cat in cats:
clone_ranges, gaps = get_ranges_and_gaps(ordered_clones, cat)
for r in clone_ranges:
assert (ordered_clones.iloc[r[0]:r[1]]==cat).all()
for g in gaps:
assert (ordered_clones.iloc[g[0]:g[1]]!=cat).all()
gaps = [ g for g in gaps if g[0]!=0 and g[1]!=len(ordered_clones) ]
all_gaps_nans = True
for g in gaps:
if not ordered_clones.iloc[g[0]:g[1]].isna().all():
all_gaps_nans = False
if not all_gaps_nans:
resolve_list.append([cat, clone_ranges])
return resolve_list
##
def _check_inference(self):
ordered_clones = self.tree.cell_meta.loc[_get_leaves_order(self.tree), 'MiTo clone']
clones = ordered_clones.value_counts().index
assert len(self._get_disjoint_clones(ordered_clones))==0
assert (
(self.tree.cell_meta
.groupby('lca')['MiTo clone']
.nunique()==1)
.all()
)
for clone in clones:
nodes = (
self.tree.cell_meta
.loc[self.tree.cell_meta['MiTo clone']==clone, 'lca']
.unique()
)
assert len(nodes) <= 1
if len(nodes)==1:
lca = nodes[0]
cells = (
self.tree.cell_meta
.loc[self.tree.cell_meta['MiTo clone']==clone]
.index.to_list()
)
lca_ = self.tree.find_lca(*cells)
assert(lca==lca_)
##
[docs]
def resolve_ambiguous_clones(
self,
df_predict: pd.DataFrame,
merging_treshold: float = .7,
af_treshold: float = .0,
add_to_meta: bool = False,
) -> Tuple[pd.Series,pd.Series]:
"""
Final clonal resolution process.
Tries to merge similar clones, iteratively. First, the (raw) AF matrix is aggregated
at the clonal level using MiTo clones. Then, clone-clone similarities are computed using
(1-) weighted jaccard distances among these aggregated MT-SNVs clonal profiles.
At each round, the tiniest "ambiguous" clone is selected for merging with its smallest
"interacting clone". If the merge is successfull, the clonal assignment table is updated
and the process go through other merging rounds, until no ambiguous clones remain. Unresolved
clones (if any) are annotated as NaNs in the final MiTo clone column which is appended to
`self.tree.cell_meta`.
"""
self.params['merging_treshold'] = merging_treshold
remained_unresolved = []
try_merge = True
n_trials = 0
# Here we go
while try_merge:
# Aggregate
X_agg = (
self.tree.layers['raw']
.join(df_predict[['MiTo clone']])
.groupby('MiTo clone')
.median()
)
X_agg = X_agg.loc[:,np.any(X_agg>af_treshold, axis=0)]
# Compute similarity of putative clones (i.e., aggregated profiles)
w = np.nanmedian(np.where(X_agg>af_treshold, X_agg, np.nan), axis=0)
S_agg = 1-weighted_jaccard((X_agg>af_treshold).astype(int), w=w)
self.S_aggregate = pd.DataFrame(S_agg, index=X_agg.index, columns=X_agg.index)
# Spot interacting clones
S_agg_long = (
pd.DataFrame(
np.triu(S_agg, k=1),
index=X_agg.index.to_list(),
columns=X_agg.index.to_list()
)
.melt(ignore_index=False)
.reset_index()
)
S_agg_long.columns = ['clone1', 'clone2', 'similarity']
S_agg_long = (
S_agg_long
.query('clone1!=clone2 and similarity>=@merging_treshold')
.reset_index(drop=True).drop_duplicates()
)
if S_agg_long.shape[0] == 0:
try_merge = False
# Attempt merging, starting from the tiniest ambiguous clone
# Gather clonal stats
clone_df = (
df_predict
.loc[:,['MiTo clone', 'lca', 'n cells', 'muts']]
.reset_index(drop=True).drop_duplicates()
.sort_values('n cells')
.dropna()
.rename(columns={'MiTo clone':'old'})
.set_index('old')
)
self.clone_df = clone_df
# Find tiniest ambiguous clone
if try_merge:
ambiguous_clones = set(S_agg_long['clone1']) | set(S_agg_long['clone2'])
for clone in clone_df.index:
if clone in ambiguous_clones and not clone in remained_unresolved:
try_merge = True
break
else:
try_merge = False
if try_merge:
# Find the tiniest ambiguous clone interactor
interactions = S_agg_long.query('clone1==@clone or clone2==@clone')
other_clones = []
for i in range(interactions.shape[0]):
int_clones = interactions.iloc[i,:].values[:2]
other_clones.append(int_clones[int_clones!=clone][0])
int_clone = clone_df.loc[other_clones].sort_values('n cells').index[0]
# Find lca new clone
lca_clone = clone_df.loc[clone, 'lca']
lca_int_clone = clone_df.loc[int_clone, 'lca']
if lca_clone == lca_int_clone:
new_lca = lca_clone
else:
new_lca = self.tree.find_lca(*[lca_clone, lca_int_clone])
# Merge clones mutations
clone_muts = set(clone_df.loc[clone, 'muts'].split(';'))
int_clone_muts = set(clone_df.loc[int_clone, 'muts'].split(';'))
new_clone_muts = ';'.join(list(clone_muts | int_clone_muts))
# Check merging possibility
cells_merged_clone = (
df_predict
.loc[ lambda x: (x['MiTo clone'] == clone) | (x['MiTo clone'] == int_clone)]
.index
)
cells_clone = df_predict.loc[lambda x: x['MiTo clone'] == clone].index
cells_int_clone = df_predict.loc[lambda x: x['MiTo clone'] == int_clone].index
test = len(set(cells_merged_clone) - (set(cells_clone) | set(cells_int_clone)) ) == 0
if test:
df_predict.loc[cells_merged_clone, 'MiTo clone'] = int_clone
df_predict.loc[cells_merged_clone, 'median cell similarity'] = (
self.tree.get_attribute(new_lca, 'similarity')
)
df_predict.loc[cells_merged_clone, 'n cells'] = cells_merged_clone.size
df_predict.loc[cells_merged_clone, 'lca'] = new_lca
df_predict.loc[cells_merged_clone, 'muts'] = new_clone_muts
else:
remained_unresolved.append(clone)
n_trials += 1
# Put unresolved_clones as NaNs
df_predict.loc[df_predict['MiTo clone'].isin(remained_unresolved), 'MiTo clone'] = np.nan
# Last conflict resolution: solve disjoint clones, if any
ordered_clones = df_predict.loc[_get_leaves_order(self.tree), 'MiTo clone']
resolve_list = self._get_disjoint_clones(ordered_clones)
if resolve_list:
for i in range(len(resolve_list)):
clone, clone_ranges = resolve_list[i]
accepted, discarded = find_longest_stretch(clone_ranges, ordered_clones)
accepted_range = (accepted[0][0], accepted[-1][1])
final_clone_cells = ordered_clones.index[accepted_range[0]:accepted_range[1]]
df_predict.loc[final_clone_cells, 'lca'] = (
self.tree.find_lca(*final_clone_cells)
)
final_na_set = []
for r in discarded:
final_na_set.extend(
ordered_clones.index[r[0]:r[1]]
)
df_predict.loc[final_na_set, 'MiTo clone'] = np.nan
df_predict.loc[final_na_set, 'lca'] = np.nan
df_predict.loc[final_na_set, 'muts'] = np.nan
# Rename MiTo clones according to their lca size
sizes = df_predict['lca'].value_counts()
mapping = { x : f'MT-{i}' for i,x in enumerate(sizes.index) }
df_predict['MiTo clone'] = df_predict['lca'].map(mapping)
self.clonal_nodes = df_predict.loc[lambda x: ~x['lca'].isna(), 'lca'].unique().tolist()
# Check if the df_predict info should be added to self.tree.cell_meta
if add_to_meta:
df_predict['MiTo clone'] = pd.Categorical(df_predict['MiTo clone'])
df_predict['lca'] = pd.Categorical(df_predict['lca'])
df_predict['muts'] = pd.Categorical(df_predict['muts'])
cols = [
x for x in self.tree.cell_meta.columns \
if x not in df_predict.columns
]
self.tree.cell_meta = self.tree.cell_meta[cols]
self.tree.cell_meta = self.tree.cell_meta.join(df_predict)
self._check_inference()
return df_predict['MiTo clone'], df_predict['median cell similarity']
##
[docs]
def compute_cell_fitness(self):
"""
LBI method (Neher et al., 2014) from Cassiopeia.
"""
logging.info('Estimate cell fitness scores')
model = LBIJungle(random_seed=1234)
model.estimate_fitness(self.tree)
self.tree.set_attribute(self.tree.root, 'fitness', 0)
d = {}
for node in self.tree.nodes:
d[node] = self.tree.get_attribute(node, 'fitness')
# Rescale
s = pd.Series(d)
s = (s-s.mean()) / s.std(ddof=0)
# Add cell values to tree metadata
self.tree.cell_meta['fitness'] = s.loc[self.tree.cell_meta.index]
##
[docs]
def compute_expansions(self):
"""
Call cassiopeia.tools.compute_expansion_pvalues.
Compute clonal expansion pvalues as descrived in Yang, Jones et al, BioRxiv (2021).
"""
logging.info('Compute expansion pvalues')
cs.tl.compute_expansion_pvalues(self.tree)
##
##
[docs]
def infer_clones(self, similarity_percentile: float = 85, mut_enrichment_treshold: int = 5) -> pd.DataFrame:
"""
A MT-SNVs-specific re-adaptation of the recursive approach described in the MethylTree paper
(... et al., 2025).
"""
# Prep lists for recursion
tree_list = []
df_list = []
# Get tree topology and mutation_enrichment tables
if self.T is None or self.M is None:
self.get_T()
self.get_M()
# Set usable_mutations
is_enriched = (self.M.values>=mut_enrichment_treshold).any(axis=1)
mut_enrichment = self.M.loc[is_enriched,:]
usable_mutations = set(mut_enrichment.index)
self.params['mut_enrichment_treshold'] = mut_enrichment_treshold
self.params['n total mutations'] = self.tree.layers['raw'].shape[1]
self.params['n usable_mutations'] = len(usable_mutations)
# Convert the dissimilarity into similarity matrix
D = self.tree.get_dissimilarity_map()
S = 1-D
# Calculate similarity treshold
similarity_treshold = np.percentile(S.values, similarity_percentile)
self.params['similarity_treshold'] = similarity_treshold
# Set median similarity among clade cells as tree node attributes
clades = get_clades(self.tree, with_singletons=True)
for node in clades:
cells = list(clades[node])
s = np.median(S.loc[cells,cells].values)
self.tree.set_attribute(node, 'similarity', s)
##
# Internal, recursive functions =========================================================== #
def _find_clones(tree, node, usable_mutations):
if not usable_mutations:
_collect_clones(tree, node)
return
if tree.is_leaf(node):
_collect_clones(tree, node)
elif (
((mut_enrichment.loc[list(usable_mutations), node] >= mut_enrichment_treshold).any()) & \
(tree.get_attribute(node, 'similarity') >= similarity_treshold) & \
(not tree.is_leaf(node))
):
triggered = {
mut for mut in usable_mutations \
if mut_enrichment.loc[mut,node] >= mut_enrichment_treshold
}
tree.set_attribute(node, 'muts', list(triggered))
new_usable = usable_mutations - triggered
_refine_clones(tree, node, new_usable)
else:
for child in tree.children(node):
_find_clones(tree, child, usable_mutations)
##
def _collect_clones(tree, node_tmp):
tree_list.append(node_tmp)
leaves = tree.leaves_in_subtree(node_tmp)
df_tmp = pd.DataFrame({"cell": leaves})
df_tmp["MiTo clone"] = f"MT-{len(df_list)}"
df_tmp["median cell similarity"] = tree.get_attribute(node_tmp, 'similarity')
df_tmp["n cells"] = len(leaves)
df_tmp['lca'] = tree.find_lca(*leaves) if len(leaves)>1 else np.nan
try:
df_tmp['muts'] = ';'.join(tree.get_attribute(node_tmp, 'muts'))
except:
pass
df_list.append(df_tmp)
##
def _collect_clade_info(tree, node_tmp, usable_mutations, level=0, data_list=[], name="0"):
valid_tmp = (
((mut_enrichment.loc[list(usable_mutations), node_tmp] >= mut_enrichment_treshold).any()) & \
(tree.get_attribute(node_tmp, 'similarity') >= similarity_treshold) & \
(not tree.is_leaf(node_tmp))
)
data_list.append([level, int(valid_tmp), len(tree.leaves_in_subtree(node_tmp)), name])
for j0, child_tmp in enumerate(tree.children(node_tmp)):
_collect_clade_info(
tree, child_tmp, usable_mutations, level=level+1,
data_list=data_list, name=f"{name},{j0}"
)
##
def _add_clade_info(tree, node_tmp, target_level, target_name_list, level=0, name="0"):
valid_list = []
invalid_list = []
for target_name in target_name_list:
condition_1 = target_name == name # this sub-tree matches our target names
condition_2 = target_name.startswith(name) # this sub-tree is along the branch that leads to the target sub-tree, therefore acceptable
condition_3 = name.startswith(target_name) # this sub-tree is contained within one of the target sub-trees
valid_list.append(condition_1)
invalid_list.append(condition_2 or condition_3)
valid = np.sum(valid_list) > 0
if not valid:
valid = np.sum(invalid_list) == 0
if valid:
_collect_clones(tree, node_tmp)
if name not in target_name_list:
target_name_list.append(name)
if level < target_level:
for j0, child_tmp in enumerate(tree.children(node_tmp)):
_add_clade_info(
tree,
child_tmp,
target_level,
target_name_list,
level=level + 1,
name=f"{name},{j0}",
)
else:
return
##
def _refine_clones(tree, node, usable_mutations):
if not usable_mutations:
_collect_clones(tree, node)
return
data_list = []
_collect_clade_info(tree, node, usable_mutations, level=0, data_list=data_list)
df_tmp_orig = pd.DataFrame(
np.array(data_list), columns=["level", "score", "cell_N", "name"]
)
df_tmp_orig["level"] = df_tmp_orig["level"].astype(int)
df_tmp_orig["score"] = df_tmp_orig["score"].astype(float)
df_tmp_orig["cell_N"] = df_tmp_orig["cell_N"].astype(int)
df_tmp = (
df_tmp_orig.groupby("level")
.agg({"score": "mean", "cell_N": "sum"})
.reset_index()
)
df_tmp = df_tmp[(df_tmp["score"] == 1)]
target_level = df_tmp["level"].max()
target_name_list = df_tmp_orig[df_tmp_orig["level"] == target_level]["name"].to_list()
_add_clade_info(tree, node, target_level, target_name_list)
# ============================================================================================ #
# Fire recursion!
tree_ = self.tree.copy()
_find_clones(tree_, tree_.root, usable_mutations)
# Get results
df_predict = pd.concat(df_list, ignore_index=True)
df_predict = df_predict.set_index('cell')
df_predict.loc[df_predict['muts'].isna(), 'MiTo clone'] = np.nan
return df_predict
##
[docs]
def clonal_inference(
self,
similarity_tresholds: Iterable[int] = [ 85, 90, 95, 99 ],
mut_enrichment_tresholds: Iterable[int] = [ 3, 5, 10 ],
merging_treshold: Iterable[float] = [ .25, .5, .75 ],
af_treshold: float = .0,
weight_silhouette: float =.3,
weight_n_clones: float = .4,
weight_similarity: float = .3,
max_fraction_unassigned: float = .05,
n_cores: int = None,
):
"""
Optimize tresholds for `self.infer_clones` and pick clonal labels with
best silhouette scores across the attempted splits.
"""
T = Timer()
T.start()
# Get tree topology and mutation_enrichment tables
if self.T is None or self.M is None:
self.get_T()
self.get_M(n_cores=n_cores)
# Define combos
combos = list(product(similarity_tresholds, mut_enrichment_tresholds, merging_treshold))
logging.info(f'Start Grid Search. n hyper-parameter combinations to explore: {len(combos)}')
silhouettes = []
unassigned = []
n_clones = []
similarities = []
# Grid search
for i, (s, m, j) in enumerate(tqdm(combos, total=len(combos), desc="Grid Search")):
try:
df_predict = self.infer_clones(similarity_percentile=s, mut_enrichment_treshold=m)
labels, sim = self.resolve_ambiguous_clones(
df_predict, merging_treshold=j, af_treshold=af_treshold, add_to_meta=False
)
test = labels.isna()
labels = labels.loc[~test]
D = self.tree.get_dissimilarity_map().loc[labels.index,labels.index]
assert (D.index == labels.index).all()
sil = silhouette_score(X=D.values, labels=labels.values, metric='precomputed') if labels.unique().size>=2 else 0
silhouettes.append(sil)
unassigned.append(test.sum()/labels.size)
n_clones.append(labels.unique().size)
similarities.append(sim.mean())
except:
# Some error occurred with this hyper-parameter combo
pass
# Pick optimal combination, and perform final splitting
self.solutions = (
pd.DataFrame({'silhouette':silhouettes, 'unassigned':unassigned,
'n_clones':n_clones, 'similarity':similarities})
.assign(
sil_rescaled = lambda x: rescale(x['silhouette']),
sim_rescaled = lambda x: rescale(x['similarity']),
n_clones_rescaled = lambda x: rescale(-x['n_clones']),
score = lambda x:
weight_silhouette * x['sil_rescaled'] + \
weight_n_clones * x['n_clones_rescaled'] + \
weight_similarity * x['similarity']
)
.sort_values('score', ascending=False)
)
ranked_solutions = (
self.solutions
.query('unassigned<=@max_fraction_unassigned')
.sort_values('score', ascending=False)
)
if ranked_solutions.shape[0]>0:
s, m, j = combos[ranked_solutions.index[0]]
else:
raise ValueError(
f'''
None of the solution tested falls below the
max_fraction_unassigned treshold: {max_fraction_unassigned}
'''
)
# Final round
logging.info(f'Hyper-params chosen: similarity_percentile={s}, mut_enrichment_treshold={m}, merging_treshold={j}')
df_predict = self.infer_clones(similarity_percentile=s, mut_enrichment_treshold=m)
_, _ = self.resolve_ambiguous_clones(
df_predict, merging_treshold=j, af_treshold=af_treshold, add_to_meta=True
)
# Calculate clonal expansions
self.compute_expansions()
# Estimate single cell fitness
self.compute_cell_fitness()
# Retrieve mutation order for plotting
self.extract_mut_order()
# Retrieve internal node stats
self.internal_nodes_df = get_internal_node_stats(self.tree)
logging.info(f'MiTo clonal inference finished. {T.stop()}')
##