from collections import OrderedDict
from VirtualMicrobes.virtual_cell import PhyloUnit
import numpy as np
[docs]class PhyloGeneticAnalysis(object):
'''
Analyze biological networks
'''
def __init__(self):
'''
'''
pass
[docs]def find_homologs(gene, genome):
"""
For a gene, find all its homologs in a given genome.
This is a naive approach that uses a combination of the gene's type and its
:class:`VirtualMicrobes.virtual_cell.Identifier.Identifier` attribute to
detect common descent.
Parameters
----------
gene : :class:`VirtualMicrobes.virtual_cell.GenomicElement.GenomicElement`
Reference gene for homology search.
genome : :class:`VirtualMicrobes.virtual_cell.Genome.Genome`
Genome in which to search for homologs.
Returns
-------
The set of homologs of `gene` in the `genome`.
"""
homologous = set()
for g in genome:
if (type(g) == type(gene) and gene.id.is_copy(g.id)):
homologous.add(g)
return homologous
[docs]def find_homolog_distances(gene, genome, closest_homolog=False):
"""
Find homologs and their distance for a gene in a target genome.
Parameters
----------
gene : :class:`VirtualMicrobes.virtual_cell.GenomicElement.GenomicElement`
Reference gene for homology search.
genome : :class:`VirtualMicrobes.virtual_cell.Genome.Genome`
Genome in which to search for homologs.
closest_homolog : bool
Flag to filter found homologs to those that have the shortest
phylogenetic distance to the `gene`.
"""
age = gene.time_birth
#print 'gene age', age
homolog_distance_dict = dict()
for h in find_homologs(gene, genome):
dist = 0
h_age = h.time_birth
if h is gene:
dist = 0
homolog_distance_dict[h] = dist
elif gene.parent_of(h):
dist = age - h_age
homolog_distance_dict[h] = dist
#print 'found parent', h, 'at distance', dist
elif gene.child_of(h):
dist = h_age - age
homolog_distance_dict[h] = dist
#print 'found child', h, 'at distance', dist
else:
common_ancestors = gene.common_ancestors(h)
ca_dists = []
for ca in common_ancestors:
ca_dists.append(age + h_age - 2 * ca.time_birth)
if len(ca_dists):
homolog_distance_dict[h] = min(ca_dists)
if closest_homolog and len(homolog_distance_dict):
min_dist = min(homolog_distance_dict.values())
homolog_distance_dict = { k:v for k,v in homolog_distance_dict.items()
if v == min_dist }
return homolog_distance_dict
[docs]def calculate_overlap(tf_connections, connections_of_homologous_tfs,
closest_bound_homologs_dict):
"""
Calculate the overlap in bound genes between tf homologs.
Parameters
----------
tf_connections : list of :class:`VirtualMicrobes.virtual_cell.Gene.Gene`\s
Downstream connections of the reference TF
connections_of_homologous_tfs : list of sets of :class:`VirtualMicrobes.virtual_cell.Gene.Gene`\s
List of sets of downstream genes for each homolog of the reference TF
closest_bound_homologs_dict : dict of sets of :class:`VirtualMicrobes.virtual_cell.Gene.Gene`\s
Mapping of each original downstream gene of the reference TF to sets of
homologs of these downstream genes.
Returns
-------
float,float
Tuple of fractions:
[0]: Fraction of downstream genes who's homologs are bound by a homolog of
the reference TF.
[1]: Fraction of new connections (averaged over tf homologs) per original
connection of the reference TF.
"""
connections_still_bound = 0
nr_homologous_tfs = len(connections_of_homologous_tfs)
all_closest_homolog_of_bound_genes = set()
all_connections_of_homologous_tfs = set()
for bound_gene in tf_connections:
closest_homologs_of_bound_gene = set(closest_bound_homologs_dict[bound_gene])
all_closest_homolog_of_bound_genes |= closest_homologs_of_bound_gene
still_bound = False
for connections in connections_of_homologous_tfs:
overlap = connections & closest_homologs_of_bound_gene
all_connections_of_homologous_tfs |= connections
if len(overlap) > 0: # the homolog of the tf binds a homolog of the bound gene
still_bound = True
if still_bound:
connections_still_bound += 1
all_new = all_connections_of_homologous_tfs - all_closest_homolog_of_bound_genes
fraction_conserved = np.NaN
if len(tf_connections):
fraction_conserved = connections_still_bound / float(len(tf_connections))
fraction_new = np.NaN
if len(tf_connections) and nr_homologous_tfs:
avrg_new_connections = len(all_new) / float(nr_homologous_tfs)
fraction_new = avrg_new_connections / float(len(tf_connections))
return fraction_conserved , fraction_new
[docs]def tf_binding_overlap(cell1, cell2, closest_homolog=False, no_phylogeny=False, verbose=False):
"""
Measure the overlap in target genes for tf homologs in phylogenetically
related individuals.
cell1 : :class:`VirtualMicrobes.virtual_cell.Cell.Cell`
Reference individual for which to find homologs
cell2 : :class:`VirtualMicrobes.virtual_cell.Cell.Cell`
Homologs of TFs and downstream targets will be detected in this individual.
closest_homolog : bool
Flag to filter found homologs to those that have the shortest
phylogenetic distance to the `gene`.
verbose : bool
Print messages about homologs found.
Returns
-------
dict
Mapping from :class:`VirtualMicrobes.virtual_cell.Gene.TranscriptionFactor` to (maximum)
binding overlap score.
"""
overlap_dict, new_dict = OrderedDict(), OrderedDict()
cell1_tf_connections = cell1.genome.tf_connections_dict()
cell2_tf_connections = cell2.genome.tf_connections_dict()
if verbose:
print 'cell1', cell1.time_birth, '-> cell2', cell2.time_birth
for tf in set(cell1.genome.tfs):
bound_genes = cell1_tf_connections[tf]
if isinstance(tf,PhyloUnit.PhyloUnit) and not no_phylogeny:
#print 'using detailed phylogenetic distance information'
closest_tf_homologs = find_homolog_distances(tf, cell2.genome, closest_homolog)
closest_bound_homolog_dict = dict()
for bg in bound_genes:
closest_bound_homolog_dict[bg] = find_homolog_distances(bg, cell2.genome, closest_homolog)
else:
#print 'No phylogeny stored for object. Using simple homology by identifier information.'
closest_tf_homologs = find_homologs(tf, cell2.genome)
closest_bound_homolog_dict = dict()
for bg in bound_genes:
closest_bound_homolog_dict[bg] = find_homologs(bg, cell2.genome)
if verbose:
print '\tfound', sum( [len(homologs) for homologs in closest_bound_homolog_dict.values() ]),
print 'homologs for', len(bound_genes), 'downstream genes bound now'
overlap_dict[tf], new_dict[tf] = calculate_overlap(bound_genes,
[ cell2_tf_connections[h_tf]
for h_tf in closest_tf_homologs ],
closest_bound_homolog_dict)
if verbose:
print
return overlap_dict, new_dict