from __builtin__ import int
import collections
import copy
import glob
import os
import random
import re
import shutil
from VirtualMicrobes.cython_gsl_interface import integrate
from VirtualMicrobes.environment.Environment import Locality
from VirtualMicrobes.readwrite import write_obj
import VirtualMicrobes.my_tools.utility as util
from VirtualMicrobes.plotting.Graphs import BindingNetwork, MetabolicNetwork, Genome, PhyloTreeGraph
import VirtualMicrobes.simulation.Simulation as simu
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
def _plot_cell_graphs(cell, GRN_grapher, metabolome_grapher, genome_grapher, max_genome_size, suffixes):
'''
Draw all graphs for cell
Parameters
----------
cell : :class:`VirtualMicrobes.virtual_cell.Cell.Cell`
GRN_grapher : :class:`VirtualMicrobes.plotting.Graphs.BindingNetwork`
draws gene regulatory network graphs
metabolome_grapher : :class:`VirtualMicrobes.plotting.Graphs.MetabolicNetwork`
draws metabolic network
genome_grapher : :class:`VirtualMicrobes.plotting.Graphs.Genome`
draws genome layout graphs
'''
#for nodes in cell.nodes_edges():
# print str(nodes[1])
GRN_grapher.init_network(cell)
GRN_grapher.layout_network_positions(prog='nwx')
GRN_grapher.redraw_network()
GRN_grapher.update_figure()
# label_set.append('apo')
for suffix in suffixes:
GRN_grapher.save_fig(labels=[str(cell.time_birth).zfill(10), 'nwx'], suffix=suffix, bbox_inches='tight')
GRN_grapher.layout_network_positions(prog='dot')
GRN_grapher.redraw_network()
GRN_grapher.update_figure()
for suffix in suffixes:
GRN_grapher.save_fig(labels=[str(cell.time_birth).zfill(10), 'bound', 'dot'], suffix=suffix, bbox_inches='tight')
GRN_grapher.redraw_network(edge_effect='effectApo')
GRN_grapher.update_figure()
for suffix in suffixes:
GRN_grapher.save_fig(labels=[str(cell.time_birth).zfill(10), 'apo', 'dot'], suffix=suffix, bbox_inches='tight')
GRN_grapher.write_to_file(labels=[str(cell.time_birth).zfill(10), 'apo','dot'],suffix='.dot')
GRN_grapher.clear_graph()
genome_grapher.plot_genome_structure(cell, labels=[str(cell.time_birth).zfill(10)],
max_size=max_genome_size)
genome_grapher.update_figure()
for suffix in suffixes:
genome_grapher.save_fig(labels=[str(cell.time_birth).zfill(10)], suffix=suffix)
def _plot_cell_time_course(cell, sim_graphs, save_dir, suffixes):
'''
Plot all time courses within the life span of an individual.
Parameters
----------
cell : :class:`VirtualMicrobes.virtual_cell.Cell.Cell`
sim_graphs : :class:`VirtualMicrobes.plotting.Graphs.Graphs`
simulation grapher object that draws the plots
save_dir : str
suffixes : list of file suffixes
'''
fig = plt.figure(figsize=(12,12))
mol_ax = fig.add_subplot(321)
mol_ax.set_title('internal molecule conc')
prot_ax = fig.add_subplot(323)
prot_ax.set_title('protein concentration')
size_ax = fig.add_subplot(322)
size_ax.set_title('cell size')
tox_ax = fig.add_subplot(324)
tox_ax.set_title('toxicity')
prod_ax = fig.add_subplot(326)
prod_ax.set_title('production')
size_dat = cell.get_cell_size_time_course()
if not len(size_dat[0,:]):
return
mol_dat = cell.get_mol_time_course_dict()
prot_dat = cell.get_gene_type_time_course_dict()
tox_dat = cell.get_toxicity_time_course()
prod_dat = cell.get_raw_production_time_course()
sim_graphs.plot_mol_class_data(mol_ax, mol_dat)
sim_graphs.plot_prot_data(prot_ax, prot_dat)
title = size_ax.get_title()
size_ax.clear()
size_ax.set_title(title)
size_ax.plot(size_dat[0,:],size_dat[1,:])
title = tox_ax.get_title()
tox_ax.clear()
tox_ax.set_title(title)
tox_ax.plot(tox_dat[0,:], tox_dat[1,:])
title = prod_ax.get_title()
prod_ax.clear()
prod_ax.set_title(title)
prod_ax.plot(prod_dat[0,:], prod_dat[1,:])
for suffix in suffixes:
fig.savefig(os.path.join(save_dir,'time_course_'+str(cell.time_birth).zfill(10)+suffix),
bbox_inches='tight')
def _lod_time_course_data(ancestors, base_save_dir, viewer_path, chunk_size=100 ):
'''
Write time series data in the line of descent.
Concatenates time courses of individuals along a :class:`LOD`.
Concatenations are done in *chunks* of a chosen `chunk_size`. For each chunk
**.csv** files are stored in a directory named part*n*, where *n* is the
chunk number.
Parameters
----------
ancestors : list of :class:`VirtualMicrobes.virtual_cell.Cell.Cell`\s
base_save_dir : str
viewer_path : str
path to utility files for html data viewer
chunk_size : int
length of chunks of concatenated data
'''
# divide the ancestors in LOD into chunks; concatenate time courses per chunk
for part, anc_chunk in enumerate(util.chunks(ancestors, chunk_size)):
num = str(part).zfill(5)
save_dir = os.path.join(base_save_dir, 'part{}'.format(num))
util.ensure_dir(save_dir)
for filename in glob.glob(os.path.join(viewer_path, '*')):
shutil.copy2(filename, save_dir)
prod_series = []
cell_size_series = []
tox_series = []
mol_dfs = []
prot_dfs = []
for anc in anc_chunk:
# production data
ts_dat = anc.get_raw_production_time_course()
ts = pd.Series(data=ts_dat[1], index=ts_dat[0])
prod_series.append(ts)
# cell size data
ts_dat = anc.get_cell_size_time_course()
ts = pd.Series(data=ts_dat[1], index=ts_dat[0])
cell_size_series.append(ts)
# toxicity data
ts_dat = anc.get_toxicity_time_course()
ts = pd.Series(data=ts_dat[1], index=ts_dat[0])
tox_series.append(ts)
# metabolite data
mol_time_courses = anc.get_mol_time_course_dict()
mol_series = dict()
for mol, tc in mol_time_courses.items():
mol_series[mol] = pd.Series(data=tc[1], index=tc[0], name=mol)
mol_df = pd.DataFrame(mol_series)
mol_dfs.append(mol_df)
# protein data
prot_time_courses = anc.get_total_reaction_type_time_course_dict()
prot_series = dict()
for _reac_type, tc_dict in prot_time_courses.items():
for reac, tc in tc_dict.items():
if isinstance(reac, tuple):
reac, exp = reac
name = str(reac)
name += '-e' if exp else '-i'
else:
name = str(reac)
prot_series[name] = pd.Series(data=tc[1],
index=tc[0],
name=name )
prot_df = pd.DataFrame(prot_series)
prot_dfs.append(prot_df)
# concatenate each data type and write to file
prod_series = pd.concat(prod_series)
prod_series = prod_series[~prod_series.index.duplicated(keep='last')]
ts_base_name = os.path.join(save_dir,'production_time_course')
prod_series.to_csv(ts_base_name+'.csv', index_label='time point')
cell_size_series = pd.concat(cell_size_series)
cell_size_series = cell_size_series[~cell_size_series.index.duplicated(keep='last')]
ts_base_name = os.path.join(save_dir,'cell_size_time_course')
cell_size_series.to_csv(ts_base_name+'.csv', index_label='time point')
tox_series = pd.concat(tox_series)
tox_series = tox_series[~tox_series.index.duplicated(keep='last')]
ts_base_name = os.path.join(save_dir,'toxicity_time_course')
tox_series.to_csv(ts_base_name+'.csv', index_label='time point')
mol_df_combine = pd.concat(mol_dfs)
mol_df_combine = mol_df_combine[~mol_df_combine.index.duplicated(keep='last')]
df_base_name = os.path.join(save_dir,'mol_time_courses')
mol_df_combine.to_csv(df_base_name+'.csv', index_label='time point')
prot_df_combine = pd.concat(prot_dfs)
prot_df_combine = prot_df_combine[~prot_df_combine.index.duplicated(keep='last')]
df_base_name = os.path.join(save_dir,'prot_time_courses')
prot_df_combine.to_csv(df_base_name+'.csv', index_label='time point')
[docs]class LOD_Analyser(object):
'''
Analyses the evolutionary history of a population by tracing ancestors in the
line of descent.
Loads a simulation save from a file, keeping a reference in :attr:`ref_sim`.
From this, initialise :attr:`ref_pop_hist` as a :class:`PopulationHistory`
object that analyses the phylogenetic tree of the population.
The :class:`PopulationHistory` generates a :class:`LOD` for 1 or more
individuals in the saved population. For each :class:`LOD`, evolutionary
data and network and genome plots can be produced.
It is possible to load additional simulation snapshots that preceed the
:attr:`ref_pop_hist` and compare individuals to their contemporaries present
in the preceding populations. :attr:`compare_saves` contains a list of file
names of populations-saves that should be compared.
'''
args = None
'''config and command line arguments used for initialisation'''
ref_sim = None
''':class:`VirtualMicrobes.simulation.Simulation` snapshot to analyse'''
ref_pop_hist = None
''':class:`PopulationHistory` for the reference simulation (`ref_sim`) snapshot '''
compare_saves = []
'''names of snapshot files to copmare to `ref_sim`'''
def __init__(self, args):
'''
Initialize the analyzer from an argument dictionary.
Load the population save from file :param:`args`.pop_save and initialize
special fields in its :class:`data_tools.store.DataStore` that can hold
ancestor tracing data. From :attr:`ref_sim` initialize :attr:`ref_pop_hist`
as a :class:`PopulationHistory` that can be used to generate and analyze
the evolutionary history of the population stored in :attr:`ref_sim`.
Parameters
----------
args : dict
arguments attribute dictionary
'''
self.args = args
self.init_compare_saves(args.compare_saves)
self.ref_sim = simu.load_simulation(args.pop_save, **vars(args))
print 'historic maximum of production medium:', self.ref_sim.system.population.historic_production_max
self.init_ref_history()
[docs] def init_compare_saves(self, compare_saves):
'''
Parse and check compare saves parameter.
Compare saves can be either a list of file names or a list of generation
times (or None). In the latter case, the file names should be
constructed using the time point and the file name of the reference
simulation. Checks are made to ensure files exist and also to ensure
that no compares save points come after the reference simulation save
point, as this would not make sense in the comparison functions.
'''
gen_re = re.compile(r'(\d+)(?=\.sav)')
if compare_saves is None:
self.compare_saves = compare_saves
return
elif all(map(lambda v: isinstance(v, int), compare_saves)):
sub = lambda i: gen_re.sub(str(i), self.args.pop_save)
self.compare_saves = map(sub, compare_saves)
else:
self.compare_saves = compare_saves
m = gen_re.search(self.args.pop_save)
if m:
ref_gen = int(m.group(0))
else:
raise Exception('Could not find a generation time for pop_save "{}"'.format(self.args.pop_save))
for f in self.compare_saves:
if not os.path.exists(f):
raise Exception('compare save file "{}" could not be found'.format(f))
m = gen_re.search(f)
if m:
gen = int(m.group(0))
if gen > ref_gen:
raise Exception('Can not compare a save point "{}"'
' beyond the simulation time "{}" '
'of the reference simulation'.format(gen, ref_gen))
else:
raise Exception('Could not find a generation time '
'for compare save "{}"'.format(f))
[docs] def init_ref_history(self, ref_sim=None, nr_lods=None,
prune_depth=0, pop_hist_dir='population_history'):
'''
Create a :class:`PopulationHistory` from the :attr:`ref_sim`
:class:`VirtualMicrobes.simulation.Simulation.Simulation` object.
For the :class:`PopulationHistory` object constructs its phylogenetic tree
and prune back the tree to a maximum depth of (max_depth - prune_depth)
counted from the root. Then create :class:`LOD` objects representing
the *line of descent* of the `nr_lods` *most diverged* branches in the tree.
Parameters
----------
ref_sim : :class:`VirtualMicrobes.simulation.Simulation.Simulation` object
simulation snapshot that is the basis for `LOD` analysis
nr_lods : int nr_lods
nr of separate (most distant) :class:`LOD`\s to initialize
prune_depth : int
prune back the phylogenetic tree with this many timesteps
pop_hist_dir : str
name of directory to store lod analysis output
'''
if ref_sim is None:
ref_sim = self.ref_sim
if nr_lods is None:
nr_lods = self.args.nr_lods
tp = ref_sim.run_time
save_dir = os.path.join(ref_sim.save_dir, pop_hist_dir+'_'+str(tp))
self.ref_pop_hist = PopulationHistory(sim=self.ref_sim,
params=self.ref_sim.params,
save_dir=save_dir,
prune_depth=prune_depth)
self.ref_pop_hist.init_phylo_tree()
self.ref_pop_hist.init_lods(nr_lods)
self.ref_pop_hist._init_pop_hist_data_store()
[docs] def compare_to_pops(self):
'''
Compare reference simulation to a set of previous population snapshots.
Compares each of the simulation snapshot saves in :attr:`compare_saves`
to the :attr:`ref_pop_hist`. A :class:`PopulationHistory` is constructed for
each of the compare snapshots. Within the compare snapshot, individuals
that correspond to the are part of (any of) the :class:`LOD`(s) of the
:attr:`ref_pop_hist` will be identified. Properties of these *ancestors*
will then be compare with their statistical values for the whole
population.
'''
# TODO check that compare saves are not older than the reference
# and raise error when it is the case:
if not self.args.skip_store:
self.ref_sim.data_store.init_ancestry_compare_stores(self.ref_pop_hist)
for compare_save in sorted(self.compare_saves, key=lambda n: int(n.strip('.sav').split('_')[-1])):
self.ref_pop_hist.compare_to_pop(compare_save)
[docs] def lod_stats(self, stride=None, time_interval=None, lod_range=None):
'''
Write time series of evolutionary changes along all :class:`LOD`\s.
Parameters
----------
stride : int
stride in generations for sampling individuals along the :class:`LOD`
time_interval : int
interval in simulation time for sampling individuals along the
:class:`LOD`
lod_range : (float,float)
bounds in fractions of the total range of the :class:`LOD`
'''
if stride is not None and time_interval is not None:
raise Exception('defining both lod_generation_interval and lod_time_interval is not allowed')
if stride is None:
stride = self.args.lod_generation_interval
if time_interval is None:
time_interval = self.args.lod_time_interval
if lod_range is None:
lod_range = self.args.lod_range
print 'Running LOD stats'
self.ref_pop_hist.lod_stats(stride, time_interval, lod_range)
[docs] def lod_cells(self, stride=None, time_interval=None, lod_range=None, runtime=None):
'''
Write time series of evolutionary changes along all :class:`LOD`\s.
Parameters
----------
stride : int
stride in generations for sampling individuals along the :class:`LOD`
time_interval : int
interval in simulation time for sampling individuals along the
:class:`LOD`
lod_range : (float,float)
bounds in fractions of the total range of the :class:`LOD`
'''
if stride is not None and time_interval is not None:
raise Exception('defining both lod_generation_interval and lod_time_interval is not allowed')
if stride is None:
stride = self.args.lod_generation_interval
if time_interval is None:
time_interval = self.args.lod_time_interval
if lod_range is None:
lod_range = self.args.lod_range
if runtime is None:
runtime=-1
print 'Saving LOD cells'
self.ref_pop_hist.dump_lod_cells(runtime)
[docs] def anc_cells(self, runtime=None, tcs=False):
''' Dump all cells in the fossil record (e.g. to map onto the newick trees)'''
print 'saving anc cells'
if runtime is None:
runtime = -1
self.ref_pop_hist.dump_anc_cells(runtime)
[docs] def pop_cells(self, runtime=None, tcs=False):
'''Dump all cells in this save file'''
if runtime is None:
runtime = -1
prunegens = self.args.prune_pop_cells
print 'dumping pop cells'
#for p in cell.parents:
# print p
# print cell.parents[0]
self.ref_pop_hist.dump_pop_cells(runtime,prunegens)
[docs] def lod_network_stats(self,stride=None, time_interval=None,
lod_range=None):
'''
Write time series for evolutionary network property changes along all :class:`LOD`\s.
Parameters
----------
stride : int
stride in generations for sampling individuals along the :class:`LOD`
time_interval : int
interval in simulation time for sampling individuals along the
:class:`LOD`
lod_range : (float,float)
bounds in fractions of the total range of the :class:`LOD`
'''
if stride is not None and time_interval is not None:
raise Exception('defining both lod_generation_interval and lod_time_interval is not allowed')
if stride is None:
stride = self.args.lod_generation_interval
if time_interval is None:
time_interval = self.args.lod_time_interval
if lod_range is None:
lod_range = self.args.lod_range
print 'Running LOD Network stats'
self.ref_pop_hist.lod_network_stats(stride, time_interval, lod_range)
[docs] def lod_binding_conservation(self, stride=None, time_interval=None,
lod_range=None):
'''
Write time series for TF binding conservation for :class:`LOD`\s.
Parameters
----------
stride : int
stride in generations for sampling individuals along the :class:`LOD`
time_interval : int
interval in simulation time for sampling individuals along the
:class:`LOD`
lod_range : (float,float)
bounds in fractions of the total range of the :class:`LOD`
'''
if stride is not None and time_interval is not None:
raise Exception('defining both lod_generation_interval and lod_time_interval is not allowed')
if stride is None:
stride = self.args.lod_generation_interval
if time_interval is None:
time_interval = self.args.lod_time_interval
if lod_range is None:
lod_range = self.args.lod_range
print 'Running LOD binding conservation'
self.ref_pop_hist.lod_binding_conservation(stride, time_interval,
lod_range)
[docs] def draw_ref_trees(self):
'''Draw a reference phylogenetic tree, with individual, selected :class:`LOD`\s marked'''
self.ref_pop_hist.draw_ref_trees()
[docs] def lod_graphs(self, stride=None, time_interval=None, lod_range=None, formats=None):
'''Draw network and genome graphs for :class:`LOD`\s
It is possible to set an interval and a range to sample individuals in
the :class:`LOD`.
Parameters
----------
stride : int
stride in generations for sampling individuals along the :class:`LOD`
time_interval : int
interval in simulation time for sampling individuals along the
:class:`LOD`
lod_range : (float,float)
bounds in fractions of the total range of the :class:`LOD`
Note
----
Either use a stride or a time interval to sample individuals from the lod.
'''
if stride is not None and time_interval is not None:
raise Exception('defining both lod_generation_interval and lod_time_interval is not allowed')
if stride is None:
stride = self.args.lod_generation_interval
if time_interval is None:
time_interval = self.args.lod_time_interval
if lod_range is None:
lod_range = self.args.lod_range
if formats is None:
formats = self.args.image_formats
self.ref_pop_hist.plot_lod_graphs(stride, time_interval, lod_range, formats)
[docs] def lod_time_courses(self, lod_range=None, chunk_size=None):
'''
Write time series of molecule concentrations within the :class:`LOD`
It is possible to set a range to sample individuals in
the :class:`LOD`.
Parameters
----------
lod_range : (float,float)
bounds in fractions of the total range of the :class:`LOD`
chunk_size : int
number of generations in LOD to concatenate per chunk
'''
if lod_range is None:
lod_range = self.args.lod_range
if chunk_size is None:
chunk_size = self.args.time_course_chunk_size
self.ref_pop_hist.lods_time_course_data(lod_range, chunk_size)
[docs] def lod_time_course_plots(self, stride=None, time_interval=None, lod_range=None, formats=None):
'''
Draw time course diagrams for individuals in the :class:`LOD`\s.
It is possible to set an interval and a range to sample individuals in
the :class:`LOD`.
Parameters
----------
stride : int
stride in generations for sampling individuals along the :class:`LOD`
time_interval : int
interval in simulation time for sampling individuals along the
:class:`LOD`
lod_range : (float,float)
bounds in fractions of the total range of the :class:`LOD`
Note
----
Either use a stride or a time interval to sample individuals from the lod.
'''
if stride is not None and time_interval is not None:
raise Exception('defining both lod_generation_interval and lod_time_interval is not allowed')
if stride is None:
stride = self.args.lod_generation_interval
if time_interval is None:
time_interval = self.args.lod_time_interval
if lod_range is None:
lod_range = self.args.lod_range
if formats is None:
formats = self.args.image_formats
self.ref_pop_hist.lods_time_course_plots(stride, time_interval, lod_range, formats)
[docs] def write_newick_trees(self):
'''write newick trees for all phylogenies in attr:`ref_pop_hist`'''
self.ref_pop_hist.write_newick_trees()
def __enter__(self):
return self
def __exit__(self, _type, _value, _traceback):
self.ref_sim.close_phylo_shelf()
def __str__(self):
return str(self.ref_pop_hist)
[docs]class PopulationHistory(object):
'''
Performs and stores evolutionary history analysis of
:class:`VirtualMicrobes.simulation.Simulation.Simulation` snapshots.
Generates :class:`LOD`\s for 1 or more individuals in the population.
Reconstruct the evolutionary events along the line of descent.
A reference :class:`PopulationHistory` can also be compared to *population
history* at earlier simulation time points. In this case the ancestors of
individuals in the reference *population history* will be identified and
compared to the rest of the population at that point in time. In this way,
evolutionary biases on the line of descent can be brought to light.
'''
sim = None
'''The :class:`VirtualMicrobes.simulation.Simulation.Simulation` snapshot for which this pophist was made.'''
params = None
'''The (updated) simulation parameters.'''
prune_depth = 0
'''Number of generations from leaves to prune the phylogenetic tree of the pophist.'''
population = None
'''Short cut to :class:`VirtualMicrobes.virtual_cell.Population.Population` of `sim`.'''
environment = None
'''Short cut to :class:`VirtualMicrobes.environment.Environment` of `sim`.'''
time_point = None
'''Last simulation time of the `sim`.'''
tree_lods = []
'''List of lists of :class:`LOD`\s. One list for each independent phylogenetic tree within the population.'''
def __init__(self, sim, params, save_dir=None ,prune_depth=None):
'''
Set parameters for phylogenetic analysis.
Parameters
----------
sim : :class:`VirtualMicrobes.simulation.Simulation.Simulation
params : dict
the simulation parameters
save_dir : str
path to save analysis data to
prune_depth : int
number of generations to prune from the leafs of phylogenetic tree
'''
print 'Initialising population history'
self.save_dir = save_dir
if self.save_dir is not None:
util.ensure_dir(self.save_dir)
self.sim = sim
self.params = params
self.prune_depth = prune_depth
self.population = sim.system.population
for anc in self.population.current_ancestors:
if self.params.reconstruct_grn:
anc.update_grn()
self.environment = sim.system.environment
self.time_point = sim.run_time
self._init_rand_gens()
self._init_test_bed()
self.tree_lods = list()
self.lods = collections.OrderedDict() # storing all the lods in this dictionary, keyed by their leaf
[docs] def init_phylo_tree(self, prune_depth=None):
'''
Update the phylogenetic tree of the population.
Clears the change in the population of the final regular simulation
step. Prunes back the tree to a maximum depth.
Parameters
----------
prune_depth : int
number of generations to prune from the leafs of phylogenetic tree
'''
if prune_depth is None:
prune_depth = self.prune_depth
self.population.clear_pop_changes()
self.population.update_phylogeny()
[docs] def init_lods(self, nr_lods, save_dir=None,
stride=None, time_interval=None, lod_range=None):
'''
Initialize the line of descent (:class:`LOD`) container objects.
Iterate over the phylogenetic trees of the :attr:`population` and for
each tree select `nr_lods` leaf nodes that are at maximum phylogenetic
distance.
For each of the selected leafs, construct a line of descent object
(:class:`LOD`).
Parameters
----------
nr_lods : int
number of :class:`LOD` objects per phylogenetic tree
save_dir : str
stride : int
stride in generations for sampling individuals along the :class:`LOD`
time_interval : int
interval in simulation time for sampling individuals along the
:class:`LOD`
lod_range : (float,float)
bounds in fractions of the total range of the :class:`LOD`
'''
if save_dir is None:
save_dir = self.save_dir
if stride is None:
stride = self.params.lod_generation_interval
if time_interval is None:
time_interval = self.params.lod_time_interval
if lod_range is None:
lod_range = self.params.lod_range
# iterate (potentially multiple) phylogenetic trees for the population
myleafs = [l for l in self.population.phylo_tree.leafs]
myleafs = sorted(myleafs, key=lambda x: int(x.val._unique_key), reverse=True)
sampled_leafs = myleafs[0:nr_lods]
#sampled_leafs = sorted(sampled_leafs, key=lambda x: str(x.val._unique_key), reverse=True)
for i,leaf in enumerate(sampled_leafs):
print 'constructing LOD nr ' + str(i+1)
if str(leaf.id) == '1': # This rare thing can happen when the population goes extinct almost immediately, but it can be insightful to make a LOD to see why it went extinct
iterlod = leaf.val.lods_down()
else:
iterlod = leaf.val.lods_up()
for lod in iterlod:
name = str(leaf.val._unique_key)
lod_save_dir = os.path.join(save_dir, name)
self.lods[leaf] = LOD(list(lod), name=name, save_dir = lod_save_dir,
stride=stride, time_interval=time_interval, lod_range=lod_range)
break # record 1 lod per leaf (in clonal pop, there is only 1 per leaf)
print 'done'
def _init_pop_hist_data_store(self):
'''Configure the DataStore for PopulationHistory data.'''
self.sim.data_store.init_phylo_hist_stores(phylo_hist=self)
[docs] def identify_lod_ancestor(self, ete_tree_struct, lod):
'''
Identify the individual in the population that is on the line of descent
(lod) under consideration.
The nodes in the ete tree corresponding to the *lod* will be annotated
with a tag.
Parameters
----------
ete_tree_struct : :class:`VirtualMicrobes.my_tools.utility.ETEtreeStruct`
container structure for phylogenetic tree representations
lod : :class:`LOD`
line of descent
Returns
-------
(:class:`VirtualMicrobes.virtual_cell.Cell.Cell`, :class:`ete3.TreeNode`)
(oldest ancestor cell, its tree node representation)
'''
last, last_ete = None, None
phylo2ete_dict = self.population.phylo_tree.ete_get_phylo2ete_dict(ete_tree_struct)
phylo_id2phylo = dict([ ( (str(phylo_unit.id),phylo_unit.time_birth), phylo_unit)
for phylo_unit in phylo2ete_dict ])
for anc in lod.lod: # going from oldest to youngest
anc_id = (str(anc.id), anc.time_birth)
if anc_id not in phylo_id2phylo: # reached an ancestor that lives later than the leafs in the tree
break
last = phylo_id2phylo[anc_id]
last_ete = phylo2ete_dict[last][0]
for ete_node in phylo2ete_dict[last]:
ete_node.add_feature('lod', True) # annotate the ete nodes as being on the lod
return last, last_ete
def _init_rand_gens(self, rand_seed=None):
if rand_seed is None:
test_rand_seed = self.params.test_rand_seed
self.test_rand = random.Random(int(test_rand_seed))
def _init_test_bed(self):
self.test_bed = Locality(self.params,
internal_molecules=self.environment.internal_molecules,
influx_reactions=self.environment.influx_reactions,
degradation_reactions=self.environment.degradation_reactions,
env_rand=self.test_rand )
def _init_integrator(self, diffusion_steps=None, between_diffusion_reports=None,
max_retries=3, retry_steps_factor=2.):
if diffusion_steps is None:
diffusion_steps = self.params.diffusion_steps
if between_diffusion_reports is None:
between_diffusion_reports = self.params.between_diffusion_reports
max_time_steps_store = max(int(diffusion_steps * between_diffusion_reports
* retry_steps_factor ** (max_retries)), 1)
integrator = integrate.Integrator(locality = self.test_bed, # @UndefinedVariable
nr_time_points=max_time_steps_store,
nr_neighbors=0,
num_threads=1,
step_function=self.params.step_function,
hstart=self.params.init_step_size,
epsabs=self.params.absolute_error,
epsrel=self.params.relative_error,
init_time=0.)
return integrator
[docs] def write_newick_trees(self):
'''
Write newick representation of phylogenetic trees to files.
'''
for ete_tree_struct, _lods in self.tree_lods:
name = ete_tree_struct.tree.name.split('_')[0]
filename = 'tree' + '_' + name
suffix = '.nw'
ete_tree_struct.tree.write(format=1,
outfile=os.path.join(self.save_dir,
filename + suffix))
[docs] def lod_stats(self, stride, time_interval, lod_range):
'''
Write time series for line of descent properties such as network
connectivity, protein expression etc.
Either use a stride or a time interval to sample individuals from the lod.
Parameters
----------
stride : int
stride in generations for sampling individuals along the :class:`LOD`
time_interval : int
interval in simulation time for sampling individuals along the
:class:`LOD`
lod_range : (float,float)
bounds in fractions of the total range of the :class:`LOD`
'''
cumulative_features = self.sim.data_store.mut_stats_names + self.sim.data_store.fit_stats_names + ['iterage']
simple_features = self.sim.data_store.functional_stats_names
for key,lod in self.lods.items():
#print lod
print 'Saving lod statistics for lod ' + str (lod.name)
# DEPRECATED SINCE REMOVAL OF ETE3
#self.population.annotate_phylo_tree(ete_tree_struct=ete_tree_struct,
# features=cumulative_features)
#self.population.annotate_phylo_tree(ete_tree_struct=ete_tree_struct,
# features=simple_features, cummulative=False)
#for ref, l in lod.items():
self.sim.data_store.add_lod_data(lod,self.population,self.environment,
stride, time_interval, lod_range)
print 'done'
[docs] def lod_cells(self, stride, time_interval, lod_range, runtime):
'''
Write cell files for line of descent
The leaf of the tree is saved as CellLeaf<LOD_ID>, and all it's ancestors are saved as CellNode<BIRTHTIME>_<LOD_ID>.cell
Parameters
----------
stride : int
stride in generations for sampling individuals along the :class:`LOD`
time_interval : int
interval in simulation time for sampling individuals along the
:class:`LOD`
lod_range : (float,float)
bounds in fractions of the total range of the :class:`LOD`
'''
for key, lod in self.lods.items():
#for ref, lod in lods.items():
#for l in lod:
#print 'saving cells for ' , l._unique_key
self.sim.data_store.save_lod_cells(lod, stride, time_interval, lod_range,runtime)
print 'done'
[docs] def anc_cells(self, pop, time):
'''
Write cell files for all cells in the ancestry, which can be mapped on the newick tree
Parameters
----------
pop : current population that contains the current_ancestry list
time: run_time
'''
print 'saving all cells in the current ancestry'
self.sim.data_store.save_anc_cells(pop,time)
print 'done'
[docs] def lod_network_stats(self, stride, time_interval, lod_range):
'''
Write time series for line of descent properties such as network
connectivity, protein expression etc.
Either use a stride or a time interval to sample individuals from the lod.
Parameters
----------
stride : int
stride in generations for sampling individuals along the :class:`LOD`
time_interval : int
interval in simulation time for sampling individuals along the
:class:`LOD`
lod_range : (float,float)
bounds in fractions of the total range of the :class:`LOD`
'''
for ete_tree_struct, lods in self.tree_lods:
print 'ete_tree', ete_tree_struct.tree.name
for ref, lod in lods.items():
print 'lod', ref.id
self.sim.data_store.add_lod_network_data(lod, stride,
time_interval,
lod_range)
print 'done'
[docs] def lod_binding_conservation(self, stride, time_interval, lod_range):
'''
Write time series for line of descent properties such as network
connectivity, protein expression etc.
Either use a stride or a time interval to sample individuals from the lod.
Parameters
----------
stride : int
stride in generations for sampling individuals along the :class:`LOD`
time_interval : int
interval in simulation time for sampling individuals along the
:class:`LOD`
lod_range : (float,float)
bounds in fractions of the total range of the :class:`LOD`
'''
for ete_tree_struct, lods in self.tree_lods:
print 'ete_tree', ete_tree_struct.tree.name
for ref, lod in lods.items():
print 'lod', ref.id
self.sim.data_store.add_lod_binding_conservation(lod, stride,
time_interval,
lod_range)
print 'done'
[docs] def plot_lod_graphs(self, stride, time_interval, lod_range, formats):
'''
Output metabolic, GRN and genome graphs for the line of descent.
Either use a stride or a time interval to sample individuals from the lod.
Parameters
----------
stride : int
stride in generations for sampling individuals along the :class:`LOD`
time_interval : int
interval in simulation time for sampling individuals along the
:class:`LOD`
lod_range : (float,float)
bounds in fractions of the total range of the :class:`LOD`
'''
metabolites = self.environment.mols_per_class_dict
conversions = self.environment.reactions_dict['conversion']
imports = self.environment.reactions_dict['import']
suffixes = map(lambda fmt: '.'+fmt, formats)
for ete_tree_struct, lods in self.tree_lods:
print 'ete_tree', ete_tree_struct.tree.name
for ref, lod in lods.items():
print 'adding network graphs for lod', str(ref.id)
attr_dict = self.sim.graphs.attribute_mapper
GRN_grapher = BindingNetwork(lod.save_dir, 'GRN', attribute_dict=attr_dict, show=False)
metabolome_grapher = MetabolicNetwork(lod.save_dir, 'Metabolome',
mol_class_dict=metabolites,
conversions=conversions,
imports=imports,
attribute_dict=attr_dict, show=False)
genome_grapher = Genome(lod.save_dir, 'Genome', attribute_dict=attr_dict, show=False) # Initialise grapher for genome structure (and make directory)
ancestors = lod.strided_lod(stride, time_interval, lod_range)
max_genome = max( [ anc.genome_size for anc in ancestors ] )
for anc in ancestors:
_plot_cell_graphs(anc, GRN_grapher, metabolome_grapher, genome_grapher, max_genome, suffixes)
[docs] def lods_time_course_plots(self, stride, time_interval, lod_range, formats):
'''
Output time course graphs for the line of descent.
Either use a stride or a time interval to sample individuals from the lod.
Parameters
----------
stride : int
stride in generations for sampling individuals along the :class:`LOD`
time_interval : int
interval in simulation time for sampling individuals along the
:class:`LOD`
lod_range : (float,float)
bounds in fractions of the total range of the :class:`LOD`
'''
suffixes = map(lambda fmt: '.'+fmt, formats)
for ete_tree_struct, lods in self.tree_lods:
print 'ete_tree', ete_tree_struct.tree.name
for ref, lod in lods.items():
print 'lod', ref.id
save_dir = os.path.join(lod.save_dir, 'time_course_plots')
util.ensure_dir(save_dir)
ancestors = lod.strided_lod(stride, time_interval, lod_range)
for anc in ancestors:
_plot_cell_time_course(anc, self.sim.graphs,
save_dir = save_dir, suffixes=suffixes)
[docs] def lods_time_course_data(self, lod_range, chunk_size):
'''
Write time series data in the line of descent to files.
Concatenates time courses of individuals along a :class:`LOD`.
Concatenations are done in *chunks* of a chosen `chunk_size`. For each chunk
**.csv** files are stored in a directory named part*n*, where *n* is the
chunk number.
Parameters
----------
ancestors : list of :class:`VirtualMicrobes.virtual_cell.Cell.Cell`\s
base_save_dir : str
viewer_path : str
path to utility files for html data viewer
chunk_size : int
length of chunks of concatenated data
'''
for ete_tree_struct, lods in self.tree_lods:
print 'ete_tree', ete_tree_struct.tree.name
for ref, lod in lods.items():
print 'lod', ref.id
save_dir = os.path.join(lod.save_dir, 'time_courses')
util.ensure_dir(save_dir)
ancestors = lod.strided_lod(stride=None, time_interval=None, lod_range=lod_range)
_lod_time_course_data(ancestors, base_save_dir = save_dir,
viewer_path=os.path.join(self.sim.utility_path, 'time_course_viewer'),
chunk_size=chunk_size)
[docs] def draw_ref_trees(self, rescale=False):
'''
Output reference trees for phylogenetic trees with lods labeled.
Uses phylogenetic tree drawing methods to annotate the leaf nodes of
lods. Reference trees give a visual overview of the position of the lods
that are analysed in the tree.
'''
for ete_tree_struct, lods in self.tree_lods:
save_loc = os.path.join(self.save_dir,
ete_tree_struct.tree.name.split('_')[0],
'ancestry_plots')
util.ensure_dir(os.path.join(save_loc+'/reftree_cells'))
cells_in_tree = self.population.phylo_tree.ete_nodes_to_phylo_units(ete_tree_struct)
for cell in cells_in_tree:
for parent in cell.parents:
write_obj.write_cell(cell, os.path.join(save_loc, 'reftree_cells/Node_' + str(parent.id)+ '_' + str(cell.time_birth) + '.cell'), alive=True)
for leaf, lod in lods.items():
self.population.phylo_tree.annotate_phylo_units_feature(ete_tree_struct,
lod.lod, 'in_lod')
self.population.phylo_tree.annotate_leafs(ete_tree_struct, leaf)
self.population.phylo_tree.ete_prune_internal(ete_tree_struct)
func_features={'metabolic_type':self.population.metabolic_type_color}
self.population.annotate_phylo_tree(ete_tree_struct,
func_features=func_features,
cummulative=False,
prune_internal=True,
to_rate=False
)
attr_dict = self.sim.graphs.attribute_mapper
rescale_factor = None
if rescale:
rescale_factor = 500.0 / self.population.phylo_tree.ete_calc_lca_depth(ete_tree_struct.tree)
print 'rescale factor is ', rescale_factor
phylo_grapher = PhyloTreeGraph(save_loc, name='Phylotree', attribute_dict=attr_dict, show=False)
phylo_grapher.update(ete_tree_struct.tree)
phylo_grapher.save_fig(feature='metabolic_with_lod', name='lodstree',
rescale=rescale_factor, dpi=10, suffix=".svg")
print 'Plotted reference tree'
[docs] def dump_pop_cells(self,time,prunegens):
'''
Output current population cells as cellfiles
'''
print 'Now saving all cellfiles of loaded population'
print '(stepping back ' + str(prunegens) + ' generations)'
directory = os.path.join(self.save_dir, 'pop_cells/', str(time))
print directory
util.ensure_dir(directory, remove_globs=['*.cell', '*.json', '*.mp4'])
stored_cells = []
for i,cell in enumerate(self.sim.system.population.cells):
storecell = cell
for x in range(0,prunegens):
storecell = storecell.parents.__iter__().next()
if(storecell._unique_key not in stored_cells):
write_obj.write_cell(storecell, os.path.join(directory+'/Cellfile_inpop_' + str(storecell._unique_key).zfill(16) +'.cell'))
self.sim.data_store.add_cell_tc(storecell,directory,
self.sim.graphs.attribute_mapper, time, cell._unique_key)
stored_cells.append(storecell._unique_key)
print 'Saved all cellfiles'
[docs] def dump_anc_cells(self,time):
'''
Dump all ancestors (perfect fossil record) to files, and also save the newick tree.
Should be all in there?
'''
directory = os.path.join(self.save_dir, 'anc_cells/')
util.ensure_dir(directory)
f = open(directory+str(time)+".nw", "w")
for tree in self.population.phylo_tree.nh_formats():
f.write(tree)
f.write('\n')
print 'Now saving all cellfiles of ancestry (fossil record)'
directory = os.path.join(self.save_dir, 'anc_cells/', str(time))
util.ensure_dir(directory)
print directory
for cell in self.population.current_ancestors:
write_obj.write_cell(cell, os.path.join(directory+'/Cellfile_ancestor_' + str(cell._unique_key).zfill(16) +'.cell'), alive=True)
self.sim.data_store.add_cell_tc(cell,directory,
self.sim.graphs.attribute_mapper, time, cell._unique_key)
print 'Saved all cellfiles for ancestry (fossil record)'
[docs] def dump_lod_cells(self,time):
'''
Dump all cells used in LOD analysis to files (i.o.w. a single lineages / subset of anc_cells)
'''
print 'Now saving all cellfiles of line of descent (LOD)'
directory = os.path.join(self.save_dir, 'lod_cells/', str(time))
util.ensure_dir(directory)
print directory
for key, lod in self.lods.items():
directory = os.path.join(self.save_dir, 'lod_cells/', str(time), str(lod.name))
util.ensure_dir(directory)
for cell in lod:
write_obj.write_cell(cell, os.path.join(directory+'/Cellfile_lod_' + str(cell._unique_key).zfill(16) +'.cell'), alive=True)
self.sim.data_store.add_cell_tc(cell,directory,
self.sim.graphs.attribute_mapper, time, cell._unique_key)
print 'Saved all cellfiles for LOD'
# def compare_to_pop(self, compare_save, prune_depth=None, leafs_sample_size=None):
# REMOVED BY BREM 02-2019 AS FUNCTIONS IS OUTDATED, UNTESTED AND NO LONGER USED
def __str__(self):
return '\n'.join([ str(lod) for tree_lods in self.tree_lods
for lod in tree_lods[1] ])
[docs]class LOD(object):
'''
classdocs
'''
def __init__(self, lod, name, stride, time_interval, lod_range, save_dir=None):
'''
Store the line of descent to analyse.
'''
self.lod = lod
self.name = name
self.stride = stride
self.time_interval = time_interval
self.lod_range = lod_range
self.save_dir = save_dir
if self.save_dir is not None:
util.ensure_dir(self.save_dir)
[docs] def standardized_production(self, test_params):
for c in self.lod:
self.test_bed.clear_locality()
self.test_bed.add_cell(c)
integrator = self._init_integrator()
self.run_system(integrator)
[docs] def strided_lod(self, stride, time_interval, lod_range):
'''
Sample individuals within a range of the LOD at regular intervals.
Either use a stride or a time interval to sample individuals from the
lod. If a time interval is provided, ancestors are sampled that have a
time of birth that is approximately separated by `time_interval` in the
evolutionary simulation.
Parameters
----------
stride : int
stride in generations for sampling individuals along the :class:`LOD`
time_interval : int
interval in simulation time for sampling individuals along the
:class:`LOD`
lod_range : (float,float)
bounds in fractions of the total range of the :class:`LOD`
Returns
-------
list of ancestor :class:`VirtualMicrobes.virtual_cell.Cell.Cell` s
'''
if lod_range is None:
lod_range = (0.,1.)
if stride is not None and time_interval is not None:
raise Exception('defining both stride and time_interval is not allowed')
lod_all = list(self)
if stride is not None:
ancestors = lod_all[::stride]
elif time_interval is not None:
ancestors = list(self.t_interval_iter(time_interval))
else:
ancestors = lod_all
if ancestors[-1] != lod_all[-1]:
ancestors.append(lod_all[-1])
from_root, from_leaf = int(lod_range[0]*len(ancestors)), int(lod_range[1]*len(ancestors))
return ancestors[from_root:from_leaf]
[docs] def t_interval_iter(self, time_interval):
'''
Iterate ancestors that are approximately 'time_interval' timesteps apart
in their time of birth.
'''
mod_time = time_interval
for anc in self:
prev_mod_time = mod_time
mod_time = anc.time_birth % time_interval
if mod_time > prev_mod_time:
continue
yield anc
def __iter__(self):
return iter(self.lod)