# Source code for VirtualMicrobes.virtual_cell.Genome

import collections
from copy import copy, deepcopy
import itertools
from orderedset import OrderedSet

from VirtualMicrobes.my_tools.utility import OrderedDefaultdict
import matplotlib as mpl


class Genome(object):
    '''
    Collection of chromosomes plus the regulatory bindings between the
    operators and binding sequences of the genes they carry.

    Iterating over a genome yields every genomic element, chromosome by
    chromosome; ``len(genome)`` is the summed length of all chromosomes.

    Parameters
    ----------
    chromosomes : iterable of :class:`VirtualMicrobes.virtual_cell.Chromosome.Chromosome`
        chromosomes to add
    min_bind_score : float
        minimum binding score for sequence matching
    '''

    class_version = '1.0'

    def __init__(self, chromosomes, min_bind_score):
        self.version = self.__class__.class_version
        self.init_chromosomes(chromosomes)
        self.init_regulatory_network(min_bind_score)
        # Units dropped from the genome are remembered so that `die` and
        # the pruning methods can still reach them.
        self.genes_removed = set()
        self.chromosomes_removed = set()

    @property
    def size(self):
        '''Total number of positions over all chromosomes.'''
        return len(self)

    @property
    def tfs(self):
        '''Generator over genes whose ``'type'`` is ``'tf'``.'''
        return (g for g in self if g['type'] == 'tf')

    @property
    def enzymes(self):
        '''Generator over genes whose ``'type'`` is ``'enz'``.'''
        return (g for g in self if g['type'] == 'enz')

    @property
    def pumps(self):
        '''Generator over genes whose ``'type'`` is ``'pump'``.'''
        return (g for g in self if g['type'] == 'pump')

    @property
    def eff_pumps(self):
        '''List of pumps with a truthy ``'exporting'`` entry.'''
        return [p for p in self.pumps if p['exporting']]

    @property
    def inf_pumps(self):
        '''List of pumps with a falsy ``'exporting'`` entry.'''
        return [p for p in self.pumps if not p['exporting']]

    @property
    def copy_number_dist(self):
        '''Counter of how often each copy number occurs in `copy_numbers`.'''
        return collections.Counter(self.copy_numbers.values())

    @property
    def copy_numbers(self):
        '''Counter of genes keyed on gene type + major id.'''
        return collections.Counter(
            [ge['type'] + str(ge.id.major_id) for ge in self])

    @property
    def copy_numbers_tfs(self):
        '''Counter of tfs keyed on major id + ligand class.'''
        return collections.Counter(
            [str(ge.id.major_id) + str(ge.ligand_class) for ge in self.tfs])

    @property
    def copy_numbers_enzymes(self):
        '''Counter of enzymes keyed on major id.'''
        return collections.Counter(
            [str(ge.id.major_id) for ge in self.enzymes])

    @property
    def copy_numbers_inf_pumps(self):
        '''Counter of non-exporting pumps keyed on major id.'''
        return collections.Counter(
            [str(ge.id.major_id) for ge in self.inf_pumps])

    @property
    def copy_numbers_eff_pumps(self):
        '''Counter of exporting pumps keyed on major id.'''
        return collections.Counter(
            [str(ge.id.major_id) for ge in self.eff_pumps])

    @property
    def operators(self):
        '''Ordered set of the operators of all genes (rebuilt on each access).'''
        return OrderedSet(g.operator for g in self)

    @property
    def binding_sequences(self):
        '''Ordered set of the binding sequences of all tfs (rebuilt on each access).'''
        return OrderedSet(g.binding_sequence for g in self.tfs)

    def init_chromosomes(self, chromosomes):
        '''
        Initialize chromosomes.

        Add preinitialized chromosomes to the genome.

        Parameters
        ----------
        chromosomes : iterable of :class:`VirtualMicrobes.virtual_cell.Chromosome.Chromosome`
            chromosomes to add
        '''
        self.chromosomes = []
        for chrom in chromosomes:
            self.add_chromosome(chrom)

    def add_chromosome(self, chrom, verbose=False):
        '''
        Add a chromosome to the list of chromosomes.

        Parameters
        ----------
        chrom : :class:`VirtualMicrobes.virtual_cell.Chromosome.Chromosome`
            chromosome to append
        verbose : bool
            be verbose
        '''
        if verbose:
            # Single-argument print gives identical output on Python 2 and 3.
            print("adding chromosome %s" % (chrom,))
        self.chromosomes.append(chrom)

    def del_chromosome(self, chrom, remove_genes=True, verbose=False):
        '''
        Delete a chromosome.

        Remove a chromosome from the list of chromosomes. If `remove_genes` is
        True the genome will be further updated to reflect deletion of genes.
        E.g. the sequence bindings should be updated when genes are removed
        from the genome. It may be useful to defer updating if it is already
        known that the genes will be readded immediately. This may be the case
        when a chromosome is split (fission) or fused and no genes will be
        actually lost from the genome.

        Parameters
        ----------
        chrom : :class:`VirtualMicrobes.virtual_cell.Chromosome.Chromosome`
            chromosome to be removed
        remove_genes : bool
            if True update the genome
        verbose : bool
            be verbose
        '''
        if verbose:
            print("deleting chromosome %s" % (chrom,))
        self.chromosomes.remove(chrom)
        if remove_genes:
            self.chromosomes_removed.add(chrom)
            self.update_genome_removed_genes(chrom.positions)

    def bs_to_tfs_dict(self):
        '''
        Create mapping from binding sequences to tfs.

        For each binding sequence in the genome map to the set of tfs that
        contain this binding sequence.

        Returns
        -------
        mapping from :class:`VirtualMicrobes.virtual_cell.Sequence.BindingSequence`
        to set of :class:`VirtualMicrobes.virtual_cell.Gene.TranscriptionFactor`
        '''
        d = collections.OrderedDict()
        for bs in self.binding_sequences:
            d[bs] = OrderedSet(tf for tf in self.tfs
                               if tf.binding_sequence == bs)
        return d

    def op_to_tfs_scores_dict(self):
        '''
        Create mapping from operators to the tfs that bind them, with their
        scores.

        For each operator in the genome map the set of tfs, together with
        their binding scores.

        Returns
        -------
        mapping from :class:`VirtualMicrobes.virtual_cell.Sequence.Operator`
        to list of (:class:`VirtualMicrobes.virtual_cell.Gene.TranscriptionFactor`,
        binding-score (float)) tuples.
        '''
        op_to_tfs_scores = OrderedDefaultdict(list)
        bs_to_tf_dict = self.bs_to_tfs_dict()
        for op in self.operators:
            for bs, score in op.binding_sequences.items():
                for tf in bs_to_tf_dict[bs]:
                    op_to_tfs_scores[op].append((tf, score))
        return op_to_tfs_scores

    def binding_tfs_scores(self, op):
        '''
        Return tfs that bind this operator and their scores.

        Parameters
        ----------
        op : :class:`VirtualMicrobes.virtual_cell.Sequence.Operator`
            operator sequence

        Returns
        -------
        list of :class:`VirtualMicrobes.virtual_cell.Gene.TranscriptionFactor`,
        float tuples
        '''
        tfs_scores = list()
        bs_to_tf_dict = self.bs_to_tfs_dict()
        for bs, score in op.binding_sequences.items():
            for tf in bs_to_tf_dict[bs]:
                tfs_scores.append((tf, score))
        return tfs_scores

    def update_regulatory_network(self, min_bind_score):
        '''
        Update the binding state of the regulatory network.

        Iterate over all Sequences in the genome and if their check_binding
        flag is set, match the sequence against all potential binders in the
        genome.

        Parameters
        ----------
        min_bind_score : float
            minimum binding score for sequence matching
        '''
        # NOTE(review): original indentation was lost in this copy of the
        # source; the consistency assertions are placed at loop level
        # (checked for every sequence) -- confirm against upstream.
        for op in self.operators:
            if op.check_binding:
                op.update_binding_sequences(self.binding_sequences,
                                            min_bind_score)
            for bs in op.binding_sequences:
                assert op in bs.bound_operators
        for bs in self.binding_sequences:
            if bs.check_binding:
                bs.match_operators(self.operators, min_bind_score)
            for op in bs.bound_operators:
                assert bs in op.binding_sequences

    def reset_regulatory_network(self, min_bind_score):
        '''
        Reset the binding state of the regulatory network.

        Iterate over all Sequences in the genome and clear all bindings. Then
        re-initialize the regulatory network.

        Parameters
        ----------
        min_bind_score : float
            minimum binding score for sequence matching
        '''
        for bs in self.binding_sequences:
            bs.clear_bound_operators()
        for op in self.operators:
            op.clear_binding_sequences()
        self.init_regulatory_network(min_bind_score)

    def init_regulatory_network(self, min_bind_score):
        '''
        Initialize the binding state of the regulatory network.

        Iterate over all
        :class:`VirtualMicrobes.virtual_cell.Sequence.Operator`\\ s in the
        genome and match them against all
        :class:`VirtualMicrobes.virtual_cell.Sequence.BindingSequence`\\ s.

        Parameters
        ----------
        min_bind_score : float
            minimum binding score for sequence matching
        '''
        for op in self.operators:
            op.update_binding_sequences(self.binding_sequences,
                                        min_bind_score)
        for bs in self.binding_sequences:
            # because all TFs have been updated simultaneously
            bs.check_binding = False

    def tf_connections_dict(self):
        '''
        A dictionary of TFs to sets of downstream bound genes.
        '''
        d = collections.defaultdict(set)  # NOTE: unordered ok, use for output only
        op_to_tfs_scores_dict = self.op_to_tfs_scores_dict()
        for g in self:
            for tf, _score in op_to_tfs_scores_dict[g.operator]:
                d[tf].add(g)
        return d

    def _inform_lost_bs(self, binding_sequence):
        '''
        When a binding sequence is lost (due to deletion) it informs
        operators it was bound to.

        A single binding sequence that was removed may or not be present in
        another gene (copy) in the genome. Only if it was the last of its
        type, should the operators that it was binding to be informed about
        its removal. These operators remove the binding_sequence from their
        internal bound binding_sequence dictionaries.

        Parameters
        ----------
        binding_sequence : :class:`VirtualMicrobes.virtual_cell.Sequence.BindingSequence`
            binding_sequence to check

        See Also
        --------
        func:`_inform_lost_operator`
        '''
        if binding_sequence not in self.binding_sequences:
            binding_sequence.inform_operators()

    def _inform_lost_operator(self, operator):
        '''
        If an operator is lost from the genome inform its binding sequences.

        Tells :class:`VirtualMicrobes.virtual_cell.Sequence.BindingSequence`\\ s
        of the `operator` to remove `operator`.

        Parameters
        ----------
        operator : :class:`VirtualMicrobes.virtual_cell.Sequence.Operator`
            operator to check

        See Also
        --------
        func:`_inform_lost_bs`
        '''
        if operator not in self.operators:
            operator.inform_bss()

    def update_genome_removed_gene(self, gene):
        '''
        Remove a gene from the genome if no more copies exist in the genome.

        Updates the genome.

        Parameters
        ----------
        gene : :class:`VirtualMicrobes.virtual_cell.GenomicElement.GenomicElement`
            gene to be removed
        '''
        if gene['type'] == 'tf':
            self._inform_lost_bs(gene.binding_sequence)
        if gene not in set(self):
            self.genes_removed.add(gene)
            # NOTE(review): original indentation was lost in this copy of
            # the source. `_inform_lost_operator` re-checks genome membership
            # itself, so calling it only for removed genes should be
            # equivalent to calling it unconditionally -- confirm upstream.
            self._inform_lost_operator(gene.operator)

    def update_genome_removed_genes(self, genes):
        '''
        Update the genome to reflect gene deletions.

        After the deletion of (part of) a chromosome, the genome has to be
        updated to reflect the change. Because exact copies of deleted genes
        may still be present in another part of the genome a check has to be
        performed before definitive removal.

        Parameters
        ----------
        genes : iterable of :class:`VirtualMicrobes.virtual_cell.GenomicElement.GenomicElement`
            genes that were targeted by a deletion operation.
        '''
        for g in set(genes):  # NOTE: unordered ok
            self.update_genome_removed_gene(g)

    def die(self, time):
        '''
        Record death of phylogenetic units in the genome.

        Typically called from the cell when it dies. All phylogenetic units
        in the genome are instructed to record their death. When phylogenetic
        units are no longer alive, they may be pruned from their respective
        phylogenetic trees if there are no more living descendants of the
        phylogenetic unit.

        Parameters
        ----------
        time : float
            simulation time
        '''
        for g in self:
            g.die(time)
        for chrom in self.chromosomes:
            chrom.die(time)
        for g in self.genes_removed:
            g.die(time)
        for c in self.chromosomes_removed:
            c.die(time)
        # clear the sequences bindings to decrease references to (dead)
        # sequences
        #=======================================================================
        # for op in self.operators:
        #     op.clear_binding_sequences()
        # for bs in self.binding_sequences:
        #     bs.clear_bound_operators()
        #=======================================================================

    def prune_genomic_ancestries(self):
        '''
        Prune the phylogenetic trees of phylogenetic units in the genome.

        Returns
        -------
        tuple (set of :class:`VirtualMicrobes.virtual_cell.Chromosome.Chromosome`,
        set of :class:`VirtualMicrobes.virtual_cell.GenomicElement.GenomicElement`)
        '''
        pruned_genes = self._prune_gene_ancestries()
        pruned_chromosomes = self._prune_chromosome_ancestries()
        return pruned_chromosomes, pruned_genes

    def _prune_gene_ancestries(self):
        '''Prune dead branches of present and removed genes; return what was pruned.'''
        pruned_genes = set()
        for g in set(self) | self.genes_removed:
            pruned_genes.update(g.prune_dead_branch())
        return pruned_genes

    def _prune_chromosome_ancestries(self):
        '''Prune dead branches of present and removed chromosomes; return what was pruned.'''
        pruned_chromosomes = set()
        for chrom in set(self.chromosomes) | self.chromosomes_removed:
            pruned_chromosomes.update(chrom.prune_dead_branch())
        return pruned_chromosomes

    def _reproduction_copy_operators(self, operators, time):
        '''
        Make copy of operators present in the genome during reproduction.

        Copies each operator and stores the originals and copies in a map.
        This map is used as a reference when genes are copied and the new
        gene copy's reference to its operator is updated to the newly created
        operator copy.

        Parameters
        ----------
        operators : iterable of :class:`VirtualMicrobes.virtual_cell.Sequence.Operator`
            parent operators
        time : float
            simulation time point

        Returns
        -------
        mapping of parent to child operators
        '''
        self.orig_copy_operator_map = dict()  # NOTE: unordered ok
        for op in operators:
            # local name chosen so the imported `copy` function is not shadowed
            op_copy = op._reproduction_copy(time)
            self.orig_copy_operator_map[op] = op_copy
        return self.orig_copy_operator_map

    def _reproduction_copy_binding_sequences(self, binding_sequences, time):
        '''
        Make copy of binding sequences present in the genome during
        reproduction.

        Copies each binding sequence and stores the originals and copies in a
        map. This map is used as a reference when genes are copied and the
        new gene copy's reference to its binding sequence is updated to the
        newly created binding sequence copy.

        Parameters
        ----------
        binding_sequences : iterable of :class:`VirtualMicrobes.virtual_cell.Sequence.BindingSequence`
            parent binding sequences
        time : float
            simulation time point

        Returns
        -------
        mapping of parent to child binding sequences
        '''
        self.orig_copy_binding_sequence_map = dict()  # NOTE: unordered ok
        for bs in binding_sequences:
            bs_copy = bs._reproduction_copy(time)
            self.orig_copy_binding_sequence_map[bs] = bs_copy
        return self.orig_copy_binding_sequence_map

    def _update_bind_mapping(self):
        '''
        Update bindings from parent to child sequences.
        '''
        for bs in self.binding_sequences:
            bs._update_bound_operators(self.orig_copy_operator_map)
        for op in self.operators:
            op._update_binding_sequences(self.orig_copy_binding_sequence_map)

    def _reproduction_copy_genes(self, parent_genome, time):
        '''
        Make copy of the parent_genome.

        Copies each gene and stores the originals and copies in a map. This
        map is used as a reference when chromosomes are copied and the new
        chromosome copy's references to genes in the parent_genome is updated
        to the newly created gene copies.

        Requires that `_reproduction_copy_operators` and
        `_reproduction_copy_binding_sequences` have been called first, as
        their maps are read here.

        Parameters
        ----------
        parent_genome : :class:`Genome`
            genome of parent
        time : float
            simulation time point

        Returns
        -------
        mapping from parent genes to gene copies belonging to this offspring
        '''
        self.orig_copy_genes_map = dict()  # NOTE: unordered ok
        for g in parent_genome:
            gene_copy = g._reproduction_copy(time)
            gene_copy.operator = self.orig_copy_operator_map[g.operator]
            if gene_copy['type'] == 'tf':
                gene_copy.binding_sequence = (
                    self.orig_copy_binding_sequence_map[g.binding_sequence])
            self.orig_copy_genes_map[g] = gene_copy
        return self.orig_copy_genes_map

    def _reproduction_copy_chromosomes(self, chromosomes, time):
        '''Append a reproduction copy of each parent chromosome, remapped to the copied genes.'''
        for chrom in chromosomes:
            self.chromosomes.append(
                chrom._reproduction_copy(self.orig_copy_genes_map, time))

    def _reproduction_copy(self, time):
        '''
        Create the offspring copy of this genome.

        A new instance is created without calling ``__init__``. Container
        attributes are replaced by fresh empty containers of the same class,
        the version is carried over, the temporary copy maps are skipped and
        any other attribute is deep-copied. Operators, binding sequences,
        genes and chromosomes are then copied in that order and the bindings
        of the copies are remapped onto each other.

        Parameters
        ----------
        time : float
            simulation time point

        Returns
        -------
        :class:`Genome`
            the offspring genome
        '''
        cls = self.__class__
        result = cls.__new__(cls)
        for k, v in self.__dict__.items():
            if k in ["enzymes", "pumps", "tfs", "binding_sequences",
                     "operators", 'orig_copy_genes_map',
                     'orig_copy_operator_map',
                     'orig_copy_binding_sequence_map']:
                # derived or temporary data: not carried over
                pass
            elif k in ["genes_per_type", "chromosomes", 'genes_removed',
                       'chromosomes_removed', 'removed_bss',
                       'removed_operators']:
                # start the offspring with an empty container of the same class
                atr_cls = v.__class__
                setattr(result, k, atr_cls.__new__(atr_cls))
            elif k in ['version']:
                setattr(result, k, v)
            else:
                print("deepcopying %s of genome" % (k,))
                setattr(result, k, deepcopy(v))
        result._reproduction_copy_operators(self.operators, time)
        result._reproduction_copy_binding_sequences(self.binding_sequences,
                                                    time)
        result._reproduction_copy_genes(self, time)
        result._reproduction_copy_chromosomes(self.chromosomes, time)
        result._update_bind_mapping()
        return result

    def toJSON(self, *args, **kwargs):
        '''
        Build a nested dictionary describing the genome.

        Extra arguments are passed on to the ``toJSON`` method of each
        chromosome, after its index in the genome.

        Returns
        -------
        dict with keys 'name', 'description', 'colour' and 'children'
        '''
        children = []
        for i, c in enumerate(self.chromosomes):
            children.append(c.toJSON(i, *args, **kwargs))
        d = {'name': 'genome',
             'description': 'genome',
             'colour': mpl.colors.rgb2hex(
                 mpl.colors.colorConverter.to_rgb('grey')),
             'children': children}
        return d

    def __str__(self):
        return '\n'.join([str(chrom) for chrom in self.chromosomes])

    def __len__(self):
        return sum(map(len, self.chromosomes))

    def __iter__(self):
        '''
        Iterate over all positions in all chromosomes.

        Yields
        ------
        :class:`VirtualMicrobes.virtual_cell.GenomicElement.GenomicElement`
            Genomic elements in the order of the chromosomes in the genome
            and positions in chromosomes.
        '''
        return itertools.chain(*self.chromosomes)

    def update(self, state):
        '''
        Strip obsolete attributes from a pickled state, in place.

        States with a version below 1.0 (or without version) may carry
        attributes that are no longer stored on the instance.

        Parameters
        ----------
        state : dict
            instance dictionary that is being unpickled
        '''
        version = float(state.get('version', '0.0'))
        if version < 1.0:
            for attr in ['genes_per_type', 'operators', 'binding_sequences',
                         'removed_bss', 'removed_operators']:
                # pop() instead of del: a legacy state need not hold every
                # obsolete attribute
                state.pop(attr, None)

    def upgrade(self):
        '''
        Bring an unpickled instance of an older version up to
        `class_version`, adding the attributes that older versions lacked.
        '''
        version = float(self.version)
        self.version = self.class_version
        print("upgrade class %s from version %s to version %s"
              % (self.__class__.__name__, version, self.version))
        if version < 1.0:
            self.genes_removed = set()
            self.chromosomes_removed = set()

    def __setstate__(self, state):
        # Unpickling hook: clean the legacy state, install it, and upgrade
        # when it was written by a different class version.
        self.update(state)
        self.__dict__ = state
        if not hasattr(self, 'version'):
            self.version = '0.0'
        if self.version != self.class_version:
            self.upgrade()