Merge branch 'rasmachine_dev'
bgyori committed Mar 9, 2017
2 parents d843c47 + ed8c697 commit 63bb438
Showing 9 changed files with 76 additions and 22 deletions.
8 changes: 4 additions & 4 deletions indra/assemblers/pysb_assembler.py
@@ -87,7 +87,7 @@ def get_create_base_agent(self, agent):
 
         # Handle location condition
         if agent.location is not None:
-            base_agent.create_site('loc', [agent.location])
+            base_agent.create_site('loc', [_n(agent.location)])
 
         # Handle activity
         if agent.activity is not None:
@@ -355,7 +355,7 @@ def get_agent_rule_str(agent):
         else:
             rule_str_list.append('n' + _n(b.agent.name))
     if agent.location is not None:
-        rule_str_list.append(agent.location.replace(' ', '_'))
+        rule_str_list.append(_n(agent.location))
     rule_str = '_'.join(rule_str_list)
     return rule_str
 
@@ -564,7 +564,7 @@ def get_site_pattern(agent):
 
     # Handle location
     if agent.location is not None:
-        pattern['loc'] = agent.location
+        pattern['loc'] = _n(agent.location)
 
     # Handle activity
     if agent.activity is not None:
@@ -2061,7 +2061,7 @@ def translocation_assemble_default(stmt, model, agent_set):
     if stmt.from_location is None or stmt.to_location is None:
         return
     param_name = 'kf_%s_%s_%s' % (_n(stmt.agent.name).lower(),
-                                  stmt.from_location, stmt.to_location)
+                                  _n(stmt.from_location), _n(stmt.to_location))
     kf_trans = get_create_parameter(model, param_name, 1.0, unique=True)
     monomer = model.monomers[_n(stmt.agent.name)]
     rule_agent_str = get_agent_rule_str(stmt.agent)
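
Aside on the pysb_assembler changes: the commit routes Agent location strings (cellular compartments such as 'plasma membrane') through the module's _n name normalization helper wherever they become PySB site states or rule-name fragments, instead of using them raw or with an ad hoc replace(' ', '_'). A minimal sketch of what such a normalizer has to do, with a hypothetical normalize_name standing in for INDRA's actual _n:

import re

def normalize_name(name):
    # Hypothetical stand-in for _n: PySB site states and rule names must be
    # valid identifiers, so replace anything that is not alphanumeric or an
    # underscore, and avoid a leading digit.
    name = re.sub(r'\W', '_', name)
    if name and name[0].isdigit():
        name = 'p' + name
    return name

print(normalize_name('plasma membrane'))        # plasma_membrane
print(normalize_name('endoplasmic reticulum'))  # endoplasmic_reticulum
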
19 changes: 18 additions & 1 deletion indra/biopax/processor.py
@@ -278,7 +278,16 @@ def get_activity_modification(self, force_contains=None):
                                   evidence=ev)
                 self.statements.append(decode_obj(stmt,
                                                   encoding='utf-8'))
-    def get_regulate_amounts(self):
+    def get_regulate_amounts(self, force_contains=None):
+        """Extract INDRA RegulateAmount statements from the model.
+
+        Parameters
+        ----------
+        force_contains : Optional[list[str]]
+            A list of gene names for filtering. Only Statements in which the
+            gene names in the force_contains list appear will be extracted.
+            Default: None
+        """
         pb = _bpp('PatternBox')
 
         p = pb.controlsExpressionWithTemplateReac()
@@ -349,6 +358,11 @@ def get_regulate_amounts(self):
                   for cit in citations]
             for subj, obj in itertools.product(_listify(controller),
                                                _listify(controlled)):
+                if force_contains is not None:
+                    if subj and subj.name not in force_contains:
+                        continue
+                    if obj and obj.name not in force_contains:
+                        continue
                 subj_act = ActivityCondition('transcription', True)
                 subj.activity = subj_act
                 if control_type == 'ACTIVATION':
@@ -921,6 +935,9 @@ def _get_entref(bpe):
     'mearg': ('methylation', 'R'),
     'methylated L-arginine': ('methylation', 'R'),
     'methylated arginine': ('methylation', 'R'),
+    'melys' : ('methylation', 'K'),
+    'methylated lysine' : ('methylation', 'K'),
+    'methylated L-lysine' : ('methylation', 'K'),
     'ubiquitination': ('ubiquitination', None),
     'ubiquitinylated lysine': ('ubiquitination', 'K'),
     'ubiquitination signature tetrapeptidyl lysine': ('ubiquitination', 'K'),
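
The new force_contains argument on get_regulate_amounts follows the pattern already used by the other BiopaxProcessor extraction methods such as get_activity_modification: when a gene list is given, a RegulateAmount Statement is kept only if its (non-None) controller and target names are both in that list. A hedged usage sketch, assuming the usual biopax_api entry point; the OWL file name and gene list are illustrative:

from indra.biopax import biopax_api

# Process a previously downloaded BioPAX OWL file (file name is hypothetical)
bp = biopax_api.process_owl('ras_pathway.owl')

# Extract RegulateAmount Statements restricted to the listed genes
bp.get_regulate_amounts(force_contains=['TP53', 'MDM2'])
print(len(bp.statements))
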
2 changes: 1 addition & 1 deletion indra/preassembler/sitemapper.py
@@ -140,7 +140,7 @@ def map_sites(self, stmts, save_fname=None):
                               if isinstance(stmt, Modification)
                               else stmt_copy.enz)
             # Check the modification on the appropriate agent
-            old_mod_list = [ModCondition(None, stmt.residue,
+            old_mod_list = [ModCondition('modification', stmt.residue,
                                          stmt.position)]
             # Figure out if this site is invalid
             stmt_invalid_sites = \
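
The sitemapper fix passes the generic 'modification' type to ModCondition instead of None when reconstructing the site to check, since ModCondition's first argument is expected to be a valid modification type string. For illustration only (residue and position made up):

from indra.statements import ModCondition

# Generic modification condition at threonine 185
mc = ModCondition('modification', residue='T', position='185')
print(mc)
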
5 changes: 5 additions & 0 deletions indra/reach/processor.py
@@ -363,11 +363,13 @@ def _get_agent_from_entity(self, entity_id):
                 be_id = bioentities_map.get(('PF', xr['id']))
                 if be_id:
                     db_refs['BE'] = be_id
+                    agent_name = be_id
                 db_refs['PF'] = xr['id']
             elif ns == 'interpro':
                 be_id = bioentities_map.get(('IP', xr['id']))
                 if be_id:
                     db_refs['BE'] = be_id
+                    agent_name = be_id
                 db_refs['PF'] = xr['id']
             elif ns == 'chebi':
                 db_refs['CHEBI'] = xr['id']
@@ -384,6 +386,7 @@ def _get_agent_from_entity(self, entity_id):
                 db_refs['HMDB'] = xr['id']
             elif ns == 'be':
                 db_refs['BE'] = xr['id']
+                agent_name = db_refs['BE']
             # These name spaces are ignored
             elif ns in ['uaz']:
                 pass
@@ -618,6 +621,8 @@ def _parse_site_text(s):
     'defarnesylation': ('farnesylation', False),
     'ribosylation': ('ribosylation', True),
     'deribosylation': ('ribosylation', False),
+    'methylation': ('methylation', True),
+    'demethylation': ('methylation', False),
     'unknown': ('modification', True),
 }
 
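
Together with the TRIPS change further down, the REACH processor now standardizes the Agent name to the Bioentities (BE) entry name whenever an entity's grounding maps to a Bioentities identifier, so family-level entities get the same canonical name regardless of which reader produced them. A sketch of the resulting Agent (the db_refs values are illustrative, not actual reader output):

from indra.statements import Agent

# A family-level entity grounded to the Bioentities 'ERK' entry now
# carries that entry name as its Agent name
erk = Agent('ERK', db_refs={'BE': 'ERK', 'TEXT': 'ERK1/2'})
print(erk.name, erk.db_refs)
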
4 changes: 2 additions & 2 deletions indra/statements.py
@@ -1318,11 +1318,11 @@ class Demyristoylation(RemoveModification):
     """Demyristoylation modification."""
     pass
 
-class Methylation(Modification):
+class Methylation(AddModification):
     """Methylation modification."""
     pass
 
-class Demethylation(Modification):
+class Demethylation(RemoveModification):
     """Demethylation modification."""
     pass
 
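
Re-parenting Methylation under AddModification and Demethylation under RemoveModification matters because downstream code dispatches on these base classes, for example when an assembler decides whether a rule sets a site to its modified or unmodified state. A quick check of the new hierarchy (agents, residue and position are illustrative):

from indra.statements import (Agent, Methylation, Demethylation,
                              AddModification, RemoveModification)

st1 = Methylation(Agent('EZH2'), Agent('HIST1H3A'), 'K', '27')
st2 = Demethylation(Agent('KDM6A'), Agent('HIST1H3A'), 'K', '27')
print(isinstance(st1, AddModification))     # True after this commit
print(isinstance(st2, RemoveModification))  # True after this commit
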
2 changes: 2 additions & 0 deletions indra/tools/gene_network.py
@@ -156,7 +156,9 @@ def get_biopax_stmts(self, filter=False, query='pathsbetween'):
         bp.get_acetylation()
         bp.get_palmitoylation()
         bp.get_glycosylation()
+        bp.get_ubiquitination()
         bp.get_activity_modification()
+        bp.get_regulate_amounts()
         # Save statements to pickle file if we're caching
         if self.basename is not None:
             with open(biopax_stmt_path, 'wb') as f:
5 changes: 5 additions & 0 deletions indra/trips/processor.py
@@ -876,6 +876,7 @@ def _get_agent_by_id(self, entity_id, event_id):
         # Determine the agent name
         hgnc_id = db_refs.get('HGNC')
         up_id = db_refs.get('UP')
+        be_id = db_refs.get('BE')
         agent_name = None
         # HGNC name takes precedence
         if hgnc_id:
@@ -887,6 +888,10 @@ def _get_agent_by_id(self, entity_id, event_id):
             gene_name = up_client.get_gene_name(up_id)
             if gene_name:
                 agent_name = gene_name
+        # If it is mapped to Bioentities then we standardize its name
+        # to the Bioentities entry name
+        elif be_id:
+            agent_name = be_id
         # Otherwise, take the name of the term as agent name
         else:
             name = term.find("name")
48 changes: 36 additions & 12 deletions models/rasmachine/rasmachine.py
@@ -241,6 +241,36 @@ def _extend_dict(d1, d2):
             d1[k] = v
     return d1
 
+def filter_db_highbelief(stmts_in, db_names, belief_cutoff):
+    logger.info('Filtering %d statements to above %f belief' %
+                (len(stmts_in), belief_cutoff))
+    # The first round of filtering is in the top-level list
+    stmts_out = []
+    # Now we eliminate supports/supported-by
+    for stmt in stmts_in:
+        sources = set([ev.source_api for ev in stmt.evidence])
+        if stmt.belief >= belief_cutoff or \
+                sources.intersection(db_names):
+            stmts_out.append(stmt)
+        else:
+            continue
+        supp_by = []
+        supp = []
+        for st in stmt.supports:
+            sources = set([ev.source_api for ev in st.evidence])
+            if st.belief >= belief_cutoff or \
+                    sources.intersection(db_names):
+                supp.append(st)
+        for st in stmt.supported_by:
+            sources = set([ev.source_api for ev in st.evidence])
+            if st.belief >= belief_cutoff or \
+                    sources.intersection(db_names):
+                supp_by.append(st)
+        stmt.supports = supp
+        stmt.supported_by = supp_by
+    logger.info('%d statements after filter...' % len(stmts_out))
+    return stmts_out
+
 if __name__ == '__main__':
     logger.info('-------------------------')
     logger.info(time.strftime('%c'))
@@ -386,12 +416,9 @@ def _extend_dict(d1, d2):
     # Original statistics
     stats['orig_stmts'] = len(model.get_statements())
     stats['orig_assembled'] = len(model.assembled_stmts)
-    db_stmts = ac.filter_evidence_source(model.assembled_stmts,
-                                         ['biopax', 'bel'], policy='one')
-    no_db_stmts = ac.filter_evidence_source(model.assembled_stmts,
-                                            ['biopax', 'bel'], policy='none')
-    no_db_stmts = ac.filter_belief(no_db_stmts, belief_threshold)
-    orig_stmts = db_stmts + no_db_stmts
+    orig_stmts = filter_db_highbelief(model.assembled_stmts, ['bel', 'biopax'],
+                                      belief_threshold)
+    orig_stmts = ac.filter_top_level(orig_stmts)
     stats['orig_final'] = len(orig_stmts)
     logger.info('%d final statements' % len(orig_stmts))
 
@@ -407,12 +434,9 @@ def _extend_dict(d1, d2):
     # New statistics
     stats['new_stmts'] = len(model.get_statements())
     stats['new_assembled'] = len(model.assembled_stmts)
-    db_stmts = ac.filter_evidence_source(model.assembled_stmts,
-                                         ['biopax', 'bel'], policy='one')
-    no_db_stmts = ac.filter_evidence_source(model.assembled_stmts,
-                                            ['biopax', 'bel'], policy='none')
-    no_db_stmts = ac.filter_belief(no_db_stmts, belief_threshold)
-    new_stmts = db_stmts + no_db_stmts
+    new_stmts = filter_db_highbelief(model.assembled_stmts, ['bel', 'biopax'],
+                                     belief_threshold)
+    new_stmts = ac.filter_top_level(new_stmts)
     stats['new_final'] = len(new_stmts)
     logger.info('%d final statements' % len(new_stmts))
 
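
The filter_db_highbelief helper consolidates what the script previously did with two ac.filter_evidence_source passes plus ac.filter_belief: a statement is kept if it has evidence from one of the named database sources (here 'bel' or 'biopax') or if its belief score clears the cutoff, and the same criterion prunes each kept statement's supports and supported_by lists so the hierarchy stays consistent with the filtered set. A minimal sketch of the intended behavior with hand-built Statements (agents, evidence and belief values are illustrative, and filter_db_highbelief is assumed to be in scope from models/rasmachine/rasmachine.py):

from indra.statements import Agent, Phosphorylation, Evidence

db_stmt = Phosphorylation(Agent('MAP2K1'), Agent('MAPK1'),
                          evidence=[Evidence(source_api='biopax')])
db_stmt.belief = 0.5      # low belief, but database-backed -> kept

read_stmt = Phosphorylation(Agent('BRAF'), Agent('MAP2K1'),
                            evidence=[Evidence(source_api='reach')])
read_stmt.belief = 0.7    # reading-only and below the cutoff -> dropped

kept = filter_db_highbelief([db_stmt, read_stmt], ['bel', 'biopax'], 0.95)
print(len(kept))  # 1
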
5 changes: 3 additions & 2 deletions setup.py
@@ -6,8 +6,9 @@
 def main():
     # Only install functools32 if we're in Python 2 (it's not available
     # for Python 3)
-    install_list = ['pysb>=1.2.1', 'objectpath', 'rdflib', 'requests>=2.11',
-                    'lxml', 'ipython', 'future', 'networkx', 'pandas']
+    install_list = ['pysb>=1.2.1', 'objectpath', 'rdflib==4.2.1',
+                    'requests>=2.11', 'lxml', 'ipython', 'future',
+                    'networkx', 'pandas']
     if sys.version_info[0] == 2:
         install_list.append('functools32')
 
