OpenSourceBrain · stellaprins · Nov 4, 2024 · Nov 4, 2024 · Nov 4, 2024 · Nov 4, 2024
diff --git a/BioModels/BIOMD0000000001/BIOMD0000000001_url.omex b/BioModels/BIOMD0000000001/BIOMD0000000001_url.omex
diff --git a/BioModels/BIOMD0000000001/BIOMD0000000001_url.sedml b/BioModels/BIOMD0000000001/BIOMD0000000001_url.sedml
diff --git a/BioModels/BIOMD0000000001/BIOMD0000000001_url.xml b/BioModels/BIOMD0000000001/BIOMD0000000001_url.xml
diff --git a/BioModels/BIOMD0000000001/tests/d1_plots_local/amici_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000001/tests/d1_plots_local/amici_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000001/tests/d1_plots_local/copasi_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000001/tests/d1_plots_local/copasi_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000001/tests/d1_plots_local/pysces_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000001/tests/d1_plots_local/pysces_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000001/tests/d1_plots_local/tellurium_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000001/tests/d1_plots_local/tellurium_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000001/tests/d1_plots_local/vcell_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000001/tests/d1_plots_local/vcell_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000001/tests/d1_plots_remote/amici_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000001/tests/d1_plots_remote/amici_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000001/tests/d1_plots_remote/copasi_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000001/tests/d1_plots_remote/copasi_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000001/tests/d1_plots_remote/masspy_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000001/tests/d1_plots_remote/masspy_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000001/tests/d1_plots_remote/pysces_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000001/tests/d1_plots_remote/pysces_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000001/tests/d1_plots_remote/tellurium_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000001/tests/d1_plots_remote/tellurium_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000001/tests/results_compatibility_biosimulators.md b/BioModels/BIOMD0000000001/tests/results_compatibility_biosimulators.md
diff --git a/BioModels/BIOMD0000000001/tests/results_local.json b/BioModels/BIOMD0000000001/tests/results_local.json
diff --git a/BioModels/BIOMD0000000001/tests/results_remote.json b/BioModels/BIOMD0000000001/tests/results_remote.json
diff --git a/BioModels/BIOMD0000000138/BIOMD0000000138_url.sedml b/BioModels/BIOMD0000000138/BIOMD0000000138_url.sedml
diff --git a/BioModels/BIOMD0000000138/BIOMD0000000138_url.xml b/BioModels/BIOMD0000000138/BIOMD0000000138_url.xml
diff --git a/BioModels/BIOMD0000000138/tests/d1_plots_local/amici_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000138/tests/d1_plots_local/amici_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000138/tests/d1_plots_local/copasi_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000138/tests/d1_plots_local/copasi_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000138/tests/d1_plots_local/pysces_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000138/tests/d1_plots_local/pysces_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000138/tests/d1_plots_local/tellurium_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000138/tests/d1_plots_local/tellurium_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000138/tests/d1_plots_local/vcell_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000138/tests/d1_plots_local/vcell_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000138/tests/d1_plots_remote/amici_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000138/tests/d1_plots_remote/amici_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000138/tests/d1_plots_remote/copasi_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000138/tests/d1_plots_remote/copasi_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000138/tests/d1_plots_remote/masspy_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000138/tests/d1_plots_remote/masspy_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000138/tests/d1_plots_remote/pysces_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000138/tests/d1_plots_remote/pysces_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000138/tests/d1_plots_remote/tellurium_autogen_plot_for_task1.pdf b/BioModels/BIOMD0000000138/tests/d1_plots_remote/tellurium_autogen_plot_for_task1.pdf
diff --git a/BioModels/BIOMD0000000138/tests/results_compatibility_biosimulators.md b/BioModels/BIOMD0000000138/tests/results_compatibility_biosimulators.md
diff --git a/BioModels/BIOMD0000000138/tests/results_local.json b/BioModels/BIOMD0000000138/tests/results_local.json
diff --git a/BioModels/BIOMD0000000138/tests/results_remote.json b/BioModels/BIOMD0000000138/tests/results_remote.json
diff --git a/BioModels/parse_biomodels.py b/BioModels/parse_biomodels.py
@@ -24,6 +24,8 @@
 sys.path.append("..")
 import utils
 
+matplotlib.use('Agg') #prevent matplotlib from trying to open a window
+
 API_URL: str = "https://www.ebi.ac.uk/biomodels"
 
 out_format="json"
@@ -181,7 +183,7 @@ def main():
 
     #caching is used to prevent the need to download the same responses from the remote server multiple times during testing
     #mode="off" to disable caching, "store" to wipe and store fresh results, "reuse" to use the stored cache
-    cache = utils.RequestCache(mode="auto",direc="cache")
+    cache = utils.RequestCache(mode="store",direc="cache")
 
     #accumulate results in columns defined by keys which correspond to the local variable names to be used below
     #to allow automated loading into the columns

diff --git a/BioModels/test_biomodels_compatibility_biosimulators.py b/BioModels/test_biomodels_compatibility_biosimulators.py
@@ -0,0 +1,253 @@
+#!/usr/bin/env python3
+
+md_description = \
+'''
+Download and create results table for curated models from BioModels https://www.ebi.ac.uk/biomodels.
+
+'''
+
+import pyneuroml.sbml #for validate_sbml_files
+import pyneuroml.sedml #for validate_sedml_files
+
+import re
+import os
+import urllib
+import sys
+import matplotlib
+
+sys.path.append("..")
+import utils
+
+matplotlib.use('Agg') #prevent plotting from trying to open a window
+
+API_URL: str = "https://www.ebi.ac.uk/biomodels"
+
+out_format="json"
+max_count = 0 #0 for unlimited
+
+#local temporary storage of the model files
+#this is independent of caching, and still happens when caching is turned off
+#this allows the model to be executed and the files manually examined etc
+tmp_dir = "tmplocalfiles"
+
+#suppress stdout/err output from validation functions to make progress counter readable
+suppress_stdout = True
+suppress_stderr = True
+
+#whether to replace "model.xml" in the sedml file with the name of the actual sbml file
+fix_broken_ref = True
+
+#skip tests that cause the script to be killed due to lack of RAM
+#needs at least 8GB
+skip = {}
+
+def download_file(model_id,filename,output_file,cache):
+    '''
+    request the given file and save it to disk
+    '''
+
+    qfilename = urllib.parse.quote_plus(filename)
+
+    response = cache.do_request(f'{API_URL}/model/download/{model_id}?filename={qfilename}').content
+
+    with open(output_file,"wb") as fout:
+        fout.write(response)
+
+def replace_model_xml(sedml_path,sbml_filename):
+    '''
+    if the SEDML refers to a generic "model.xml" file
+    and the SBML file is not called this
+    replace the SEDML reference with the actual SBML filename
+
+    method used assumes 'source="model.xml"' will only
+    occur in the SBML file reference
+    which was true at time of testing on current BioModels release
+
+    returns True if the SBML reference already seemed valid, False if it had to be rewritten
+    '''
+
+    if sbml_filename == "model.xml": return True
+
+    with open(sedml_path,encoding='utf-8') as f:
+        data = f.read()
+
+    if not 'source="model.xml"' in data: return True
+
+    data = data.replace('source="model.xml"',f'source="{sbml_filename}"')
+
+    with open(f'{sedml_path}',"w",encoding="utf-8") as fout:
+        fout.write(data)
+
+    return False
+
+def validate_sbml_file(model_id,mtab,info,cache,sup):
+    '''
+    tasks relating to validating the SBML file
+    return None to indicate aborting any further tests on this model
+    otherwise return the SBML filename
+    '''
+
+    #handle only single SBML files
+    if not info['format']['name'] == "SBML":
+        mtab['valid_sbml'] = ['NonSBML', f"{info['format']['name']}:{info['files']['main']}"]
+        return None
+
+    if len(info['files']['main']) > 1:
+        mtab['valid_sbml'] = ['MultipleSBMLs',f"{info['files']['main']}"]
+        return None
+
+    if len(info['files']['main']) < 1:
+        mtab['valid_sbml'] = ['NoSBMLs',f"{info['files']['main']}"]
+        return None
+
+    #download the sbml file
+    sbml_file = info['files']['main'][0]['name']
+    try:
+        download_file(model_id,sbml_file,sbml_file,cache)
+    except Exception as e:
+        mtab['valid_sbml'] = ['DownloadFail',f"{sbml_file} {e}"]
+        return None
+
+    #validate the sbml file
+    sup.suppress() #suppress validation warning/error messages  
+    valid_sbml = pyneuroml.sbml.validate_sbml_files([sbml_file], strict_units=False)
+    valid_sbml_units = pyneuroml.sbml.validate_sbml_files([sbml_file], strict_units=True)
+    sup.restore()
+
+    mtab['valid_sbml'] = ['pass' if valid_sbml else 'FAIL', f'[{sbml_file}]({API_URL}/{model_id}#Files)']
+    mtab['valid_sbml_units'] = 'pass' if valid_sbml_units else 'FAIL'
+
+    return sbml_file
+
+def validate_sedml_file(model_id,mtab,info,cache,sup,sbml_file):
+    '''
+    tasks relating to validating the SEDML file
+    return None to indicate aborting any further tests on this model
+    otherwise return the SEDML filename
+    '''
+
+    #must have a SEDML file as well in order to be executed
+    if not 'additional' in info['files']:
+        mtab['valid_sedml'] = f"NoSEDML"
+        return None
+
+    sedml_file = []
+    for file_info in info['files']['additional']:
+        pattern = 'SED[-]?ML'
+        target = f"{file_info['name']}|{file_info['description']}".upper()
+        if re.search(pattern,target):
+            sedml_file.append(file_info['name'])
+
+    #require exactly one SEDML file
+    if len(sedml_file) == 0:
+        mtab['valid_sedml'] = "NoSEDML"
+        return None
+
+    if len(sedml_file) > 1:
+        mtab['valid_sedml'] = ["MultipleSEDMLs",f"{sedml_file}"]
+        return None
+
+    #download sedml file
+    sedml_file = sedml_file[0]
+    try:
+        download_file(model_id,sedml_file,sedml_file,cache)
+    except:
+        mtab['valid_sedml'] = ["DownloadFail",f"{sedml_file}"]
+        return None
+
+    #if the sedml file contains a generic 'source="model.xml"' replace it with the sbml filename
+    if fix_broken_ref:
+        broken_ref = replace_model_xml(sedml_file,sbml_file)
+        mtab['broken_ref'] = 'pass' if broken_ref else 'FAIL'
+    else:
+        mtab['broken_ref'] = 'NA'
+
+    sup.suppress()
+    valid_sedml = pyneuroml.sedml.validate_sedml_files([sedml_file])
+    sup.restore()
+    mtab['valid_sedml'] = ['pass' if valid_sedml else 'FAIL', f'[{sedml_file}]({API_URL}/{model_id}#Files)']
+
+    return sedml_file
+
+def main():
+    '''
+    download the BioModels model files, run the SBML and SEDML validation steps
+    then run each model remotely and locally via utils.run_biosimulators_remotely_and_locally (the markdown table filled in here is not written out by this function)
+    '''
+
+    #caching is used to prevent the need to download the same responses from the remote server multiple times during testing
+    #mode="off" to disable caching, "store" to wipe and store fresh results, "reuse" to use the stored cache
+    cache = utils.RequestCache(mode="store",direc="cache")
+
+    #accumulate results in columns defined by keys which correspond to the local variable names to be used below
+    #to allow automated loading into the columns
+    column_labels = "Model     |valid-sbml|valid-sbml-units|valid-sedml|broken-ref|tellurium"
+    column_keys  =  "model_desc|valid_sbml|valid_sbml_units|valid_sedml|broken_ref|tellurium_outcome"
+    mtab = utils.MarkdownTable(column_labels,column_keys)
+
+    #allow stdout/stderr from validation tests to be suppressed to improve progress count visibility
+    sup = utils.SuppressOutput(stdout=suppress_stdout,stderr=suppress_stderr)
+
+    #get list of all available models
+    model_ids = cache.do_request(f"{API_URL}/model/identifiers?format={out_format}").json()['models']
+    count = 0
+    starting_dir = os.getcwd()
+
+    if cases != None:
+        model_ids = cases
+
+    for model_id in model_ids:
+        #allow testing on a small sample of models
+        if max_count > 0 and count >= max_count:
+            break
+        count += 1
+        print(f"\r{model_id} {count}/{len(model_ids)}",end='')
+
+        #only process curated models
+        #BIOMD ids should be the curated models
+        if not 'BIOMD' in model_id:
+            continue
+
+        #skip if on the list to be skipped
+        if count in skip or model_id in skip:
+            continue
+
+        #from this point the model will create an output row even if not all tests are run
+        mtab.new_row() #append empty placeholder row
+        info = cache.do_request(f"{API_URL}/{model_id}?format={out_format}").json()
+
+        if len(info['name']) > 36:
+            model_summary = f"[{model_id}]({API_URL}/{model_id})<br/><sup>{info['name'][:30]}</sup>"
+            model_details = f"<sup>{info['name']}</sup>"
+            mtab['model_desc'] = mtab.make_fold(model_summary,model_details)
+        else:
+            mtab['model_desc'] = f"[{model_id}]({API_URL}/{model_id})<br/><sup>{info['name']}</sup>"
+
+        #make temporary downloads of the sbml and sedml files
+        model_dir = os.path.join(starting_dir,model_id)
+        os.makedirs(model_dir,exist_ok=True)
+        os.chdir(model_dir)
+
+        #sbml file validation tasks, includes downloading a local copy
+        sbml_file = validate_sbml_file(model_id,mtab,info,cache,sup)
+        if not sbml_file: continue # no further tests possible
+
+        sedml_file = validate_sedml_file(model_id,mtab,info,cache,sup,sbml_file)
+        if not sedml_file: continue # no further tests possible
+
+        test_folder = 'tests'
+        # engine_list = list(utils.ENGINES.keys())
+
+        utils.run_biosimulators_remotely_and_locally(engine_list,
+                        os.path.basename(sedml_file), 
+                        os.path.basename(sbml_file),
+                        os.path.join(test_folder,'d1_plots_remote'), 
+                        os.path.join(test_folder,'d1_plots_local'),
+                        test_folder=test_folder,
+                        remove_output=remove_output)
+
+if __name__ == "__main__":
+    cases = ["BIOMD0000000001","BIOMD0000000138"]
+    remove_output = False
+    engine_list = ['copasi']
+    main()
diff --git a/utils/__init__.py b/utils/__init__.py
@@ -461,17 +461,20 @@ def check_file_compatibility_test(engine, model_filepath, experiment_filepath):
     if file_extensions in engine_filetypes_tuple_list:
         file_types = [TYPES[i] for i in file_extensions]
         return 'pass', (f"The file extensions {file_extensions} suggest the input file types are '{file_types}'. {compatible_filetypes} are compatible with {engine}.")
+
     if 'xml' in file_extensions:
-            if 'sbml' in model_filepath and 'sedml' not in model_filepath:
-                if 'sbml' in experiment_filepath and 'sedml' in experiment_filepath:
-                    file_types = ('sbml', 'sedml')
-                    if file_types in engine_filetypes_tuple_list:
-                        return 'pass', (f"The filenames '{model_filepath}' and '{experiment_filepath}' suggest the input files are {[TYPES[i] for i in file_types]} which is compatible with {engine}.<br><br>{compatible_filetypes} are compatible with {engine}.")
-                    else: 
-                        return 'unsure', (f"The filenames '{model_filepath}' and '{experiment_filepath}' suggest the input files are {[TYPES[i] for i in file_types]} which is not compatible with {engine}.<br><br>{compatible_filetypes} are compatible with {engine}.")
+        model_sbml = 'sbml' in model_filepath
+        model_sedml = 'sedml' in model_filepath
+        experiment_sbml = 'sbml' in experiment_filepath
+        experiment_sedml = 'sedml' in experiment_filepath
+
+        if model_sbml and experiment_sbml and experiment_sedml and not model_sedml:
+            file_types = [TYPES[i] for i in ('sbml', 'sedml') ]
+            return 'pass', (f"The filenames '{model_filepath}' and '{experiment_filepath}' suggest the input files are {file_types} which is compatible with {engine}.<br><br>{compatible_filetypes} are compatible with {engine}.")
+        else:
+            return 'unsure', (f"The file extensions {file_extensions} suggest the input file types may be compatibe with {engine}.<br><br>{compatible_filetypes} are compatible with {engine}.")
     else:
         return 'unsure', (f"The file extensions {file_extensions} suggest the input file types may not be compatibe with {engine}.<br><br>{compatible_filetypes} are compatible with {engine}.")
-
 
 def collapsible_content(content, title='Details'):
     """
@@ -1185,7 +1188,8 @@ def run_biosimulators_remotely(engine_keys,
                                sedml_file_name, 
                                sbml_file_name, 
                                d1_plots_remote_dir,  
-                               test_folder='tests'):
+                               test_folder='tests',
+                               remove_output_remote=True):
 
     """ run with directory pointing towards the location of the sedml and sbml files"""
 
@@ -1217,18 +1221,19 @@ def run_biosimulators_remotely(engine_keys,
     file_paths = find_files(remote_output_dir, '.pdf')
     move_d1_files(file_paths, d1_plots_remote_dir)
 
-    # remove the remote results directory
-    if os.path.exists(remote_output_dir):
-        shutil.rmtree(remote_output_dir)
-        print('Removed ' + remote_output_dir + ' folder')
+    if remove_output_remote == True:# remove the remote results directory
+        if os.path.exists(remote_output_dir):
+            shutil.rmtree(remote_output_dir)
+            print('Removed ' + remote_output_dir + ' folder')
 
     return results_remote
 
 def run_biosimulators_locally(engine_keys,
                               sedml_file_name, 
                               sbml_file_name, 
                               d1_plots_local_dir, 
-                              test_folder='tests'):
+                              test_folder='tests',
+                              remove_output_local=True):
 
     engines = {k: v for k, v in ENGINES.items() if k in engine_keys}
     results_local = {}
@@ -1247,9 +1252,10 @@ def run_biosimulators_locally(engine_keys,
     move_d1_files(file_paths, d1_plots_local_dir)
 
     # if it exists remove the output folder
-    if os.path.exists(local_output_dir):
-        shutil.rmtree(local_output_dir)
-        print('Removed ' + local_output_dir + ' folder')
+    if remove_output_local == True:
+        if os.path.exists(local_output_dir):
+            shutil.rmtree(local_output_dir)
+            print('Removed ' + local_output_dir + ' folder')
 
     return results_local
 
@@ -1313,19 +1319,22 @@ def run_biosimulators_remotely_and_locally(engine_keys,
                                  sbml_file_name,
                                  d1_plots_remote_dir, 
                                  d1_plots_local_dir,
-                                 test_folder='tests'):
+                                 test_folder='tests',
+                                 remove_output=True):
 
     results_remote = run_biosimulators_remotely(engine_keys,
                                     sedml_file_name=sedml_file_name, 
                                     sbml_file_name=sbml_file_name,
                                     d1_plots_remote_dir=d1_plots_remote_dir, 
-                                    test_folder=test_folder)
+                                    test_folder=test_folder,
+                                    remove_output_remote = remove_output)
 
     results_local = run_biosimulators_locally(engine_keys,
                                     sedml_file_name=sedml_file_name, 
                                     sbml_file_name=sbml_file_name,
                                     d1_plots_local_dir=d1_plots_local_dir, 
-                                    test_folder=test_folder)
+                                    test_folder=test_folder,
+                                    remove_output_local = remove_output)
 
     results_table = create_combined_results_table(results_remote, 
                                     results_local,