Make it possible to use only the multiple models #46

draeger-lab · Mar 7, 2023 · 9b21b73 · 9b21b73
1 parent ef9b9bd
commit 9b21b73
Show file tree

Hide file tree

Showing 4 changed files with 205 additions and 194 deletions.
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -2,10 +2,10 @@ name: Update Docs
 
 on: 
   workflow_dispatch:
-#  push:
-#    paths:
-#    - '**.rst'
-#    - 'docs/*'
+  push:
+    paths:
+    - '**.rst'
+    - 'docs/*'
 
 jobs:
   run:

diff --git a/config.yaml b/config.yaml
@@ -25,6 +25,7 @@ multiple_paths: # enter as many paths as you need below
   - 'data/e_coli_core.xml'
   - ''
   - ''
+single: TRUE # set to FALSE to skip the single-model steps and work only with the models listed under multiple_paths
 
 # media to simulate growth from, just comment the media you do not want with a #
 media: 

diff --git a/main.py b/main.py
@@ -34,6 +34,9 @@ def main(configpath=None):
             logging.StreamHandler()
         ]
     )
+    logging.getLogger('cobra').setLevel(logging.WARNING)
+    logging.getLogger('requests').setLevel(logging.WARNING)
+    logging.getLogger('urllib3').setLevel(logging.WARNING)
     logging.info('----------- New run of refineGEMs -----------')
     rg.databases.initialise_database()
 
@@ -70,134 +73,135 @@ def main(configpath=None):
             heatmap.savefig(config['out_path'] + 'visualization/' + 'heatmap_dt_additives_' + str(today) + '.png')
             binary_heatmap.savefig(config['out_path'] + 'visualization/' + 'heatmap_native_' + str(today) + '.png', bbox_inches='tight')
             ini_plot.savefig(config['out_path'] + 'visualization/' + 'model_status_' + str(today) + '.png', bbox_inches='tight')
-
-    try:    
-        model_cobra, errors = cobra.io.sbml.validate_sbml_model(config['model'])
-        logging.info(errors)
-    except (OSError):
-        model_cobra = None
-        logging.info('No or no valid model given, please enter a valid path in the model field in the config file.')
+
+    if (config['single']):        
+        try:    
+            model_cobra, errors = cobra.io.sbml.validate_sbml_model(config['model'])
+            logging.info(errors)
+        except (OSError):
+            model_cobra = None
+            logging.info('No or no valid model given, please enter a valid path in the model field in the config file.')
 
-    if (config['keggpathways']):
-        model_libsbml, non_kegg = rg.pathways.kegg_pathways(config['model'])
-        file = open(config['out_path'] + model_libsbml.getId() + '_reac_wo_kegg_' + str(today) + '.txt','w')
-        for reaction in non_kegg:
-            file.write(reaction+"\n")
-        file.close()
-        logging.info('Kegg Pathways were added to the model as groups. Reactions that have no KEGG annotation are denoted in ' + model_libsbml.getId() + '_reac_wo_kegg.txt')
+        if (config['keggpathways']):
+            model_libsbml, non_kegg = rg.pathways.kegg_pathways(config['model'])
+            file = open(config['out_path'] + model_libsbml.getId() + '_reac_wo_kegg_' + str(today) + '.txt','w')
+            for reaction in non_kegg:
+                file.write(reaction+"\n")
+            file.close()
+            logging.info('Kegg Pathways were added to the model as groups. Reactions that have no KEGG annotation are denoted in ' + model_libsbml.getId() + '_reac_wo_kegg.txt')
 
-    else:
-        model_libsbml = rg.io.load_model_libsbml(config['model'])
-
-    if (config['sboterms']):
-        if (config['visualize']):
-            sbo_fig = rg.investigate.plot_rea_sbo_single(model_libsbml).get_figure()
-            # saving the created visualizations
-            sbo_fig.savefig(config['out_path'] + 'visualization/' + str(model_cobra.id) + '_ReacPerSBO_beforeUpdate_' + str(today) + '.png', bbox_inches='tight')
-        model_libsbml = rg.sboann.sbo_annotation(model_libsbml)
-        logging.info('SBO Terms updated for ' + model_libsbml.getId())
-
-    if (config['charge_corr']):
-        model_libsbml, multiple_charges = rg.charges.correct_charges_modelseed(model_libsbml)
-        pd.DataFrame.from_dict(multiple_charges, orient='index').to_csv(config['out_path'] + model_libsbml.getId() + '_mulchar_' + str(today) + '.csv', sep=',', header=False)
-        logging.info('Charges were corrected for ' + model_libsbml.getId() + '. A table with metabolites with multiple charges can be found under ' + model_libsbml.getId() + '_mulchar_' + str(today) + '.csv')
+        else:
+            model_libsbml = rg.io.load_model_libsbml(config['model'])
 
-    if(config['man_cur']):
-        if config['man_cur_type'] == 'gapfill':
-            gapfill = rg.io.load_manual_gapfill(config['man_cur_table'])
-            model_libsbml = rg.curate.add_reactions_from_table(model_libsbml, gapfill, config['entrez_email'])
-            logging.info('Manual gap filling was done for ' + model_libsbml.getId())
-        elif config['man_cur_type'] == 'metabs':
-            man_ann = rg.io.load_manual_annotations(config['man_cur_table'])
-            model_libsbml = rg.curate.update_annotations_from_table(model_libsbml, man_ann)
-            model_libsbml = rg.curate.update_annotations_from_others(model_libsbml)
-            logging.info('Manual update of annotations was done for ' + model_libsbml.getId())
+        if (config['sboterms']):
+            if (config['visualize']):
+                sbo_fig = rg.investigate.plot_rea_sbo_single(model_libsbml).get_figure()
+                # saving the created visualizations
+                sbo_fig.savefig(config['out_path'] + 'visualization/' + str(model_cobra.id) + '_ReacPerSBO_beforeUpdate_' + str(today) + '.png', bbox_inches='tight')
+            model_libsbml = rg.sboann.sbo_annotation(model_libsbml)
+            logging.info('SBO Terms updated for ' + model_libsbml.getId())
 
-    if config['gap_analysis'] and config['gapfill_model']:
-        filename = f'{config["out_path"]}{name}_gap_analysis_{str(today)}.xlsx'
-        if config['gap_analysis_params'].get('db_to_compare') not in ('BioCyc', 'KEGG+BioCyc'):
-            logging.warning('Currently, only the result from the \'BioCyc\' or \'KEGG+BioCyc\' runs can be directly added to a model.')
+        if (config['charge_corr']):
+            model_libsbml, multiple_charges = rg.charges.correct_charges_modelseed(model_libsbml)
+            pd.DataFrame.from_dict(multiple_charges, orient='index').to_csv(config['out_path'] + model_libsbml.getId() + '_mulchar_' + str(today) + '.csv', sep=',', header=False)
+            logging.info('Charges were corrected for ' + model_libsbml.getId() + '. A table with metabolites with multiple charges can be found under ' + model_libsbml.getId() + '_mulchar_' + str(today) + '.csv')
+
+        if(config['man_cur']):
+            if config['man_cur_type'] == 'gapfill':
+                gapfill = rg.io.load_manual_gapfill(config['man_cur_table'])
+                model_libsbml = rg.curate.add_reactions_from_table(model_libsbml, gapfill, config['entrez_email'])
+                logging.info('Manual gap filling was done for ' + model_libsbml.getId())
+            elif config['man_cur_type'] == 'metabs':
+                man_ann = rg.io.load_manual_annotations(config['man_cur_table'])
+                model_libsbml = rg.curate.update_annotations_from_table(model_libsbml, man_ann)
+                model_libsbml = rg.curate.update_annotations_from_others(model_libsbml)
+                logging.info('Manual update of annotations was done for ' + model_libsbml.getId())
+
+        if config['gap_analysis'] and config['gapfill_model']:
+            filename = f'{config["out_path"]}{name}_gap_analysis_{str(today)}.xlsx'
+            if config['gap_analysis_params'].get('db_to_compare') not in ('BioCyc', 'KEGG+BioCyc'):
+                logging.warning('Currently, only the result from the \'BioCyc\' or \'KEGG+BioCyc\' runs can be directly added to a model.')
+                gap_analysis = rg.gapfill.gap_analysis(model_libsbml, config['gap_analysis_params'], filename)
+                logging.info(f'Gap analysis for {model_libsbml.getId()} with {config["gap_analysis_params"].get("db_to_compare")} was performed.')
+                logging.info(f'Complete Excel table is in file: {filename}.')
+            else:
+                gapfill = rg.gapfill.gapfill(model_libsbml, config['gap_analysis_params'], filename)
+                gap_analysis_stats = gapfill[0]
+                logging.info(f'Statistics on missing entites for {model_libsbml.getId()}:')
+                logging.info(gap_analysis_stats)
+                logging.info(f'Complete Excel table is in file: {filename}.')
+                model_libsbml = gapfill[-1]
+                logging.info(f'Gaps were filled in {model_libsbml.getId()}.')
+        elif config['gap_analysis']:
+            filename = f'{config["out_path"]}{name}_gap_analysis_{str(today)}.xlsx'
             gap_analysis = rg.gapfill.gap_analysis(model_libsbml, config['gap_analysis_params'], filename)
             logging.info(f'Gap analysis for {model_libsbml.getId()} with {config["gap_analysis_params"].get("db_to_compare")} was performed.')
+            if  config["gap_analysis_params"].get("db_to_compare") != 'KEGG':
+                logging.info(f'Statistics on missing entites for {model_libsbml.getId()}:')
+                logging.info(gap_analysis[0])
             logging.info(f'Complete Excel table is in file: {filename}.')
-        else:
-            gapfill = rg.gapfill.gapfill(model_libsbml, config['gap_analysis_params'], filename)
-            gap_analysis_stats = gapfill[0]
-            logging.info(f'Statistics on missing entites for {model_libsbml.getId()}:')
-            logging.info(gap_analysis_stats)
-            logging.info(f'Complete Excel table is in file: {filename}.')
-            model_libsbml = gapfill[-1]
+        elif config['gapfill_model']:
+            model_libsbml = rg.gapfill.gapfill_model(model_libsbml, config['gap_analysis_file'])
             logging.info(f'Gaps were filled in {model_libsbml.getId()}.')
-    elif config['gap_analysis']:
-        filename = f'{config["out_path"]}{name}_gap_analysis_{str(today)}.xlsx'
-        gap_analysis = rg.gapfill.gap_analysis(model_libsbml, config['gap_analysis_params'], filename)
-        logging.info(f'Gap analysis for {model_libsbml.getId()} with {config["gap_analysis_params"].get("db_to_compare")} was performed.')
-        if  config["gap_analysis_params"].get("db_to_compare") != 'KEGG':
-            logging.info(f'Statistics on missing entites for {model_libsbml.getId()}:')
-            logging.info(gap_analysis[0])
-        logging.info(f'Complete Excel table is in file: {filename}.')
-    elif config['gapfill_model']:
-        model_libsbml = rg.gapfill.gapfill_model(model_libsbml, config['gap_analysis_file'])
-        logging.info(f'Gaps were filled in {model_libsbml.getId()}.')
-
-    if (config['polish']):
-        model_libsbml = rg.polish.polish(model_libsbml, config['entrez_email'], config['id_db'], config['protein_fasta'], config['lab_strain'])
-        logging.info(model_libsbml.getId() + ' has been polished')
-
-    mods = [config['keggpathways'], config['sboterms'], config['charge_corr'], config['man_cur'], config['gapfill_model'], config['polish']]
-
-    if any(mods):
-        if config['model_out'] == 'stdout':   
-            config['model_out'] = config['out_path'] + model_libsbml.getId() + '_modified_' + str(today) + '.xml'
-
-        rg.io.write_to_file(model_libsbml, config['model_out'])
 
-        if model_cobra is not None:                                          
-            try:    
-                model_cobra, errors = cobra.io.sbml.validate_sbml_model(config['model_out'])
-                logging.info(errors)
-            except (OSError):
-                model_cobra = None
-                logging.info('Model was invalidated during curation steps.')
+        if (config['polish']):
+            model_libsbml = rg.polish.polish(model_libsbml, config['entrez_email'], config['id_db'], config['protein_fasta'], config['lab_strain'])
+            logging.info(model_libsbml.getId() + ' has been polished')
+
+        mods = [config['keggpathways'], config['sboterms'], config['charge_corr'], config['man_cur'], config['gapfill_model'], config['polish']]
+
+        if any(mods):
+            if config['model_out'] == 'stdout':   
+                config['model_out'] = config['out_path'] + model_libsbml.getId() + '_modified_' + str(today) + '.xml'
+
+            rg.io.write_to_file(model_libsbml, config['model_out'])
+
+            if model_cobra is not None:                                          
+                try:    
+                    model_cobra, errors = cobra.io.sbml.validate_sbml_model(config['model_out'])
+                    logging.info(errors)
+                except (OSError):
+                    model_cobra = None
+                    logging.info('Model was invalidated during curation steps.')
 
-    if (model_cobra != None):
-        logging.info(model_cobra.id + ' will be investigated.')
-        name, reac, metab, genes = rg.investigate.initial_analysis(model_libsbml)
-        orphans, deadends, disconnected = rg.investigate.get_orphans_deadends_disconnected(model_cobra)
-        mass_unbal, charge_unbal = rg.investigate.get_mass_charge_unbalanced(model_cobra)
-        egc = rg.investigate.get_egc(model_cobra)
-        if (config['visualize']):
-            logging.info('All visualizations can be found in the subfolder "visualization".')
-            sbo_fig = rg.investigate.plot_rea_sbo_single(model_libsbml).get_figure()
+        if (model_cobra != None):
+            logging.info(model_cobra.id + ' will be investigated.')
+            name, reac, metab, genes = rg.investigate.initial_analysis(model_libsbml)
+            orphans, deadends, disconnected = rg.investigate.get_orphans_deadends_disconnected(model_cobra)
+            mass_unbal, charge_unbal = rg.investigate.get_mass_charge_unbalanced(model_cobra)
+            egc = rg.investigate.get_egc(model_cobra)
+            if (config['visualize']):
+                logging.info('All visualizations can be found in the subfolder "visualization".')
+                sbo_fig = rg.investigate.plot_rea_sbo_single(model_libsbml).get_figure()
+
+                # saving the created visualizations
+                sbo_fig.savefig(config['out_path'] + 'visualization/' + str(model_cobra.id) + '_ReacPerSBO_' + str(today) + '.png', bbox_inches='tight')
 
-            # saving the created visualizations
-            sbo_fig.savefig(config['out_path'] + 'visualization/' + str(model_cobra.id) + '_ReacPerSBO_' + str(today) + '.png', bbox_inches='tight')
-
-        if (config['memote']):
-            score = rg.investigate.get_memote_score(rg.investigate.run_memote(model_cobra))
+            if (config['memote']):
+                score = rg.investigate.get_memote_score(rg.investigate.run_memote(model_cobra))
+                
+            if(config['modelseed']):
+                charge_mismatch, formula_mismatch = rg.modelseed.compare_to_modelseed(model_cobra)
 
-        if(config['modelseed']):
-            charge_mismatch, formula_mismatch = rg.modelseed.compare_to_modelseed(model_cobra)
-
-        if (config['media'] != None):
-            growth_sim = rg.growth.get_growth_selected_media(model_cobra, config['media'], config['growth_basis'])
-
-        if (config['memote'] == True):
-            information = [[name], [reac], [metab], [genes], [score], orphans, deadends, disconnected, mass_unbal, charge_unbal]
-            model_params = pd.DataFrame(information, ['model name', '#reactions', '#metabolites', '#genes', 'memote score', 'orphans', 'deadends', 'disconnected', 'mass unbalanced', 'charge unbalanced']).T
+            if (config['media'] != None):
+                growth_sim = rg.growth.get_growth_selected_media(model_cobra, config['media'], config['growth_basis'])
+
+            if (config['memote'] == True):
+                information = [[name], [reac], [metab], [genes], [score], orphans, deadends, disconnected, mass_unbal, charge_unbal]
+                model_params = pd.DataFrame(information, ['model name', '#reactions', '#metabolites', '#genes', 'memote score', 'orphans', 'deadends', 'disconnected', 'mass unbalanced', 'charge unbalanced']).T
+            else:
+                information = [[name], [reac], [metab], [genes], orphans, deadends, disconnected, mass_unbal, charge_unbal]
+                model_params = pd.DataFrame(information, ['model name', '#reactions', '#metabolites', '#genes', 'orphans', 'deadends', 'disconnected', 'mass unbalanced', 'charge unbalanced']).T
+            with pd.ExcelWriter(config['out_path'] + name + '_' + str(today) + '.xlsx') as writer:  
+                model_params.to_excel(writer, sheet_name='model params', index=False)
+                growth_sim.to_excel(writer, sheet_name='growth simulation', index=False)
+                egc.to_excel(writer, sheet_name='EGC test', index=False)
+                if(config['modelseed']):
+                    charge_mismatch.to_excel(writer, sheet_name='charge mismatches', index=False)
+                    formula_mismatch.to_excel(writer, sheet_name='formula mismatches', index=False)
+                logging.info('Single model growth simulation results are saved to ' + name + '_' + str(today) + '.xlsx')
         else:
-            information = [[name], [reac], [metab], [genes], orphans, deadends, disconnected, mass_unbal, charge_unbal]
-            model_params = pd.DataFrame(information, ['model name', '#reactions', '#metabolites', '#genes', 'orphans', 'deadends', 'disconnected', 'mass unbalanced', 'charge unbalanced']).T
-        with pd.ExcelWriter(config['out_path'] + name + '_' + str(today) + '.xlsx') as writer:  
-            model_params.to_excel(writer, sheet_name='model params', index=False)
-            growth_sim.to_excel(writer, sheet_name='growth simulation', index=False)
-            egc.to_excel(writer, sheet_name='EGC test', index=False)
-            if(config['modelseed']):
-                charge_mismatch.to_excel(writer, sheet_name='charge mismatches', index=False)
-                formula_mismatch.to_excel(writer, sheet_name='formula mismatches', index=False)
-            logging.info('Single model growth simulation results are saved to ' + name + '_' + str(today) + '.xlsx')
-    else:
-        logging.info('No valid model, investigation aborted!')
+            logging.info('No valid model, investigation aborted!')
 
 if __name__ == "__main__":
     try: