From 593f66d81192c6aaaff70e60b936b7093606911d Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 2 Oct 2017 19:50:21 -0400 Subject: [PATCH 01/19] Fixing some replace error that replaced 'rate4site' with batchfile name.. --- res/TemplateBatchFiles/ProteinAnalyses/relative_prot_rates.bf | 4 ++-- res/TemplateBatchFiles/relative_nucleotide_rates.bf | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/res/TemplateBatchFiles/ProteinAnalyses/relative_prot_rates.bf b/res/TemplateBatchFiles/ProteinAnalyses/relative_prot_rates.bf index fd15c1239..77a83abff 100644 --- a/res/TemplateBatchFiles/ProteinAnalyses/relative_prot_rates.bf +++ b/res/TemplateBatchFiles/ProteinAnalyses/relative_prot_rates.bf @@ -20,10 +20,10 @@ LoadFunctionLibrary("libv3/models/protein.bf"); utility.ToggleEnvVariable ("NORMALIZE_SEQUENCE_NAMES", 1); relative_prot_rates.analysis_description = { - terms.io.info: "For a fixed alignment and tree, infer **relative** site specific substitution rates, + terms.io.info: "RELprot (RELative protein rates) infers, for a fixed alignment and tree, **relative** site specific substitution rates, by first optimizing alignment-wide branch lengths, and then inferring a site-specific uniform tree scaler", terms.io.version: "0.1alpha", - terms.io.reference: "@TBD. Analysis based on Rate4Site method: Pupko, T., Bell, R. E., Mayrose, I., Glaser, F. & Ben-Tal, N. relative_prot_rates: an algorithmic tool for the identification of functional regions in proteins by surface mapping of evolutionary determinants within their homologues. Bioinformatics 18, S71–S77 (2002).", + terms.io.reference: "@TBD. Analysis based on Rate4Site method: Pupko, T., Bell, R. E., Mayrose, I., Glaser, F. & Ben-Tal, N. Rate4Site: an algorithmic tool for the identification of functional regions in proteins by surface mapping of evolutionary determinants within their homologues. 
Bioinformatics 18, S71–S77 (2002).", terms.io.authors: "Sergei L Kosakovsky Pond and Stephanie J Spielman", terms.io.contact: "{spond,stephanie.spielman}@temple.edu" }; diff --git a/res/TemplateBatchFiles/relative_nucleotide_rates.bf b/res/TemplateBatchFiles/relative_nucleotide_rates.bf index 525263f4b..a9150774a 100644 --- a/res/TemplateBatchFiles/relative_nucleotide_rates.bf +++ b/res/TemplateBatchFiles/relative_nucleotide_rates.bf @@ -23,10 +23,10 @@ LoadFunctionLibrary("libv3/models/DNA/JC69.bf"); utility.ToggleEnvVariable ("NORMALIZE_SEQUENCE_NAMES", 1); relative_nuc_rates.analysis_description = { - terms.io.info: "For a fixed **nucleotide** alignment and tree, infer **relative** site specific substitution rates, + terms.io.info: "RELnuc (RELative nucleotide rates) infers, for a fixed **nucleotide** alignment and tree, **relative** site specific substitution rates, by first optimizing alignment-wide branch lengths, and then inferring a site-specific uniform tree scaler", terms.io.version: "0.1alpha", - terms.io.reference: "@TBD. Analysis based on Rate4Site method, extended for nucleotides: Pupko, T., Bell, R. E., Mayrose, I., Glaser, F. & Ben-Tal, N. relative_nuc_rates: an algorithmic tool for the identification of functional regions in proteins by surface mapping of evolutionary determinants within their homologues. Bioinformatics 18, S71–S77 (2002).", + terms.io.reference: "@TBD. Analysis based on Rate4Site method, extended for nucleotides: Pupko, T., Bell, R. E., Mayrose, I., Glaser, F. & Ben-Tal, N. Rate4Site: an algorithmic tool for the identification of functional regions in proteins by surface mapping of evolutionary determinants within their homologues. Bioinformatics 18, S71–S77 (2002).", terms.io.authors: "Sergei L Kosakovsky Pond and Stephanie J Spielman", terms.io.contact: "{spond,stephanie.spielman}@temple.edu" }; From 8bb922c5388ed8e68899cb83ed78e8120ffbc5e0 Mon Sep 17 00:00:00 2001 From: "Stephanie J. 
Spielman" Date: Wed, 4 Oct 2017 10:59:01 -0400 Subject: [PATCH 02/19] Addressing issue where tree wasn't written to JSON --- res/TemplateBatchFiles/ProteinAnalyses/relative_prot_rates.bf | 1 + 1 file changed, 1 insertion(+) diff --git a/res/TemplateBatchFiles/ProteinAnalyses/relative_prot_rates.bf b/res/TemplateBatchFiles/ProteinAnalyses/relative_prot_rates.bf index 77a83abff..d33822c6e 100644 --- a/res/TemplateBatchFiles/ProteinAnalyses/relative_prot_rates.bf +++ b/res/TemplateBatchFiles/ProteinAnalyses/relative_prot_rates.bf @@ -239,6 +239,7 @@ io.ReportProgressMessageMD ("relative_prot_rates", "Stats", "* [95% Range] " + +tree_definition = utility.Map (relative_prot_rates.partitions_and_trees, "_partition_", '_partition_[terms.data.tree]'); io.SpoolJSON ({ terms.json.input : {terms.json.file: relative_prot_rates.alignment_info[terms.data.file], terms.json.sequences: relative_prot_rates.alignment_info[terms.data.sequences], From 3c5761e7accda928c33ef07da1721843ea75f1d1 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Sun, 8 Oct 2017 19:54:43 -0400 Subject: [PATCH 03/19] Updates to ProteinFitter. 
Addressed (non-)optimizer issue, removed caching, updated JSON format --- .../ProteinAnalyses/ProteinGTRFit.bf | 145 +++++++------- .../ProteinAnalyses/ProteinGTRFit_helper.ibf | 183 ++++++++---------- tests/hbltests/libv3/ProteinGTRFit.wbf | 15 +- 3 files changed, 155 insertions(+), 188 deletions(-) diff --git a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf b/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf index 881844bf5..0a051aa35 100644 --- a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf +++ b/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf @@ -1,4 +1,4 @@ -RequireVersion("2.3.3"); +RequireVersion("2.3.4"); LoadFunctionLibrary("libv3/UtilityFunctions.bf"); LoadFunctionLibrary("libv3/IOFunctions.bf"); LoadFunctionLibrary("libv3/stats.bf"); @@ -22,7 +22,6 @@ LoadFunctionLibrary("ProteinGTRFit_helper.ibf"); /*------------------------------------------------------------------------------*/ utility.ToggleEnvVariable ("NORMALIZE_SEQUENCE_NAMES", 1); -utility.SetEnvVariable ("OPTIMIZATION_PRECISION", 0.1); // for testing purposes. 
utility.ToggleEnvVariable ("PRODUCE_OPTIMIZATION_LOG", 1); // for testing purposes io.DisplayAnalysisBanner({ @@ -41,7 +40,9 @@ protein_gtr.logl = terms.fit.log_likelihood; protein_gtr.phase = terms.fit.phase; protein_gtr.json.information = "information"; protein_gtr.baseline_phase = "Baseline Phase"; - +protein_gtr.final_phase = "REV-Final"; +protein_gtr.rev_phase_prefix = "REV-Phase-"; +protein_gtr.bl_phase_prefix = "REV-local-Phase-"; protein_gtr.options.convergence_type = "convergence type"; protein_gtr.options.tolerance = "tolerance"; protein_gtr.options.baseline_model = "baseline model"; @@ -55,50 +56,28 @@ protein_gtr.options.rate_variation = "use rate variation"; SetDialogPrompt ("Supply a list of files to include in the analysis (one per line)"); fscanf (PROMPT_FOR_FILE, "Lines", protein_gtr.file_list); protein_gtr.listfile = utility.getGlobalValue("LAST_FILE_PATH"); -protein_gtr.cache_file = protein_gtr.listfile + ".cache"; protein_gtr.json_file = protein_gtr.listfile + ".json"; protein_gtr.file_list = io.validate_a_list_of_files (protein_gtr.file_list); protein_gtr.file_list_count = Abs (protein_gtr.file_list); +protein_gtr.index_to_filename = utility.SwapKeysAndValues(protein_gtr.file_list); -// Populate analysis_results and important variables from cache, or prompt for all variables. -/* -if (io.FileExists(protein_gtr.cache_file)) { - protein_gtr.use_cache = io.SelectAnOption ({{"YES", "Resume analysis using the detected cache file."}, {"NO", "Launch a new analysis and *overwrite* the detected cache file."}}, "A cache file of a prior analysis on this list of files was detected. 
Would you like to use it?"); -} -*/ -protein_gtr.use_cache = "NO"; - -// Load all information from cache -if (protein_gtr.use_cache == "YES" || protein_gtr.use_cache == 1){ - protein_gtr.analysis_results = io.LoadCacheFromFile (protein_gtr.cache_file); - protein_gtr.load_cached_options(); // Load previously selected options into namespace +/********* PROMPTS **********/ +protein_gtr.analysis_results = {}; -} -// Prompt for all information -else { +// Prompt for convergence assessment type +protein_gtr.convergence_type = io.SelectAnOption( protein_gtr.convergence_options, "Select a convergence criterion."); - protein_gtr.analysis_results = {}; +// Prompt for threshold +protein_gtr.tolerance = io.PromptUser ("\n>Provide a tolerance level for convergence assessment (Default 0.01)",0.01,0,1,FALSE); // default, lower, upper, is_integer - // Prompt for convergence assessment type - protein_gtr.convergence_type = io.SelectAnOption( protein_gtr.convergence_options, "Select a convergence criterion."); +// Prompt for baseline AA model +protein_gtr.baseline_model = io.SelectAnOption (models.protein.empirical_models, + "Select an empirical protein model to use for optimizing the provided branch lengths (we recommend LG):"); +// Prompt for rate variation +protein_gtr.use_rate_variation = io.SelectAnOption( protein_gtr.rate_variation_options, "Would you like to optimize branch lengths with rate variation (uses a four-category gamma)?"); - if (protein_gtr.convergence_type == "LogL"){ - protein_gtr.tolerance = io.PromptUser ("\n>Provide a tolerance level for convergence assessment (Default 0.01)",0.01,0,1,FALSE); // default, lower, upper, is_integer - } - else { - protein_gtr.tolerance = io.PromptUser ("\n>Provide a tolerance level for convergence assessment (Default 0.001)",0.01,0,1,FALSE); // default, lower, upper, is_integer - } +protein_gtr.save_options(); - // Prompt for baseline AA model - protein_gtr.baseline_model = io.SelectAnOption (models.protein.empirical_models, - 
"Select an empirical protein model to use for optimizing the provided branch lengths (we recommend LG):"); - - // Prompt for rate variation - protein_gtr.use_rate_variation = io.SelectAnOption( protein_gtr.rate_variation_options, "Would you like to optimize branch lengths with rate variation (uses a four-category gamma)?"); - - protein_gtr.save_options(); - -} if (protein_gtr.use_rate_variation == "Yes"){ protein_gtr.baseline_model_name = protein_gtr.baseline_model + "+F, with Gamma rates"; @@ -109,6 +88,7 @@ if (protein_gtr.use_rate_variation == "Yes"){ protein_gtr.baseline_model_desc = "protein_gtr.Baseline.ModelDescription"; protein_gtr.rev_model_branch_lengths = "protein_gtr.REV.ModelDescription"; } + /********************************************************************************************************************/ @@ -171,38 +151,62 @@ mpi.QueueComplete (protein_gtr.queue); // Sum of the logL from fitted baseline model across each data set protein_gtr.baseline_fit_logL = math.Sum (utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/protein_gtr.phase_key"), "_value_", "(_value_[protein_gtr.phase_key])[terms.fit.log_likelihood]")); - io.ReportProgressMessageMD ("Protein GTR Fitter", " * Initial branch length fit", "Overall Log(L) = " + protein_gtr.baseline_fit_logL); -//io.WriteCacheToFile (^"protein_gtr.cache_file", ^"protein_gtr.analysis_results"); - - /*************************** STEP TWO *************************** Perform an initial GTR fit on the data *****************************************************************/ console.log("\n\n[PHASE 2] Performing initial REV fit to the data"); -result_key = "REV-Phase-" + protein_gtr.fit_phase; +result_key = protein_gtr.rev_phase_prefix + protein_gtr.fit_phase; + + +current = utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/'" + protein_gtr.phase_key + "'"), "_value_", "_value_['" + protein_gtr.phase_key + "']"); +// 
console.log(utility.Keys(protein_gtr.analysis_results)); +// { +// {"options", "input", "0", "1"} +// } +// +// console.log(utility.Keys(protein_gtr.analysis_results["0"])); +// { +// {"input", "Baseline Phase"} +// } +//console.log(current); +// 0: +///EFV +/// BL +/// Trees +/// logl +///parameters +// 1: .... +protein_gtr.current_gtr_fit = protein_gtr.fitGTRtoFileList (current, None, result_key, FALSE); // last is bool argument, finalphase -if (utility.Has (protein_gtr.analysis_results, result_key, None)) { - io.ReportProgressMessageMD ("Protein GTR Fitter", result_key, - "Loaded cached results for '" + result_key + "'. Log(L) = " + (protein_gtr.analysis_results[result_key])[terms.fit.log_likelihood] ); - protein_gtr.current_gtr_fit = protein_gtr.analysis_results [result_key]; -} else { - protein_gtr.current_gtr_fit = protein_gtr.fitGTRtoFileList (utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/'" + protein_gtr.phase_key + "'"), "_value_", "_value_['" + protein_gtr.phase_key + "']"), protein_gtr.current_gtr_fit, result_key, FALSE); -} -// Record logL + +// Record logL; this was presumably useful at one point, but it is unclear whether it is still needed protein_gtr.scores + (protein_gtr.analysis_results[result_key])[terms.fit.log_likelihood]; -/* Now that the initial GTR fit has been performed, we toggle between a GTR fit and a branch length fit under the estimated GTR parameters */ + +/* Extract the EFV for use in all model fits (until final tuneup) */ protein_gtr.shared_EFV = (utility.Values (protein_gtr.current_gtr_fit [terms.efv_estimate]))[0]; if (Type (protein_gtr.shared_EFV) == "String") { protein_gtr.shared_EFV = Eval (protein_gtr.shared_EFV); } +//console.log(protein_gtr.current_gtr_fit); +// global: sub rates +// EFV +// branch length +//// 0 +////// [all the branches] +//// 1 +////// branch lengths +// Trees (dict now!) 
+// logl +// parameters + /********************************** STEP THREE ****************************************** Iteratively optimize branch lengths with previous REV fit, and re-optimize REV @@ -218,28 +222,25 @@ for (;;) { // Commented out below because this is never actually used in the analysis, and it is always cached anyways // protein_gtr.scores + protein_gtr.phase_results[terms.fit.log_likelihood]; - result_key = "REV-Phase-" + protein_gtr.fit_phase; - if (utility.Has (protein_gtr.analysis_results, result_key, None)) { - io.ReportProgressMessageMD ("Protein GTR Fitter", result_key, - "Loaded cached results for '" + result_key + "'. Log(L) = " + (protein_gtr.analysis_results[result_key])[terms.fit.log_likelihood] ); - protein_gtr.current_gtr_fit = protein_gtr.analysis_results [result_key]; - } else { - protein_gtr.current_gtr_fit = protein_gtr.fitGTRtoFileList (utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/protein_gtr.phase_results['phase']"), "_value_", "_value_[protein_gtr.phase_results['phase']]"), protein_gtr.current_gtr_fit, result_key, FALSE); - } + result_key = protein_gtr.rev_phase_prefix + protein_gtr.fit_phase; + + protein_gtr.current_gtr_fit = protein_gtr.fitGTRtoFileList (utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/protein_gtr.phase_results['phase']"), "_value_", "_value_[protein_gtr.phase_results['phase']]"), protein_gtr.current_gtr_fit, result_key, FALSE); protein_gtr.scores + (protein_gtr.analysis_results[result_key])[terms.fit.log_likelihood]; // LogL if (protein_gtr.convergence_type == "LogL"){ console.log("\n\n[PHASE 3] Delta log-L = " + (protein_gtr.scores[Abs(protein_gtr.scores)-1] - protein_gtr.scores[Abs(protein_gtr.scores)-2])); - if (protein_gtr.scores[Abs(protein_gtr.scores)-1] - protein_gtr.scores[Abs(protein_gtr.scores)-2] < protein_gtr.tolerance){ + + // should probably be a better statement checking against 0, but for this type of analysis that might actually 
get us stuck hovering around optimum. + if (protein_gtr.scores[Abs(protein_gtr.scores)-1] - protein_gtr.scores[Abs(protein_gtr.scores)-2] <= protein_gtr.tolerance){ break; } } // RMSE else { - previous_Q = (protein_gtr.analysis_results["REV-Phase-" + (protein_gtr.fit_phase-1)])[terms.global]; // isolate Q from previous phase + previous_Q = (protein_gtr.analysis_results[protein_gtr.rev_phase_prefix + (protein_gtr.fit_phase-1)])[terms.global]; // isolate Q from previous phase current_Q = (protein_gtr.analysis_results[result_key])[terms.global]; // isolate Q from current phase // Calculate RMSE between previous, current fitted Q's @@ -257,13 +258,11 @@ for (;;) { } rmse = Sqrt( rmse/N ); console.log("\n\n[PHASE 3] RMSE = " + rmse); - if (rmse < protein_gtr.tolerance) { + if (rmse <= protein_gtr.tolerance) { break; } } - - // UNCLEAR IF TO DO: Before going to next iteration which begins w/ a branch length fit, we need to perform normalization. HyPhy might already do this. - + } @@ -275,17 +274,9 @@ for (;;) { console.log("\n\n[PHASE 4] Convergence achieved. Optimizing final model."); -// do a final tune-up by reoptimizing everything -result_key = "REV-Final"; -if (utility.Has (protein_gtr.analysis_results, result_key, None)) { - io.ReportProgressMessageMD ("Protein GTR Fitter", result_key, - "Loaded cached results for '" + result_key + "'. 
Log(L) = " + (protein_gtr.analysis_results[result_key])[terms.fit.log_likelihood] ); - protein_gtr.current_gtr_fit = protein_gtr.analysis_results [result_key]; -} else { - protein_gtr.current_gtr_fit = protein_gtr.fitGTRtoFileList (utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/protein_gtr.phase_results['phase']"), "_value_", "_value_[protein_gtr.phase_results['phase']]"), protein_gtr.current_gtr_fit, result_key, TRUE); -} +protein_gtr.current_gtr_fit = protein_gtr.fitGTRtoFileList (utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/protein_gtr.phase_results['phase']"), "_value_", "_value_[protein_gtr.phase_results['phase']]"), protein_gtr.current_gtr_fit, protein_gtr.final_phase, TRUE); + /* Save the JSON */ -loaded_cache = io.LoadCacheFromFile(protein_gtr.cache_file); -io.SpoolJSON(loaded_cache, protein_gtr.json_file); +io.SpoolJSON(protein_gtr.analysis_results, protein_gtr.json_file); diff --git a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit_helper.ibf b/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit_helper.ibf index fae5cf623..db8c21a61 100644 --- a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit_helper.ibf +++ b/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit_helper.ibf @@ -1,27 +1,7 @@ /*************** Functions used in ProteinGTRFit.bf ******************/ -/** set model definitions **/ protein_gtr.convergence_options = {{"LogL", "Assess REV fit convergence by comparing log likelihood scores"}, {"RMSE", "[Recommended] Assess REV fit convergence by comparing RMSE between fitted matrices."}}; protein_gtr.rate_variation_options = {{"Yes", "Use a four-category discrete gamma distribution when optimizing branch lengths."}, {"No", "Do not consider rate variation when optimizing branch lengths."}}; -function protein_gtr.load_cached_options() { - protein_gtr.convergence_type = 
(protein_gtr.analysis_results[utility.getGlobalValue("terms.json.options")])[utility.getGlobalValue("protein_gtr.options.convergence_type")]; - protein_gtr.tolerance = (protein_gtr.analysis_results[utility.getGlobalValue("terms.json.options")])[utility.getGlobalValue("protein_gtr.options.tolerance")]; - protein_gtr.baseline_model = (protein_gtr.analysis_results[utility.getGlobalValue("terms.json.options")])[utility.getGlobalValue("protein_gtr.options.baseline_model")]; - protein_gtr.use_rate_variation = (protein_gtr.analysis_results[utility.getGlobalValue("terms.json.options")])[utility.getGlobalValue("protein_gtr.options.rate_variation")]; - - - /*** Cache options can in theory be saved as numbers if options piped in, although not typically. Remap to strings. ****/ - if (Type(protein_gtr.convergence_type) == "Number"){ - protein_gtr.convergence_type = utility.SwapKeysAndValues(utility.MatrixToDict(protein_gtr.convergence_options))[protein_gtr.convergence_type*2]; - } - if (Type(protein_gtr.baseline_model) == "Number"){ - protein_gtr.baseline_model = utility.SwapKeysAndValues(utility.MatrixToDict(models.protein.empirical_models))[protein_gtr.baseline_model*2]; - } - if (Type(protein_gtr.protein_gtr.use_rate_variation) == "Number"){ - protein_gtr.use_rate_variation = utility.SwapKeysAndValues(utility.MatrixToDict(protein_gtr.rate_variation_options))[protein_gtr.use_rate_variation*2]; - } -} - function protein_gtr.save_options() { protein_gtr.analysis_results[utility.getGlobalValue("terms.json.options")] = {utility.getGlobalValue("protein_gtr.options.convergence_type"): protein_gtr.convergence_type, utility.getGlobalValue("protein_gtr.options.tolerance"): protein_gtr.tolerance, @@ -30,17 +10,10 @@ function protein_gtr.save_options() { protein_gtr.analysis_results[utility.getGlobalValue("terms.json.input")] = {utility.getGlobalValue("terms.json.file"): protein_gtr.listfile, "number of datasets": protein_gtr.file_list_count}; - - io.WriteCacheToFile 
(^"protein_gtr.cache_file", ^"protein_gtr.analysis_results"); } - - - - - /* Model definitions, in particular for models with rate variation */ //------------------------------------------------------------------------------------------------------------------------ @@ -134,31 +107,41 @@ function protein_gtr.fitBaselineToFile (filename) { io.CheckAssertion ("protein_gtr.partition_count==1", "This analysis can only handle a single partition"); + + protein_gtr.filter_specification = alignments.DefineFiltersForPartitions (protein_gtr.partitions_and_trees, + "protein_gtr.msa" , + "protein_gtr.filter.", + protein_gtr.file_info); + + + protein_gtr.full_trees = utility.Map (protein_gtr.partitions_and_trees, "_value_", '_value_[terms.data.tree]'); + protein_gtr.full_data_filter = utility.Map (protein_gtr.filter_specification, "_value_", "_value_[terms.data.name]"); + + /*********** Store dataset information *************/ - protein_gtr.output_data_info = { utility.getGlobalValue("terms.original_name"): {}, - utility.getGlobalValue("terms.json.sequences"): protein_gtr.file_info[utility.getGlobalValue("terms.data.sequences")], - utility.getGlobalValue("terms.json.sites"): protein_gtr.file_info[utility.getGlobalValue("terms.data.sites")] + protein_gtr.output_data_info = { utility.getGlobalValue("terms.json.sequences"): protein_gtr.file_info[utility.getGlobalValue("terms.data.sequences")], + utility.getGlobalValue("terms.json.sites"): protein_gtr.file_info[utility.getGlobalValue("terms.data.sites")], + utility.getGlobalValue("terms.json.trees"): (protein_gtr.full_trees["0"])[utility.getGlobalValue("terms.trees.newick_with_lengths")], + utility.getGlobalValue("terms.json.file"): filename, + utility.getGlobalValue("terms.original_name"): {} }; - + // In case there were no branch lengths + if (Abs((protein_gtr.full_trees["0"])[utility.getGlobalValue("terms.branch_length")]) == 0){ + utility.getGlobalValue("terms.json.trees"): 
(protein_gtr.full_trees["0"])[utility.getGlobalValue("terms.trees.newick")], + } utility.ForEach (utility.Keys (protein_gtr.name_mapping), "branch_name", "utility.EnsureKey (protein_gtr.output_data_info[terms.original_name], branch_name)"); utility.ForEach (utility.Keys (protein_gtr.name_mapping), "branch_name", "(protein_gtr.output_data_info[terms.original_name])[branch_name] = protein_gtr.name_mapping[branch_name]"); - + cachekey = protein_gtr.index_to_filename[filename]; + protein_gtr.analysis_results[cachekey] = {}; + (protein_gtr.analysis_results[cachekey])[utility.getGlobalValue("terms.json.input")] = protein_gtr.output_data_info; /*****************************************************/ - protein_gtr.filter_specification = alignments.DefineFiltersForPartitions (protein_gtr.partitions_and_trees, - "protein_gtr.msa" , - "protein_gtr.filter.", - protein_gtr.file_info); - - - protein_gtr.full_trees = utility.Map (protein_gtr.partitions_and_trees, "_value_", '_value_[terms.data.tree]'); - protein_gtr.full_data_filter = utility.Map (protein_gtr.filter_specification, "_value_", "_value_[terms.data.name]"); protein_gtr.baseline_mle = estimators.FitSingleModel_Ext(protein_gtr.full_data_filter, @@ -183,13 +166,14 @@ function protein_gtr.fitBaselineToFile (filename) { * @param {Bool} final - True if this is the final tuning after convergence achieved, False if another fit iteration. 
* @return the fitted MLE */ -function protein_gtr.fitGTRtoFileList (current_results, previous_values, phase, final) { +function protein_gtr.fitGTRtoFileList (current_results, previous_values, phase, finalphase) { io.ReportProgressMessageMD ("Protein GTR Fitter", phase, - "Fitting the REV model using constrained branch lengths proportions (" + phase + ")"); + "Fitting the REV model using constrained branch lengths proportions (" + phase + ")"); - file_list = utility.Keys (current_results); - file_count = utility.Array1D (file_list); + //file_list = utility.Keys (current_results); ---> protein_gtr.file_list + //file_count = utility.Array1D (file_list); ---> protein_gtr.file_list_count + // NOTE: protein_gtr.index_to_filename is {filename:0, filename:1} partition_info = {}; filter_info = {}; @@ -198,22 +182,20 @@ function protein_gtr.fitGTRtoFileList (current_results, previous_values, phase, proportional_scalers = {}; index_to_file_name = {}; - - - for (file_index = 0; file_index < file_count; file_index += 1) { - file_path = file_list [file_index]; - index_to_file_name [file_path] = file_index; + for (file_index = 0; file_index < protein_gtr.file_list_count; file_index += 1) { + file_path = protein_gtr.file_list [file_index]; dataset_name = "protein_gtr.msa." 
+ file_index; - partition_info [file_path] = alignments.ReadNucleotideDataSet (dataset_name, file_path); - partition_specification = { "0" : {terms.data.name : "all", terms.data.filter_string : "", term.data.tree : ((current_results[file_path])[terms.fit.trees])[0]}}; + partition_info [file_index] = alignments.ReadNucleotideDataSet (dataset_name, file_path); + partition_specification = { "0" : {terms.data.name : "all", terms.data.filter_string : "", term.data.tree : ((current_results[file_index])[terms.fit.trees])[0]}}; + filter_info [file_index] = (alignments.DefineFiltersForPartitions (partition_specification, dataset_name , dataset_name, - partition_info [file_path]))[0]; - trees [file_index] = {terms.trees.newick : ((current_results[file_path])[terms.fit.trees])[0]}; - (initial_values[terms.branch_length])[file_index] = ((current_results[file_path])[terms.branch_length])[0]; - if (!final) { + partition_info [file_index]))[0]; + trees [file_index] = {terms.trees.newick : ((current_results[file_index])[terms.fit.trees])[0]}; + (initial_values[terms.branch_length])[file_index] = ((current_results[file_index])[terms.branch_length])[0]; + if (!finalphase) { scaler = "protein_gtr.gtr_scaler_" + file_index; parameters.DeclareGlobalWithRanges (scaler, 1, 0, 1000); proportional_scalers[file_index] = scaler; @@ -224,18 +206,17 @@ function protein_gtr.fitGTRtoFileList (current_results, previous_values, phase, utility.ToggleEnvVariable ("AUTO_PARALLELIZE_OPTIMIZE", 1); utility.ToggleEnvVariable ("OPTIMIZATION_METHOD", 0); - if (! final) { + + if (! 
finalphase) { // Set initial values to the previous fit if (utility.Has (previous_values, terms.global, "AssociativeList")) { initial_values[terms.global] = previous_values[terms.global]; } - // Set initial values to the specified baseline model ****r_ij**** + // Set initial values else { for (l1 = 0; l1 < 20; l1 += 1) { for (l2 = l1 + 1; l2 < 20; l2 += 1) { - //(initial_values[terms.global]) [terms.aminoacidRate (models.protein.alphabet[l1],models.protein.alphabet[l2])] = {terms.fit.MLE : 1}; // set all to 1 - //(initial_values[terms.global]) [terms.aminoacidRate (models.protein.alphabet[l1],models.protein.alphabet[l2])] = {terms.fit.MLE : Random (0.5,2)}; // set all to random number - (initial_values[terms.global]) [terms.aminoacidRate (models.protein.alphabet[l1],models.protein.alphabet[l2])] = {terms.fit.MLE : protein_gtr.baseline_Rij[l1][l2]}; + (initial_values[terms.global]) [terms.aminoacidRate (models.protein.alphabet[l1],models.protein.alphabet[l2])] = {terms.fit.MLE : 0.1}; // set all to 0.1 } } } @@ -248,9 +229,6 @@ function protein_gtr.fitGTRtoFileList (current_results, previous_values, phase, {terms.run_options.proportional_branch_length_scaler : proportional_scalers} ); - - // save the model fit to be associated with its filename index - protein_gtr.rev.mle [protein_gtr.filename_to_index] = index_to_file_name; } // FINAL TUNING else { @@ -268,7 +246,6 @@ function protein_gtr.fitGTRtoFileList (current_results, previous_values, phase, // Save the rev.mle into the analysis_results, and cache it. 
(^"protein_gtr.analysis_results")[phase] = protein_gtr.rev.mle; - io.WriteCacheToFile (^"protein_gtr.cache_file", ^"protein_gtr.analysis_results"); console.log (""); // clear past the optimization progress line utility.SetEnvVariable ("VERBOSITY_LEVEL", 0); @@ -276,20 +253,23 @@ function protein_gtr.fitGTRtoFileList (current_results, previous_values, phase, utility.ToggleEnvVariable ("OPTIMIZATION_METHOD", None); + + // I prefer to keep this as dictionary for compatibility with rest of the output. + protein_gtr.rev.mle[terms.fit.trees] = utility.SwapKeysAndValues(utility.MatrixToDict(protein_gtr.rev.mle[terms.fit.trees])); return protein_gtr.rev.mle; } /** - * @name protein_gtr.fitGTRtoFile + * @name protein_gtr.UpdateBLWithREV * @description Use a previously-fitted average REV amino acid model to a file, specifically for branch length optimization under this model * @param {String} filename - the filename of the dataset to be fitted * @param {Dict} rates - the rates for the GTR model used in fitting * @param {Dict} branch_lengths - the current branch length values for this dataset * @return the fitted MLE */ -function protein_gtr.fitGTRtoFile (filename, rates, branch_lengths) { +function protein_gtr.UpdateBLWithREV (filename, rates, branch_lengths) { protein_gtr.file_info = alignments.ReadNucleotideDataSet ("protein_gtr.msa", filename); @@ -317,6 +297,7 @@ function protein_gtr.fitGTRtoFile (filename, rates, branch_lengths) { protein_gtr.rev_file_mle [terms.branch_length] = { "0" : branch_lengths }; + utility.SetEnvVariable ("VERBOSITY_LEVEL", 1); protein_gtr.rev_file_mle = estimators.FitSingleModel_Ext ( utility.Map (protein_gtr.filter_specification, "_value_", "_value_[terms.data.name]"), // value => value['name'] @@ -325,8 +306,8 @@ function protein_gtr.fitGTRtoFile (filename, rates, branch_lengths) { protein_gtr.rev_file_mle, None ); - utility.SetEnvVariable ("VERBOSITY_LEVEL", 0); - console.log (""); // clear past the optimization progress line + 
utility.SetEnvVariable ("VERBOSITY_LEVEL", 0); + console.log (""); // clear past the optimization progress line protein_gtr.rev_file_mle - terms.global; // delete redundant keys @@ -358,32 +339,38 @@ function protein_gtr.run_gtr_iteration_branch_lengths () { "protein_gtr.shared_EFV", "protein_gtr.final_baseline_model", "protein_gtr.rev_model_branch_lengths", - "protein_gtr.baseline_Rij", "protein_gtr.filename_to_index" }} }); - protein_gtr.phase_key = "Rev-local-Phase-" + protein_gtr.fit_phase; - io.ReportProgressMessageMD ("Protein GTR Fitter", protein_gtr.phase_key, "Retuning branch lengths (" + protein_gtr.phase_key + ")"); +//console.log(protein_gtr.current_gtr_fit); +// global: sub rates +// EFV +// branch length +//// 0 +////// [all the branches] +//// 1 +////// branch lengthssssss +// Trees (dict now!) +// logl +// parameters - for (file_index = 0; file_index < protein_gtr.file_list_count; file_index += 1) { - if (utility.Has (protein_gtr.analysis_results, {{ protein_gtr.file_list[file_index], protein_gtr.phase_key}}, None)) { - logL = ((protein_gtr.analysis_results[protein_gtr.file_list[file_index]])[protein_gtr.phase_key])[terms.fit.log_likelihood]; - io.ReportProgressMessageMD ("Protein GTR Fitter", protein_gtr.phase_key, - "Loaded cached results for '" + protein_gtr.file_list[file_index] + "' " + (file_index+1) + "/" + protein_gtr.file_list_count + ". 
Log(L) = " + logL); + protein_gtr.phase_key = protein_gtr.bl_phase_prefix + protein_gtr.fit_phase; - } else { - io.ReportProgressMessageMD ("Protein GTR Fitter", protein_gtr.phase_key, - "Dispatching file '" + protein_gtr.file_list[file_index] + "' " + (file_index+1) + "/" + protein_gtr.file_list_count); + io.ReportProgressMessageMD ("Protein GTR Fitter", protein_gtr.phase_key, "Retuning branch lengths (" + protein_gtr.phase_key + ")"); - mpi.QueueJob (protein_gtr.queue, "protein_gtr.fitGTRtoFile", {"0" : protein_gtr.file_list[file_index], - "1" : protein_gtr.current_gtr_fit[terms.global], - "2" : (protein_gtr.current_gtr_fit[terms.branch_length])[(protein_gtr.current_gtr_fit[protein_gtr.filename_to_index])[protein_gtr.file_list[file_index]]]}, - "protein_gtr.handle_gtr_callback"); - } + for (file_index = 0; file_index < protein_gtr.file_list_count; file_index += 1) { + + io.ReportProgressMessageMD ("Protein GTR Fitter", protein_gtr.phase_key, + "Dispatching file '" + protein_gtr.file_list[file_index] + "' " + (file_index+1) + "/" + protein_gtr.file_list_count); + + mpi.QueueJob (protein_gtr.queue, "protein_gtr.UpdateBLWithREV", {"0" : protein_gtr.file_list[file_index], + "1" : protein_gtr.current_gtr_fit[terms.global], + "2" : (protein_gtr.current_gtr_fit[terms.branch_length])[file_index]}, + "protein_gtr.handle_branch_length_callback"); } mpi.QueueComplete (protein_gtr.queue); @@ -392,46 +379,42 @@ function protein_gtr.run_gtr_iteration_branch_lengths () { io.ReportProgressMessageMD ("Protein GTR Fitter", protein_gtr.phase_key, "Overall Log(L) = " + protein_gtr.run_gtr_iteration_branch_lengths.logL); + + return { protein_gtr.logl : protein_gtr.run_gtr_iteration_branch_lengths.logL , protein_gtr.phase : protein_gtr.phase_key}; } /** * @name protein_gtr.handle_gtr_callback - * @description Handle MPI callback after fitting a REV model and save fit information to cache file + * @description Handle MPI callback after fitting a REV model */ -function 
protein_gtr.handle_gtr_callback (node, result, arguments) { +function protein_gtr.handle_branch_length_callback (node, result, arguments) { + + cachekey = protein_gtr.index_to_filename[arguments[0]]; - utility.EnsureKey (^"protein_gtr.analysis_results", arguments[0]); + (protein_gtr.analysis_results[cachekey])[utility.getGlobalValue("protein_gtr.phase_key")] = result; - ((^"protein_gtr.analysis_results")[arguments[0]])[utility.getGlobalValue("protein_gtr.phase_key")] = result; - io.ReportProgressMessageMD ("Protein GTR Fitter", "* Rev-local-Phase-" + ^"protein_gtr.fit_phase", + io.ReportProgressMessageMD ("Protein GTR Fitter", "* " + protein_gtr.bl_phase_prefix + ^"protein_gtr.fit_phase", "Received file '" + arguments[0] + "' from node " + node + ". LogL = " + result[terms.fit.log_likelihood]); - io.WriteCacheToFile (^"protein_gtr.cache_file", ^"protein_gtr.analysis_results"); } - /** * @name protein_gtr.handle_baseline_callback * @param node - node name which processed the given data * @param {Dict} result - Dictionary of fitted information for given data * @param {Dict} arguments - Dictionary with single key:value :: 0:datafile name - * @description Handle MPI callback after fitting a baseline amino acid model (for initial branch length optimization) and save fit information to cache file + * @description Handle MPI callback after fitting a baseline amino acid model (for initial branch length optimization) */ function protein_gtr.handle_baseline_callback (node, result, arguments) { - utility.EnsureKey (^"protein_gtr.analysis_results", arguments[0]); - - ((^"protein_gtr.analysis_results")[arguments[0]])[utility.getGlobalValue("protein_gtr.phase_key")] = result; - + cachekey = protein_gtr.index_to_filename[arguments[0]]; + (protein_gtr.analysis_results[cachekey])[utility.getGlobalValue("protein_gtr.phase_key")] = result; + io.ReportProgressMessageMD ("Protein GTR Fitter", "Initial branch length fit", "Received file '" + arguments[0] + "' from node " + node + ". 
LogL = " + result[terms.fit.log_likelihood]); - - - - io.WriteCacheToFile (^"protein_gtr.cache_file", ^"protein_gtr.analysis_results"); } diff --git a/tests/hbltests/libv3/ProteinGTRFit.wbf b/tests/hbltests/libv3/ProteinGTRFit.wbf index 0e5a5258c..e6544b77c 100644 --- a/tests/hbltests/libv3/ProteinGTRFit.wbf +++ b/tests/hbltests/libv3/ProteinGTRFit.wbf @@ -28,20 +28,13 @@ file_list2 = utility.Map(file_list1, "_value_", "ensureFullPath(_value_)"); // e fprintf(list_path_final, CLEAR_FILE, ""); utility.ForEach(file_list2, "_value_", "writeNewPath(_value_)"); -// Create an empty cache file to ensure that args issued below will always work -list_file = PATH_TO_CURRENT_BF + "data/protgtr_fitter_lines.txt"; -cache_file = list_file + ".cache"; -io.WriteCacheToFile(cache_file, ""); // Empty file exists no matter what - - LoadFunctionLibrary("ProteinAnalyses/ProteinGTRFit.bf", { "0": PATH_TO_CURRENT_BF + "data/protgtr_fitter_lines.txt", - "1": "NO", // Ignore cache file - "2": "LogL", // use logL convergence - "3": "0.1", // stopping - "4": "WAG", // use WAG for baseline - "5": "Yes", // use a gamma for rate variation + "1": "LogL", // use logL convergence + "2": "0.1", // stopping + "3": "WAG", // use WAG for baseline + "4": "Yes", // use a gamma for rate variation }); From a93a5b0822c979f8f5e4034dc6fccedae38f3a0b Mon Sep 17 00:00:00 2001 From: Steven Weaver Date: Mon, 9 Oct 2017 10:25:25 -0400 Subject: [PATCH 04/19] Update .travis.yml --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 90c2f2dd8..857961515 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,6 +14,7 @@ notifications: branches: only: - master + - beta env: - METHOD_TEST=tests/hbltests/libv3/SLAC.wbf From 06ad78bd1fa4828faeed5f45dd1acaaa04d1ef64 Mon Sep 17 00:00:00 2001 From: "Stephanie J. 
Spielman" Date: Mon, 9 Oct 2017 12:51:12 -0400 Subject: [PATCH 05/19] debugged to fully work in MPI unless odd test case should occur --- .../ProteinAnalyses/ProteinGTRFit.bf | 36 +++--- .../ProteinAnalyses/ProteinGTRFit_helper.ibf | 114 +++++++++++++----- 2 files changed, 100 insertions(+), 50 deletions(-) diff --git a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf b/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf index 0a051aa35..d08b41897 100644 --- a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf +++ b/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf @@ -60,7 +60,6 @@ protein_gtr.json_file = protein_gtr.listfile + ".json"; protein_gtr.file_list = io.validate_a_list_of_files (protein_gtr.file_list); protein_gtr.file_list_count = Abs (protein_gtr.file_list); protein_gtr.index_to_filename = utility.SwapKeysAndValues(protein_gtr.file_list); - /********* PROMPTS **********/ protein_gtr.analysis_results = {}; @@ -88,7 +87,7 @@ if (protein_gtr.use_rate_variation == "Yes"){ protein_gtr.baseline_model_desc = "protein_gtr.Baseline.ModelDescription"; protein_gtr.rev_model_branch_lengths = "protein_gtr.REV.ModelDescription"; } - +//protein_gtr.shared_EFV = {20,1}; /********************************************************************************************************************/ @@ -109,17 +108,19 @@ protein_gtr.queue = mpi.CreateQueue ({ utility.getGlobalValue("terms.mpi.Header "protein_gtr.shared_EFV", "protein_gtr.baseline_model_desc", "protein_gtr.rev_model_branch_lengths", - "protein_gtr.baseline_model" + "protein_gtr.baseline_model", + "protein_gtr.index_to_filename", + "protein_gtr.analysis_results", + "protein_gtr.baseline_phase" }} }); + + io.ReportProgressMessageMD ("Protein GTR Fitter", "Initial branch length fit", "Initial branch length fit"); protein_gtr.fit_phase = 0; protein_gtr.scores = {}; -protein_gtr.phase_key = protein_gtr.baseline_phase; - - /*************************** STEP ONE *************************** Perform an 
initial fit of Baseline model+F(+/-4G) to the data (or load cached fit.) @@ -128,33 +129,23 @@ console.log("\n\n[PHASE 1] Performing initial branch length optimization using " for (file_index = 0; file_index < protein_gtr.file_list_count; file_index += 1) { - cached_file = protein_gtr.file_list[file_index] + "' " + (file_index+1) + "/" + protein_gtr.file_list_count; - - if (utility.Has (protein_gtr.analysis_results, {{ protein_gtr.file_list[file_index], protein_gtr.phase_key}}, None)) { - - thisKey = (protein_gtr.analysis_results[protein_gtr.file_list[file_index]])[protein_gtr.baseline_phase]; - io.ReportProgressMessageMD ("Protein GTR Fitter", " * Initial branch length fit", - "Loaded cached results for '" + cached_file + ". Log(L) = " + thisKey[terms.fit.log_likelihood]); + io.ReportProgressMessageMD ("Protein GTR Fitter", " * Initial branch length fit", + "Dispatching file '" + protein_gtr.file_list[file_index]); - } else { - io.ReportProgressMessageMD ("Protein GTR Fitter", " * Initial branch length fit", - "Dispatching file '" + cached_file); - - mpi.QueueJob (protein_gtr.queue, "protein_gtr.fitBaselineToFile", {"0" : protein_gtr.file_list[file_index]}, + mpi.QueueJob (protein_gtr.queue, "protein_gtr.fitBaselineToFile", {"0" : protein_gtr.file_list[file_index]}, "protein_gtr.handle_baseline_callback"); - - } } mpi.QueueComplete (protein_gtr.queue); // Sum of the logL from fitted baseline model across each data set -protein_gtr.baseline_fit_logL = math.Sum (utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/protein_gtr.phase_key"), "_value_", "(_value_[protein_gtr.phase_key])[terms.fit.log_likelihood]")); +protein_gtr.baseline_fit_logL = math.Sum (utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/protein_gtr.baseline_phase"), "_value_", "(_value_[protein_gtr.baseline_phase])[terms.fit.log_likelihood]")); io.ReportProgressMessageMD ("Protein GTR Fitter", " * Initial branch length fit", "Overall Log(L) = " 
+ protein_gtr.baseline_fit_logL); + /*************************** STEP TWO *************************** Perform an initial GTR fit on the data *****************************************************************/ @@ -163,7 +154,7 @@ console.log("\n\n[PHASE 2] Performing initial REV fit to the data"); result_key = protein_gtr.rev_phase_prefix + protein_gtr.fit_phase; -current = utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/'" + protein_gtr.phase_key + "'"), "_value_", "_value_['" + protein_gtr.phase_key + "']"); +current = utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/'" + protein_gtr.baseline_phase + "'"), "_value_", "_value_['" + protein_gtr.baseline_phase + "']"); // console.log(utility.Keys(protein_gtr.analysis_results)); // { // {"options", "input", "0", "1"} @@ -181,6 +172,7 @@ current = utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", /// logl ///parameters // 1: .... + protein_gtr.current_gtr_fit = protein_gtr.fitGTRtoFileList (current, None, result_key, FALSE); // last is bool argument, finalphase diff --git a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit_helper.ibf b/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit_helper.ibf index db8c21a61..f8fb782ad 100644 --- a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit_helper.ibf +++ b/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit_helper.ibf @@ -10,10 +10,60 @@ function protein_gtr.save_options() { protein_gtr.analysis_results[utility.getGlobalValue("terms.json.input")] = {utility.getGlobalValue("terms.json.file"): protein_gtr.listfile, "number of datasets": protein_gtr.file_list_count}; + + /* Temporarily, we save input file information here in a highly redundant fashion, but doesn't seem possible to do in MPI...? 
*/ + for (file_index = 0; file_index < protein_gtr.file_list_count; file_index += 1) { + + filename = protein_gtr.file_list[file_index]; + utility.EnsureKey(protein_gtr.analysis_results, file_index); + + protein_gtr.file_info = alignments.ReadNucleotideDataSet ("protein_gtr.msa", + filename); + protein_gtr.name_mapping = protein_gtr.file_info[utility.getGlobalValue("terms.data.name_mapping")]; + if (None == protein_gtr.name_mapping) { /** create a 1-1 mapping if nothing was done */ + protein_gtr.name_mapping = {}; + utility.ForEach (alignments.GetSequenceNames ("protein_gtr.msa"), "_value_", "`&protein_gtr.name_mapping`[_value_] = _value_"); + } + utility.ToggleEnvVariable ("GLOBAL_FPRINTF_REDIRECT", "/dev/null"); + ExecuteCommands ('protein_gtr.partitions_and_trees = trees.LoadAnnotatedTreeTopology.match_partitions (protein_gtr.file_info[terms.data.partitions], protein_gtr.name_mapping)', + {"0" : "Y"}); + utility.ToggleEnvVariable ("GLOBAL_FPRINTF_REDIRECT", None); + + + protein_gtr.filter_specification = alignments.DefineFiltersForPartitions (protein_gtr.partitions_and_trees, + "protein_gtr.msa" , + "protein_gtr.filter.", + protein_gtr.file_info); + protein_gtr.tree = utility.Map (protein_gtr.partitions_and_trees, "_value_", '_value_[terms.data.tree]'); + + + protein_gtr.output_data_info = { utility.getGlobalValue("terms.json.sequences"): protein_gtr.file_info[utility.getGlobalValue("terms.data.sequences")], + utility.getGlobalValue("terms.json.sites"): protein_gtr.file_info[utility.getGlobalValue("terms.data.sites")], + utility.getGlobalValue("terms.json.trees"): (protein_gtr.tree["0"])[utility.getGlobalValue("terms.trees.newick_with_lengths")], + utility.getGlobalValue("terms.json.file"): filename, + utility.getGlobalValue("terms.original_name"): {} + }; + + + //In case there were no branch lengths + if ( Abs( (protein_gtr.tree["0"])[utility.getGlobalValue("terms.branch_length")] ) == 0 ){ + protein_gtr.output_data_info[ 
utility.getGlobalValue("terms.json.trees") ] = (protein_gtr.full_trees["0"])[utility.getGlobalValue("terms.trees.newick")]; + } + utility.ForEach (utility.Keys (protein_gtr.name_mapping), "branch_name", + "utility.EnsureKey (protein_gtr.output_data_info[terms.original_name], branch_name)"); + + utility.ForEach (utility.Keys (protein_gtr.name_mapping), "branch_name", + "(protein_gtr.output_data_info[terms.original_name])[branch_name] = protein_gtr.name_mapping[branch_name]"); + + + (protein_gtr.analysis_results[file_index])[utility.getGlobalValue("terms.json.input")] = protein_gtr.output_data_info; + } } + + /* Model definitions, in particular for models with rate variation */ //------------------------------------------------------------------------------------------------------------------------ @@ -91,6 +141,10 @@ function protein_gtr.REV.ModelDescription.freqs (model, namespace, datafilter) { * @return the fitted MLE */ function protein_gtr.fitBaselineToFile (filename) { + + + utility.EnsureKey(protein_gtr.analysis_results, protein_gtr.index_to_filename[filename]); + protein_gtr.file_info = alignments.ReadNucleotideDataSet ("protein_gtr.msa", filename); protein_gtr.name_mapping = protein_gtr.file_info[utility.getGlobalValue("terms.data.name_mapping")]; @@ -103,6 +157,8 @@ function protein_gtr.fitBaselineToFile (filename) { {"0" : "Y"}); utility.ToggleEnvVariable ("GLOBAL_FPRINTF_REDIRECT", None); + + protein_gtr.partition_count = Abs (protein_gtr.partitions_and_trees); io.CheckAssertion ("protein_gtr.partition_count==1", "This analysis can only handle a single partition"); @@ -118,30 +174,30 @@ function protein_gtr.fitBaselineToFile (filename) { protein_gtr.full_data_filter = utility.Map (protein_gtr.filter_specification, "_value_", "_value_[terms.data.name]"); - /*********** Store dataset information *************/ - protein_gtr.output_data_info = { utility.getGlobalValue("terms.json.sequences"): 
protein_gtr.file_info[utility.getGlobalValue("terms.data.sequences")], - utility.getGlobalValue("terms.json.sites"): protein_gtr.file_info[utility.getGlobalValue("terms.data.sites")], - utility.getGlobalValue("terms.json.trees"): (protein_gtr.full_trees["0"])[utility.getGlobalValue("terms.trees.newick_with_lengths")], - utility.getGlobalValue("terms.json.file"): filename, - utility.getGlobalValue("terms.original_name"): {} - }; - - + /********** Store dataset information *************/ + /* CURRENTLY DOESN'T WORK IN MPI FOR REASONS TBD */ +// protein_gtr.output_data_info = { utility.getGlobalValue("terms.json.sequences"): protein_gtr.file_info[utility.getGlobalValue("terms.data.sequences")], +// utility.getGlobalValue("terms.json.sites"): protein_gtr.file_info[utility.getGlobalValue("terms.data.sites")], +// utility.getGlobalValue("terms.json.trees"): (protein_gtr.full_trees["0"])[utility.getGlobalValue("terms.trees.newick_with_lengths")], +// utility.getGlobalValue("terms.json.file"): filename +// }; +// +// // In case there were no branch lengths - if (Abs((protein_gtr.full_trees["0"])[utility.getGlobalValue("terms.branch_length")]) == 0){ - utility.getGlobalValue("terms.json.trees"): (protein_gtr.full_trees["0"])[utility.getGlobalValue("terms.trees.newick")], - } - utility.ForEach (utility.Keys (protein_gtr.name_mapping), "branch_name", - "utility.EnsureKey (protein_gtr.output_data_info[terms.original_name], branch_name)"); - - utility.ForEach (utility.Keys (protein_gtr.name_mapping), "branch_name", - "(protein_gtr.output_data_info[terms.original_name])[branch_name] = protein_gtr.name_mapping[branch_name]"); - - cachekey = protein_gtr.index_to_filename[filename]; - protein_gtr.analysis_results[cachekey] = {}; - (protein_gtr.analysis_results[cachekey])[utility.getGlobalValue("terms.json.input")] = protein_gtr.output_data_info; - /*****************************************************/ - +// if ( Abs( 
(protein_gtr.full_trees["0"])[utility.getGlobalValue("terms.branch_length")] ) == 0 ){ +// protein_gtr.output_data_info[ utility.getGlobalValue("terms.json.trees") ] = (protein_gtr.full_trees["0"])[utility.getGlobalValue("terms.trees.newick")]; +// } +// utility.ForEach (utility.Keys (protein_gtr.name_mapping), "branch_name", +// "utility.EnsureKey (protein_gtr.output_data_info[terms.original_name], branch_name)"); +// +// utility.ForEach (utility.Keys (protein_gtr.name_mapping), "branch_name", +// "(protein_gtr.output_data_info[terms.original_name])[branch_name] = protein_gtr.name_mapping[branch_name]"); +// +// +// (protein_gtr.analysis_results[protein_gtr.index_to_filename[filename]])[utility.getGlobalValue("terms.json.input")] = protein_gtr.output_data_info; +// +// + /****************************************************/ protein_gtr.baseline_mle = estimators.FitSingleModel_Ext(protein_gtr.full_data_filter, @@ -332,12 +388,12 @@ function protein_gtr.run_gtr_iteration_branch_lengths () { { {"protein_gtr.REV.ModelDescription", "protein_gtr.REV.ModelDescription.withGamma", - "protein_gtr.REV.ModelDescription.freqs" + "protein_gtr.REV.ModelDescription.freqs", + "models.protein.REV.ModelDescription.withGamma" } }, "Variables" : {{ "protein_gtr.shared_EFV", - "protein_gtr.final_baseline_model", "protein_gtr.rev_model_branch_lengths", "protein_gtr.filename_to_index" }} @@ -411,10 +467,12 @@ function protein_gtr.handle_branch_length_callback (node, result, arguments) { */ function protein_gtr.handle_baseline_callback (node, result, arguments) { - cachekey = protein_gtr.index_to_filename[arguments[0]]; - - (protein_gtr.analysis_results[cachekey])[utility.getGlobalValue("protein_gtr.phase_key")] = result; + savekey = protein_gtr.index_to_filename[arguments[0]]; + utility.EnsureKey(protein_gtr.analysis_results, savekey); + utility.EnsureKey(protein_gtr.analysis_results[savekey], protein_gtr.baseline_phase); + 
(protein_gtr.analysis_results[savekey])[protein_gtr.baseline_phase] = result; + io.ReportProgressMessageMD ("Protein GTR Fitter", "Initial branch length fit", "Received file '" + arguments[0] + "' from node " + node + ". LogL = " + result[terms.fit.log_likelihood]); } From 67eb7106ad7dcc461008a066f7faabd66935af09 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Wed, 11 Oct 2017 10:46:15 -0400 Subject: [PATCH 06/19] Merging all relative rates to a single batchfile. For legacy reasons, the relative_prot_rates.bf must remain however --- .../ProteinAnalyses/relative_prot_rates.bf | 4 +- res/TemplateBatchFiles/files.lst | 17 +- res/TemplateBatchFiles/libv3/models/DNA.bf | 5 + .../relative_nucleotide_rates.bf | 265 ---------------- .../relative_rates_scaler.bf | 284 ++++++++++++++++++ 5 files changed, 300 insertions(+), 275 deletions(-) delete mode 100644 res/TemplateBatchFiles/relative_nucleotide_rates.bf create mode 100644 res/TemplateBatchFiles/relative_rates_scaler.bf diff --git a/res/TemplateBatchFiles/ProteinAnalyses/relative_prot_rates.bf b/res/TemplateBatchFiles/ProteinAnalyses/relative_prot_rates.bf index d33822c6e..b696bd0fe 100644 --- a/res/TemplateBatchFiles/ProteinAnalyses/relative_prot_rates.bf +++ b/res/TemplateBatchFiles/ProteinAnalyses/relative_prot_rates.bf @@ -1,4 +1,4 @@ -RequireVersion("2.3.3"); +RequireVersion("2.3.4"); LoadFunctionLibrary("libv3/UtilityFunctions.bf"); LoadFunctionLibrary("libv3/IOFunctions.bf"); @@ -31,6 +31,8 @@ relative_prot_rates.analysis_description = { io.DisplayAnalysisBanner(relative_prot_rates.analysis_description); +console.log("**WARNING**: This analysis will be ported to a new batchfile in a future HyPhy release. 
Please consider executing the batchfile \`relative_rates_scaler.bf\` instead."); +console.log(""); /***************************************** LOAD DATASET **********************************************************/ SetDialogPrompt ("Specify a protein multiple sequence alignment file"); diff --git a/res/TemplateBatchFiles/files.lst b/res/TemplateBatchFiles/files.lst index b45db7a1b..bd92aa56e 100644 --- a/res/TemplateBatchFiles/files.lst +++ b/res/TemplateBatchFiles/files.lst @@ -1,18 +1,17 @@ "","Run a standard selection inference method on coding-sequence data.","!Selection Analyses"; -"MEME","Test for episodic site-level selection using MEME (Mixed Effects Model of Evolution).","SelectionAnalyses/MEME.bf"; -"FEL","Test for pervasive site-level selection using FEL (Fixed Effects Likelihood).","SelectionAnalyses/FEL.bf"; -"SLAC","Test for pervasive site-level selection using SLAC (Single Likelihood Ancestor Counting).","SelectionAnalyses/SLAC.bf"; -"FUBAR","Test for pervasive site-level selection using FUBAR (Fast Unconstrained Bayesian AppRoximation for inferring selection).","SelectionAnalyses/FUBAR.bf"; -"BUSTED","Test for episodic gene-wide selection using BUSTED (Branch-site Unrestricted Statistical Test of Episodic Diversification).","SelectionAnalyses/BUSTED.bf"; -"aBSREL","Test for lineage-specific evolution using the branch-site method aBS-REL (Adaptive Branch-Site Random Effects Likelihood).","SelectionAnalyses/aBSREL.bf"; -"RELAX","Test for relaxation of selection pressure along a specified set of test branches using RELAX (a random effects test of selection relaxation).","SelectionAnalyses/RELAX.bf"; +"MEME","[MEME] Test for episodic site-level selection using MEME (Mixed Effects Model of Evolution).","SelectionAnalyses/MEME.bf"; +"FEL","[FEL] Test for pervasive site-level selection using FEL (Fixed Effects Likelihood).","SelectionAnalyses/FEL.bf"; +"SLAC","[SLAC] Test for pervasive site-level selection using SLAC (Single Likelihood Ancestor 
Counting).","SelectionAnalyses/SLAC.bf"; +"FUBAR","[FUBAR] Test for pervasive site-level selection using FUBAR (Fast Unconstrained Bayesian AppRoximation for inferring selection).","SelectionAnalyses/FUBAR.bf"; +"BUSTED","[BUSTED] Test for episodic gene-wide selection using BUSTED (Branch-site Unrestricted Statistical Test of Episodic Diversification).","SelectionAnalyses/BUSTED.bf"; +"aBSREL","[aBSREL] Test for lineage-specific evolution using the branch-site method aBS-REL (Adaptive Branch-Site Random Effects Likelihood).","SelectionAnalyses/aBSREL.bf"; +"RELAX","[RELAX] Test for relaxation of selection pressure along a specified set of test branches using RELAX (a random effects test of selection relaxation).","SelectionAnalyses/RELAX.bf"; "","A collection of tools for evolutionary hypothesis testing coding-sequence data.","!Evolutionary Hypothesis Testing"; "FEL-Contrast","Use a FEL method to test which sites in a gene may be associated with adaptation to a different environment.","SelectionAnalyses/FEL-contrast.bf"; "","Evolutionary rates on non-coding data.","!Relative evolutionary rate inference"; -"Relative amino acid rates","Run a relative rates analysis on a protein alignment, in the spirit of Rate4Site (PMID: 12169533).","ProteinAnalyses/relative_prot_rates.bf"; -"Relative nucleotide rates","Run a relative rates analysis on a nucleotide alignment, in the spirit of Rate4Site (PMID: 12169533) but applied to nucleotides.","relative_nucleotide_rates.bf"; +"Relative rates","Infer relative evolutionary rates on a nucleotide or protein alignment, in the spirit of Rate4Site (PMID: 12169533).","relative_rates_scaler.bf"; "","Perform a maximum likelihood analysis on a single file given a single tree.","!Basic Analyses"; "ACD","Analyse codon data with a variery of standard models using given tree.","AnalyzeCodonData.bf"; diff --git a/res/TemplateBatchFiles/libv3/models/DNA.bf b/res/TemplateBatchFiles/libv3/models/DNA.bf index dff5f46f5..c753b7144 100644 --- 
a/res/TemplateBatchFiles/libv3/models/DNA.bf +++ b/res/TemplateBatchFiles/libv3/models/DNA.bf @@ -8,6 +8,11 @@ models.DNA.models = {{"GTR", "General time reversible model"}, {"HKY85", "Hasegawa Kishino Yano 85 (HKY85) model"}, {"JC69", "Jukes-Cantor 69 (JC69) model"}}; +models.DNA.generators ={"GTR": "models.DNA.GTR.ModelDescription", + "HKY85": "models.DNA.HKY85.ModelDescription", + "JC69": "models.DNA.JC69.ModelDescription"}; + + models.DNA.alphabet = {{"A","C","G","T"}}; models.DNA.dimensions = 4; diff --git a/res/TemplateBatchFiles/relative_nucleotide_rates.bf b/res/TemplateBatchFiles/relative_nucleotide_rates.bf deleted file mode 100644 index a9150774a..000000000 --- a/res/TemplateBatchFiles/relative_nucleotide_rates.bf +++ /dev/null @@ -1,265 +0,0 @@ -RequireVersion("2.3.4"); - -LoadFunctionLibrary("libv3/UtilityFunctions.bf"); -LoadFunctionLibrary("libv3/IOFunctions.bf"); -LoadFunctionLibrary("libv3/stats.bf"); -LoadFunctionLibrary("libv3/all-terms.bf"); - -LoadFunctionLibrary("libv3/tasks/ancestral.bf"); -LoadFunctionLibrary("libv3/tasks/alignments.bf"); -LoadFunctionLibrary("libv3/tasks/estimators.bf"); -LoadFunctionLibrary("libv3/tasks/trees.bf"); -LoadFunctionLibrary("libv3/tasks/mpi.bf"); -LoadFunctionLibrary("libv3/convenience/math.bf"); - - -LoadFunctionLibrary("libv3/models/DNA.bf"); -LoadFunctionLibrary("libv3/models/DNA/GTR.bf"); -LoadFunctionLibrary("libv3/models/DNA/HKY85.bf"); -LoadFunctionLibrary("libv3/models/DNA/JC69.bf"); - -/*------------------------------------------------------------------------------*/ - -utility.ToggleEnvVariable ("NORMALIZE_SEQUENCE_NAMES", 1); - -relative_nuc_rates.analysis_description = { - terms.io.info: "RELnuc (RELative nucleotide rates) infers, for a fixed **nucleotide** alignment and tree, **relative** site specific substitution rates, - by first optimizing alignment-wide branch lengths, and then inferring a site-specific uniform tree scaler", - terms.io.version: "0.1alpha", - terms.io.reference: "@TBD. 
Analysis based on Rate4Site method, extended for nucleotides: Pupko, T., Bell, R. E., Mayrose, I., Glaser, F. & Ben-Tal, N. Rate4Site: an algorithmic tool for the identification of functional regions in proteins by surface mapping of evolutionary determinants within their homologues. Bioinformatics 18, S71–S77 (2002).", - terms.io.authors: "Sergei L Kosakovsky Pond and Stephanie J Spielman", - terms.io.contact: "{spond,stephanie.spielman}@temple.edu" -}; - -io.DisplayAnalysisBanner(relative_nuc_rates.analysis_description); - - - -/***************************************** LOAD DATASET **********************************************************/ -SetDialogPrompt ("Specify a nucleotide multiple sequence alignment file"); - - -relative_nuc_rates.alignment_info = alignments.ReadNucleotideDataSet ("relative_nuc_rates.dataset", None); - -name_mapping = relative_nuc_rates.alignment_info[utility.getGlobalValue("terms.data.name_mapping")]; -if (None == name_mapping) { - name_mapping = {}; - utility.ForEach (alignments.GetSequenceNames ("relative_nuc_rates.dataset"), "_value_", "`&name_mapping`[_value_] = _value_"); -} -relative_nuc_rates.partitions_and_trees = trees.LoadAnnotatedTreeTopology.match_partitions (relative_nuc_rates.alignment_info[utility.getGlobalValue("terms.data.partitions")], name_mapping); -relative_nuc_rates.partition_count = Abs (relative_nuc_rates.partitions_and_trees); - -io.CheckAssertion ("relative_nuc_rates.partition_count==1", "This analysis can only handle a single partition"); - - - -io.ReportProgressMessageMD ("relative_nuc_rates", "Data", "Input alignment description"); -io.ReportProgressMessageMD ("relative_nuc_rates", "Data", "Loaded **" + - relative_nuc_rates.alignment_info [terms.data.sequences] + "** sequences, **" + - relative_nuc_rates.alignment_info [terms.data.sites] + "** sites, and **" + relative_nuc_rates.partition_count + "** partitions from \`" + relative_nuc_rates.alignment_info [terms.data.file] + "\`"); - 
-relative_nuc_rates.filter_specification = alignments.DefineFiltersForPartitions (relative_nuc_rates.partitions_and_trees, "relative_nuc_rates.dataset" , "relative_nuc_rates.filter.", relative_nuc_rates.alignment_info); - - -/********* Select model *********/ - - -relative_nuc_rates.model_name = io.SelectAnOption (models.DNA.models, - "Select a nucleotide model:"); - -// TODO: Add more nucleotide models, once more are added to the models/DNA/... -relative_nuc_rates.model_generators = {"GTR": "models.DNA.GTR.ModelDescription", - "HKY85": "models.DNA.HKY85.ModelDescription", - "JC69": "models.DNA.JC69.ModelDescription"}; - -relative_nuc_rates.model_generator = relative_nuc_rates.model_generators[relative_nuc_rates.model_name]; - - - - - -io.ReportProgressMessageMD ("relative_nuc_rates", "overall", "Obtaining alignment-wide branch-length estimates"); - -relative_nuc_rates.trees = utility.Map (relative_nuc_rates.partitions_and_trees, "_value_", "_value_[terms.data.tree]"); // value => value['tree'] -relative_nuc_rates.filter_names = utility.Map (relative_nuc_rates.filter_specification, "_value_", "_value_[terms.data.name]"); // value => value['name'] -relative_nuc_rates.alignment_wide_MLES = estimators.FitSingleModel_Ext ( - relative_nuc_rates.filter_names, - relative_nuc_rates.trees, - relative_nuc_rates.model_generator, - None, - None); - -estimators.fixSubsetOfEstimates(relative_nuc_rates.alignment_wide_MLES, relative_nuc_rates.alignment_wide_MLES[terms.global]); - -io.ReportProgressMessageMD ("relative_nuc_rates", "overall", ">Fitted an alignment-wide model. **Log-L = " + relative_nuc_rates.alignment_wide_MLES [terms.fit.log_likelihood] + "**."); - - -/** - Set up the table to display to the screen -*/ - - -relative_nuc_rates.table_screen_output = {{"Site", "Rel. 
rate (MLE)", "95% profile likelihood CI"}}; -relative_nuc_rates.table_output_options = {terms.table_options.header : TRUE, terms.table_options.minimum_column_width : 16, terms.table_options.align : "center"}; - -relative_nuc_rates.site_patterns = alignments.Extract_site_patterns (relative_nuc_rates.filter_names[0]); - -// set-up model for site-level fitting in the next couple of lines -relative_nuc_rates.site_model = model.generic.DefineModel(relative_nuc_rates.model_generator, - "relative_nuc_rates_site_model_instance", { - "0": parameters.Quote(terms.global), - }, - relative_nuc_rates.filter_names[0], - None); - - -relative_nuc_rates.site_model_mapping = {"relative_nuc_rates_site_model_instance" : relative_nuc_rates.site_model}; - -// relative_nuc_rates.site_tree is created from the information in relative_nuc_rates.trees[0] -// and populated with (the default) model -model.ApplyModelToTree( "relative_nuc_rates.site_tree", relative_nuc_rates.trees[0], {terms.default : relative_nuc_rates.site_model}, None); - -// create a site filter; this is an ugly hack for the time being -// alignments.serialize_site_filter returns HBL code as string in -// which the function `__make_filter` is defined. 
-ExecuteCommands (alignments.serialize_site_filter ( - relative_nuc_rates.filter_names[0], - ((relative_nuc_rates.site_patterns[0])[terms.data.sites])[0])); - -__make_filter ("relative_nuc_rates.site_filter"); - -LikelihoodFunction relative_nuc_rates.site_likelihood = (relative_nuc_rates.site_filter, relative_nuc_rates.site_tree); - -relative_nuc_rates.site_model_scaler_name = "relative_nuc_rates.site_rate_estimate"; - -relative_nuc_rates.rate_estimates = {}; - -/** - this will store site estimates, which will then be dumped to JSON -*/ - -parameters.DeclareGlobal (relative_nuc_rates.site_model_scaler_name, None); - -estimators.ApplyExistingEstimates ("relative_nuc_rates.site_likelihood", relative_nuc_rates.site_model_mapping, relative_nuc_rates.alignment_wide_MLES, - {"0" : relative_nuc_rates.site_model_scaler_name} // proportional scaler - ); - - -relative_nuc_rates.queue = mpi.CreateQueue ({terms.mpi.LikelihoodFunctions: {{"relative_nuc_rates.site_likelihood"}}, - terms.mpi.Models : {{"relative_nuc_rates.site_model"}}, - terms.mpi.Headers : utility.GetListOfLoadedModules ("libv3/"), - terms.mpi.Variables : {{"relative_nuc_rates.site_model_scaler_name"}} - }); - -/* run the main loop over all unique site pattern combinations */ -utility.ForEachPair (relative_nuc_rates.site_patterns, "_pattern_", "_pattern_info_", - ' - mpi.QueueJob (relative_nuc_rates.queue, "relative_nuc_rates.handle_a_site", {"0" : "relative_nuc_rates.site_likelihood", - "1" : alignments.serialize_site_filter - ((relative_nuc_rates.filter_specification[0])[terms.data.name], - (_pattern_info_[terms.data.sites])[0]), - "2" : _pattern_info_, - "3" : relative_nuc_rates.site_model_mapping - }, - "relative_nuc_rates.store_results"); - ' -); - -mpi.QueueComplete (relative_nuc_rates.queue); - -relative_nuc_rates.site_rates = utility.Map( utility.Values(utility.Map (relative_nuc_rates.rate_estimates, "_value_", "_value_[terms.fit.MLE]")), "_value_", "0+_value_"); -relative_nuc_rates.stats = 
math.GatherDescriptiveStats(relative_nuc_rates.site_rates); - -io.ReportProgressMessageMD ("relative_nuc_rates", "Stats", "Rate distribution summary"); -io.ReportProgressMessageMD ("relative_nuc_rates", "Stats", "* **Mean**: " + Format (relative_nuc_rates.stats[terms.math.mean], 6, 2)); -io.ReportProgressMessageMD ("relative_nuc_rates", "Stats", "* **Median**: " + Format (relative_nuc_rates.stats[terms.math.median], 6, 2)); -io.ReportProgressMessageMD ("relative_nuc_rates", "Stats", "* **Std.Dev**: " + Format (relative_nuc_rates.stats[terms.math.stddev], 6, 2)); -io.ReportProgressMessageMD ("relative_nuc_rates", "Stats", "* **95% Range**: [" + Format (relative_nuc_rates.stats[terms.math._2.5], 5,2) + "," + Format (relative_nuc_rates.stats[terms.math._97.5], 5,2) + "]"); - - -tree_definition = utility.Map (relative_nuc_rates.partitions_and_trees, "_partition_", '_partition_[terms.data.tree]'); -io.SpoolJSON ({ terms.json.input : {terms.json.file: relative_nuc_rates.alignment_info[terms.data.file], - terms.json.sequences: relative_nuc_rates.alignment_info[terms.data.sequences], - terms.json.sites: relative_nuc_rates.alignment_info[terms.data.sites], - terms.json.tree_string: (tree_definition[0])[terms.trees.newick_with_lengths]}, - terms.json.analysis : relative_nuc_rates.analysis_description, - terms.json.relative_site_rates : relative_nuc_rates.rate_estimates, - terms.json.global: {terms.json.model: relative_nuc_rates.model_name, - //terms.json.branch_lengths: relative_nuc_rates.alignment_wide_MLES[terms.branch_length], - terms.json.tree_string: (relative_nuc_rates.alignment_wide_MLES[terms.fit.trees])[0], - terms.json.log_likelihood: relative_nuc_rates.alignment_wide_MLES[terms.fit.log_likelihood]} - }, - relative_nuc_rates.alignment_info[terms.data.file] + ".site-rates.json"); - - -//---------------------------------------------------------------------------------------- -// HANDLERS 
-//---------------------------------------------------------------------------------------- - -// fit a rate at a single site - -//---------------------------------------------------------------------------------------- -//---------------------------------------------------------------------------------------- - -lfunction relative_nuc_rates.handle_a_site (lf, filter_data, pattern_info, model_mapping) { - - - GetString (lfInfo, ^lf,-1); - ExecuteCommands (filter_data); - - __make_filter ((lfInfo["Datafilters"])[0]); - utility.SetEnvVariable ("USE_LAST_RESULTS", TRUE); - - if (pattern_info [utility.getGlobalValue("terms.data.is_constant")]) { - // the MLE for a constant site is 0; - // only the CI is non-trivial - ^(utility.getGlobalValue("relative_nuc_rates.site_model_scaler_name")) = 0; - - } else { - - ^(utility.getGlobalValue("relative_nuc_rates.site_model_scaler_name")) = 1; - Optimize (results, ^lf); - } - - return parameters.GetProfileCI (utility.getGlobalValue("relative_nuc_rates.site_model_scaler_name"), lf, 0.95); -} - - -// handle result processing - -//---------------------------------------------------------------------------------------- -//---------------------------------------------------------------------------------------- - - -lfunction relative_nuc_rates.store_results (node, result, arguments) { - pattern_info = arguments [2]; - - - if ((^'relative_nuc_rates.table_output_options')[utility.getGlobalValue("terms.table_options.header")]) { - - io.ReportProgressMessageMD ("relative_nuc_rates", "sites", "Site rate estimates and associated 95% profile likelihood estimates\n"); - - fprintf (stdout, - io.FormatTableRow (^'relative_nuc_rates.table_screen_output',^'relative_nuc_rates.table_output_options')); - (^'relative_nuc_rates.table_output_options')[utility.getGlobalValue("terms.table_options.header")] = FALSE; - } - - - utility.ForEach (pattern_info[utility.getGlobalValue("terms.data.sites")], "_site_index_", - " - relative_nuc_rates.rate_estimates 
[_site_index_+1] = `&result`; - result_row = {1,3}; - result_row [0] = '' + (_site_index_ + 1); - result_row [1] = Format((`&result`)[terms.fit.MLE],6,3); - result_row [2] = Format((`&result`)[terms.lower_bound],6,3) + ' :' +Format((`&result`)[terms.upper_bound],6,3); - fprintf (stdout, - io.FormatTableRow (result_row,relative_nuc_rates.table_output_options)); - " - ); - - return rate_statistics; - -} - - - diff --git a/res/TemplateBatchFiles/relative_rates_scaler.bf b/res/TemplateBatchFiles/relative_rates_scaler.bf new file mode 100644 index 000000000..48d77dc7c --- /dev/null +++ b/res/TemplateBatchFiles/relative_rates_scaler.bf @@ -0,0 +1,284 @@ +RequireVersion("2.3.5"); + +LoadFunctionLibrary("libv3/UtilityFunctions.bf"); +LoadFunctionLibrary("libv3/IOFunctions.bf"); +LoadFunctionLibrary("libv3/stats.bf"); +LoadFunctionLibrary("libv3/all-terms.bf"); + +LoadFunctionLibrary("libv3/tasks/ancestral.bf"); +LoadFunctionLibrary("libv3/tasks/alignments.bf"); +LoadFunctionLibrary("libv3/tasks/estimators.bf"); +LoadFunctionLibrary("libv3/tasks/trees.bf"); +LoadFunctionLibrary("libv3/tasks/mpi.bf"); +LoadFunctionLibrary("libv3/convenience/math.bf"); + + +LoadFunctionLibrary("libv3/models/DNA.bf"); +LoadFunctionLibrary("libv3/models/DNA/GTR.bf"); +LoadFunctionLibrary("libv3/models/DNA/HKY85.bf"); +LoadFunctionLibrary("libv3/models/DNA/JC69.bf"); +LoadFunctionLibrary("libv3/models/protein.bf"); +LoadFunctionLibrary("libv3/models/protein/empirical.bf"); + + +/*------------------------------------------------------------------------------*/ + +utility.ToggleEnvVariable ("NORMALIZE_SEQUENCE_NAMES", 1); + +relative_rates.analysis_description = { + terms.io.info: "RELrates: Infer relative amino-acid or nucleotide rates from a fixed nucleotide or amino-acid alignment and tree. 
Relative site-specific substitution rates are + inferred by first optimizing alignment-wide branch lengths, and then inferring a site-specific uniform tree scaler", + terms.io.version: "0.1alpha", + terms.io.reference: "@TBD. Analysis based on Rate4Site method: Pupko, T., Bell, R. E., Mayrose, I., Glaser, F. & Ben-Tal, N. Rate4Site: an algorithmic tool for the identification of functional regions in proteins by surface mapping of evolutionary determinants within their homologues. Bioinformatics 18, S71–S77 (2002).", + terms.io.authors: "Sergei L Kosakovsky Pond and Stephanie J Spielman", + terms.io.contact: "{spond,stephanie.spielman}@temple.edu" +}; + +io.DisplayAnalysisBanner(relative_rates.analysis_description); + +/***************************************** MODEL SELECTION **********************************************************/ + +relative_rates.protein_type = "Protein"; +relative_rates.nucleotide_type = "Nucleotide"; +relative_rates.analysis_type = io.SelectAnOption ({{relative_rates.protein_type , "Infer relative rates from a protein (amino-acid) alignment"}, {relative_rates.nucleotide_type, "Infer relative rates from a nucleotide alignment"}}, + "Select your analysis type:"); + + +if (relative_rates.analysis_type == relative_rates.protein_type) { + relative_rates.model_name = io.SelectAnOption (models.protein.empirical_models, + "Select a protein model:"); + // "Yes", "No" + relative_rates.plusF = io.SelectAnOption ({{"Yes", "Use empirical (+F) amino-acid frequencies ."}, {"No", "Use default amino-acid frequencies."}}, + "Use a +F model for initial branch length optimization?"); + // Set up model generator and name as +F or not. 
+ if (relative_rates.plusF == "Yes"){ + relative_rates.model_generator = models.protein.empirical.plusF_generators[relative_rates.model_name]; + relative_rates.model_name = relative_rates.model_name + "+F"; + } + else { + relative_rates.model_generator = models.protein.empirical.default_generators[relative_rates.model_name]; /* must be the same relative_rates.* variable the +F branch sets; the alignment-wide fit reads relative_rates.model_generator */ + } +} +else { + + relative_rates.model_name = io.SelectAnOption (models.DNA.models, + "Select a nucleotide model:"); + relative_rates.model_generator = models.DNA.generators[relative_rates.model_name]; +} +/*******************************************************************************************************************/ + + +/***************************************** LOAD DATASET **********************************************************/ +SetDialogPrompt ("Specify a multiple sequence alignment file"); +relative_rates.alignment_info = alignments.ReadNucleotideDataSet ("relative_rates.dataset", None); + + + +name_mapping = relative_rates.alignment_info[utility.getGlobalValue("terms.data.name_mapping")]; +if (None == name_mapping) { + name_mapping = {}; + utility.ForEach (alignments.GetSequenceNames ("relative_rates.dataset"), "_value_", "`&name_mapping`[_value_] = _value_"); +} +relative_rates.partitions_and_trees = trees.LoadAnnotatedTreeTopology.match_partitions (relative_rates.alignment_info[utility.getGlobalValue("terms.data.partitions")], name_mapping); +relative_rates.partition_count = Abs (relative_rates.partitions_and_trees); + +io.CheckAssertion ("relative_rates.partition_count==1", "This analysis can only handle a single partition"); + + +io.ReportProgressMessageMD ("relative_rates", "Data", "Input alignment description"); +io.ReportProgressMessageMD ("relative_rates", "Data", "Loaded **" + + relative_rates.alignment_info [terms.data.sequences] + "** sequences, **" + + relative_rates.alignment_info [terms.data.sites] + "** sites, and **" + relative_rates.partition_count + "** partitions from \`" + relative_rates.alignment_info 
[terms.data.file] + "\`"); +relative_rates.filter_specification = alignments.DefineFiltersForPartitions (relative_rates.partitions_and_trees, "relative_rates.dataset" , "relative_rates.filter.", relative_rates.alignment_info); +/*******************************************************************************************************************/ + + + +/***************************************** INFERENCE **********************************************************/ + + +io.ReportProgressMessageMD ("relative_rates", "overall", "Obtaining alignment-wide branch-length estimates"); + +relative_rates.trees = utility.Map (relative_rates.partitions_and_trees, "_value_", "_value_[terms.data.tree]"); // value => value['tree'] +relative_rates.filter_names = utility.Map (relative_rates.filter_specification, "_value_", "_value_[terms.data.name]"); // value => value['name'] +relative_rates.alignment_wide_MLES = estimators.FitSingleModel_Ext ( + relative_rates.filter_names, + relative_rates.trees, + relative_rates.model_generator, + None, + None); + +estimators.fixSubsetOfEstimates(relative_rates.alignment_wide_MLES, relative_rates.alignment_wide_MLES[terms.global]); + +io.ReportProgressMessageMD ("relative_rates", "overall", ">Fitted an alignment-wide model. **Log-L = " + relative_rates.alignment_wide_MLES [terms.fit.log_likelihood] + "**."); + + +/** + Set up the table to display to the screen +*/ + + +relative_rates.table_screen_output = {{"Site", "Rel. 
rate (MLE)", "95% profile likelihood CI"}}; +relative_rates.table_output_options = {terms.table_options.header : TRUE, terms.table_options.minimum_column_width : 16, terms.table_options.align : "center"}; + +relative_rates.site_patterns = alignments.Extract_site_patterns (relative_rates.filter_names[0]); + +// set-up model for site-level fitting in the next couple of lines +relative_rates.site_model = model.generic.DefineModel(relative_rates.model_generator, + "relative_rates_site_model_instance", { + "0": parameters.Quote(terms.global), + }, + relative_rates.filter_names[0], + None); + + +relative_rates.site_model_mapping = {"relative_rates_site_model_instance" : relative_rates.site_model}; + +// relative_rates.site_tree is created from the information in relative_rates.trees[0] +// and populated with (the default) model +model.ApplyModelToTree( "relative_rates.site_tree", relative_rates.trees[0], {terms.default : relative_rates.site_model}, None); + +// create a site filter; this is an ugly hack for the time being +// alignments.serialize_site_filter returns HBL code as string in +// which the function `__make_filter` is defined. 
+ExecuteCommands (alignments.serialize_site_filter ( + relative_rates.filter_names[0], + ((relative_rates.site_patterns[0])[terms.data.sites])[0])); + +__make_filter ("relative_rates.site_filter"); + +LikelihoodFunction relative_rates.site_likelihood = (relative_rates.site_filter, relative_rates.site_tree); + +relative_rates.site_model_scaler_name = "relative_rates.site_rate_estimate"; + +relative_rates.rate_estimates = {}; + +/** + this will store site estimates, which will then be dumped to JSON +*/ + +parameters.DeclareGlobal (relative_rates.site_model_scaler_name, None); + +estimators.ApplyExistingEstimates ("relative_rates.site_likelihood", relative_rates.site_model_mapping, relative_rates.alignment_wide_MLES, + {"0" : relative_rates.site_model_scaler_name} // proportional scaler + ); + + +relative_rates.queue = mpi.CreateQueue ({terms.mpi.LikelihoodFunctions: {{"relative_rates.site_likelihood"}}, + terms.mpi.Models : {{"relative_rates.site_model"}}, + terms.mpi.Headers : utility.GetListOfLoadedModules ("libv3/"), + terms.mpi.Variables : {{"relative_rates.site_model_scaler_name"}} + }); + +/* run the main loop over all unique site pattern combinations */ +utility.ForEachPair (relative_rates.site_patterns, "_pattern_", "_pattern_info_", + ' + mpi.QueueJob (relative_rates.queue, "relative_rates.handle_a_site", {"0" : "relative_rates.site_likelihood", + "1" : alignments.serialize_site_filter + ((relative_rates.filter_specification[0])[terms.data.name], + (_pattern_info_[terms.data.sites])[0]), + "2" : _pattern_info_, + "3" : relative_rates.site_model_mapping + }, + "relative_rates.store_results"); + ' +); + +mpi.QueueComplete (relative_rates.queue); + +relative_rates.site_rates = utility.Map( utility.Values(utility.Map (relative_rates.rate_estimates, "_value_", "_value_[terms.fit.MLE]")), "_value_", "0+_value_"); +relative_rates.stats = math.GatherDescriptiveStats(relative_rates.site_rates); + +io.ReportProgressMessageMD ("relative_rates", "Stats", "Rate 
distribution summary"); +io.ReportProgressMessageMD ("relative_rates", "Stats", "* **Mean**: " + Format (relative_rates.stats[terms.math.mean], 6, 2)); +io.ReportProgressMessageMD ("relative_rates", "Stats", "* **Median**: " + Format (relative_rates.stats[terms.math.median], 6, 2)); +io.ReportProgressMessageMD ("relative_rates", "Stats", "* **Std.Dev**: " + Format (relative_rates.stats[terms.math.stddev], 6, 2)); +io.ReportProgressMessageMD ("relative_rates", "Stats", "* **95% Range**: [" + Format (relative_rates.stats[terms.math._2.5], 5,2) + "," + Format (relative_rates.stats[terms.math._97.5], 5,2) + "]"); + + +tree_definition = utility.Map (relative_rates.partitions_and_trees, "_partition_", '_partition_[terms.data.tree]'); +io.SpoolJSON ({ terms.json.input : {terms.json.file: relative_rates.alignment_info[terms.data.file], + terms.json.sequences: relative_rates.alignment_info[terms.data.sequences], + terms.json.sites: relative_rates.alignment_info[terms.data.sites], + terms.json.tree_string: (tree_definition[0])[terms.trees.newick_with_lengths]}, + terms.json.analysis : relative_rates.analysis_description, + terms.json.relative_site_rates : relative_rates.rate_estimates, + terms.json.global: {terms.json.model: relative_rates.model_name, + //terms.json.branch_lengths: relative_rates.alignment_wide_MLES[terms.branch_length], + terms.json.tree_string: (relative_rates.alignment_wide_MLES[terms.fit.trees])[0], + terms.json.log_likelihood: relative_rates.alignment_wide_MLES[terms.fit.log_likelihood]} + }, + relative_rates.alignment_info[terms.data.file] + ".site-rates.json"); + + +//---------------------------------------------------------------------------------------- +// HANDLERS +//---------------------------------------------------------------------------------------- + +// fit a rate at a single site + +//---------------------------------------------------------------------------------------- 
+//---------------------------------------------------------------------------------------- + +lfunction relative_rates.handle_a_site (lf, filter_data, pattern_info, model_mapping) { /* Fit the site rate scaler for one site pattern. lf: name of the likelihood function (dereferenced with ^). filter_data: HBL source text that, once executed, defines __make_filter. pattern_info: pattern record; only its is_constant entry is read here. model_mapping: not referenced in this body. Returns whatever parameters.GetProfileCI yields for the scaler at the 0.95 level. */ + + + GetString (lfInfo, ^lf,-1); + ExecuteCommands (filter_data); /* brings __make_filter for this pattern into scope */ + + __make_filter ((lfInfo["Datafilters"])[0]); /* re-create the likelihood function's first data filter under its existing name */ + utility.SetEnvVariable ("USE_LAST_RESULTS", TRUE); /* NOTE(review): presumably makes Optimize start from the current parameter values - confirm */ + + if (pattern_info [utility.getGlobalValue("terms.data.is_constant")]) { + // the MLE for a constant site is 0; + // only the CI is non-trivial + ^(utility.getGlobalValue("relative_rates.site_model_scaler_name")) = 0; + + } else { + + ^(utility.getGlobalValue("relative_rates.site_model_scaler_name")) = 1; /* starting value for the scaler before optimization */ + Optimize (results, ^lf); + } + + return parameters.GetProfileCI (utility.getGlobalValue("relative_rates.site_model_scaler_name"), lf, 0.95); +} + + +// handle result processing + +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- + + +lfunction relative_rates.store_results (node, result, arguments) { /* Result handler for one finished site-pattern job. node: not referenced in the visible part of the body. result: the value stored for every site of the pattern. arguments: the job's argument list; entry 2 is the pattern record. */ + pattern_info = arguments [2]; + + + if ((^'relative_rates.table_output_options')[utility.getGlobalValue("terms.table_options.header")]) { /* header flag still set: print the section title and table header once, then clear the flag */ + + io.ReportProgressMessageMD ("relative_rates", "sites", "Site rate estimates and associated 95% profile likelihood estimates\n"); + + fprintf (stdout, + io.FormatTableRow (^'relative_rates.table_screen_output',^'relative_rates.table_output_options')); + (^'relative_rates.table_output_options')[utility.getGlobalValue("terms.table_options.header")] = FALSE; + } + + + utility.ForEach (pattern_info[utility.getGlobalValue("terms.data.sites")], "_site_index_", /* the quoted code below runs once per site index of the pattern; results are keyed by _site_index_+1 */ + " + relative_rates.rate_estimates [_site_index_+1] = `&result`; + result_row = {1,3}; + result_row [0] = '' + (_site_index_ + 1); + result_row [1] = Format((`&result`)[terms.fit.MLE],6,3); + result_row [2] = Format((`&result`)[terms.lower_bound],6,3) + ' :' 
+Format((`&result`)[terms.upper_bound],6,3); + fprintf (stdout, + io.FormatTableRow (result_row,relative_rates.table_output_options)); + " + ); + + return rate_statistics; + +} + + + From ab498aa1589e0988be2b80077eb886c382733fe1 Mon Sep 17 00:00:00 2001 From: Sergei L Kosakovsky Pond Date: Thu, 12 Oct 2017 08:38:19 -0400 Subject: [PATCH 07/19] Fixing regression bug in MEME (issue #666 [demonic, indeed]) --- res/TemplateBatchFiles/SelectionAnalyses/MEME.bf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf index 91068606d..29ebe7d6d 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf @@ -609,7 +609,7 @@ lfunction meme.store_results (node, result, arguments) { result_row [5] = lrt [utility.getGlobalValue("terms.LRT")]; result_row [6] = lrt [utility.getGlobalValue("terms.p_value")]; - filtered_ebf = utility.Filter (ebf, "_value_", "_value_>=100"); + filtered_ebf = utility.Filter (result[utility.getGlobalValue("terms.empirical_bayes_factor")], "_value_", "_value_>=100"); if(None != filtered_ebf) { result_row [7] = utility.Array1D(filtered_ebf); From 06a764d2e6341585c89d10d1fab39bc8f4f37429 Mon Sep 17 00:00:00 2001 From: "Stephanie J. 
Spielman" Date: Thu, 12 Oct 2017 13:33:31 -0400 Subject: [PATCH 08/19] Added timers to proteinfitter and save the final LF: --- .../ProteinAnalyses/ProteinGTRFit.bf | 68 ++++++++++++++++--- .../ProteinAnalyses/ProteinGTRFit_helper.ibf | 29 ++++++-- 2 files changed, 83 insertions(+), 14 deletions(-) diff --git a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf b/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf index d08b41897..2a80c4538 100644 --- a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf +++ b/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf @@ -22,9 +22,13 @@ LoadFunctionLibrary("ProteinGTRFit_helper.ibf"); /*------------------------------------------------------------------------------*/ utility.ToggleEnvVariable ("NORMALIZE_SEQUENCE_NAMES", 1); -utility.ToggleEnvVariable ("PRODUCE_OPTIMIZATION_LOG", 1); // for testing purposes +utility.ToggleEnvVariable ("PRODUCE_OPTIMIZATION_LOG", 1); -io.DisplayAnalysisBanner({ +//utility.ToggleEnvVariable ("OPTIMIZATION_PRECISION", 1); // Uncomment for testing to make it all run faster. + + + +protein_gtr.analysis_banner = { terms.io.info: "Fit a general time reversible model to a collection of training protein sequence alignments. Generate substitution and scoring matrices following the procedures described in Nickle et al 2007", @@ -32,7 +36,8 @@ io.DisplayAnalysisBanner({ terms.io.reference: "Nickle DC, Heath L, Jensen MA, Gilbert PB, Mullins JI, Kosakovsky Pond SL (2007) HIV-Specific Probabilistic Models of Protein Evolution. PLoS ONE 2(6): e503. 
doi:10.1371/journal.pone.0000503", terms.io.authors: "Sergei L Kosakovsky Pond and Stephanie J Spielman", terms.io.contact: "{spond,stephanie.spielman}@temple.edu" -}); +}; +io.DisplayAnalysisBanner(protein_gtr.analysis_banner); protein_gtr.filename_to_index = terms.data.filename_to_index; @@ -48,6 +53,13 @@ protein_gtr.options.tolerance = "tolerance"; protein_gtr.options.baseline_model = "baseline model"; protein_gtr.options.rate_variation = "use rate variation"; +protein_gtr.analysis_results = {terms.json.analysis: protein_gtr.analysis_banner, + terms.json.input: {}, + terms.json.timers: {}}; + +protein_gtr.timers = {}; + + /********************************************** MENU PROMPTS ********************************************************/ /********************************************************************************************************************/ @@ -57,11 +69,12 @@ SetDialogPrompt ("Supply a list of files to include in the analysis (one per lin fscanf (PROMPT_FOR_FILE, "Lines", protein_gtr.file_list); protein_gtr.listfile = utility.getGlobalValue("LAST_FILE_PATH"); protein_gtr.json_file = protein_gtr.listfile + ".json"; +protein_gtr.final_likelihood_function = protein_gtr.listfile + "_Final-Phase-LF.nex"; protein_gtr.file_list = io.validate_a_list_of_files (protein_gtr.file_list); protein_gtr.file_list_count = Abs (protein_gtr.file_list); protein_gtr.index_to_filename = utility.SwapKeysAndValues(protein_gtr.file_list); /********* PROMPTS **********/ -protein_gtr.analysis_results = {}; + // Prompt for convergence assessment type protein_gtr.convergence_type = io.SelectAnOption( protein_gtr.convergence_options, "Select a convergence criterion."); @@ -91,6 +104,10 @@ if (protein_gtr.use_rate_variation == "Yes"){ /********************************************************************************************************************/ + +protein_gtr.startTimer (protein_gtr.timers, "Total time"); + + protein_gtr.queue = mpi.CreateQueue ({ 
utility.getGlobalValue("terms.mpi.Headers") : utility.GetListOfLoadedModules ("libv3/") , utility.getGlobalValue("terms.mpi.Functions") : { @@ -127,6 +144,9 @@ Perform an initial fit of Baseline model+F(+/-4G) to the data (or load cached fi *****************************************************************/ console.log("\n\n[PHASE 1] Performing initial branch length optimization using " + protein_gtr.baseline_model); +protein_gtr.startTimer (protein_gtr.timers, protein_gtr.baseline_phase); +protein_gtr.timer_count +=1; + for (file_index = 0; file_index < protein_gtr.file_list_count; file_index += 1) { io.ReportProgressMessageMD ("Protein GTR Fitter", " * Initial branch length fit", @@ -138,6 +158,7 @@ for (file_index = 0; file_index < protein_gtr.file_list_count; file_index += 1) } mpi.QueueComplete (protein_gtr.queue); +protein_gtr.stopTimer (protein_gtr.timers, protein_gtr.baseline_phase); // Sum of the logL from fitted baseline model across each data set protein_gtr.baseline_fit_logL = math.Sum (utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/protein_gtr.baseline_phase"), "_value_", "(_value_[protein_gtr.baseline_phase])[terms.fit.log_likelihood]")); @@ -151,8 +172,12 @@ io.ReportProgressMessageMD ("Protein GTR Fitter", " * Initial branch length fit" *****************************************************************/ console.log("\n\n[PHASE 2] Performing initial REV fit to the data"); -result_key = protein_gtr.rev_phase_prefix + protein_gtr.fit_phase; + + +result_key = protein_gtr.rev_phase_prefix + protein_gtr.fit_phase; +protein_gtr.startTimer (protein_gtr.timers, result_key); +protein_gtr.timer_count +=1; current = utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/'" + protein_gtr.baseline_phase + "'"), "_value_", "_value_['" + protein_gtr.baseline_phase + "']"); // console.log(utility.Keys(protein_gtr.analysis_results)); @@ -200,6 +225,8 @@ if (Type (protein_gtr.shared_EFV) == "String") { // 
parameters +protein_gtr.stopTimer (protein_gtr.timers, result_key); + /********************************** STEP THREE ****************************************** Iteratively optimize branch lengths with previous REV fit, and re-optimize REV *****************************************************************************************/ @@ -207,17 +234,27 @@ if (Type (protein_gtr.shared_EFV) == "String") { console.log("\n\n[PHASE 3] Iteratively optimizing branch lengths and fitting REV model until convergence."); for (;;) { - - // Optimize branch lengths, with 4-category gamma. Record logL + protein_gtr.fit_phase += 1; + + // Optimize branch lengths + protein_gtr.phase_key = protein_gtr.bl_phase_prefix + protein_gtr.fit_phase; + protein_gtr.startTimer (protein_gtr.timers, protein_gtr.phase_key); + protein_gtr.timer_count +=1; protein_gtr.phase_results = protein_gtr.run_gtr_iteration_branch_lengths(); + protein_gtr.stopTimer (protein_gtr.timers, protein_gtr.phase_key); + + // Commented out below because this is never actually used in the analysis, and it is always cached anyways // protein_gtr.scores + protein_gtr.phase_results[terms.fit.log_likelihood]; result_key = protein_gtr.rev_phase_prefix + protein_gtr.fit_phase; - + + protein_gtr.startTimer (protein_gtr.timers, result_key); + protein_gtr.timer_count +=1; protein_gtr.current_gtr_fit = protein_gtr.fitGTRtoFileList (utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/protein_gtr.phase_results['phase']"), "_value_", "_value_[protein_gtr.phase_results['phase']]"), protein_gtr.current_gtr_fit, result_key, FALSE); + protein_gtr.stopTimer (protein_gtr.timers, result_key); protein_gtr.scores + (protein_gtr.analysis_results[result_key])[terms.fit.log_likelihood]; @@ -266,9 +303,22 @@ for (;;) { console.log("\n\n[PHASE 4] Convergence achieved. 
Optimizing final model."); -protein_gtr.current_gtr_fit = protein_gtr.fitGTRtoFileList (utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/protein_gtr.phase_results['phase']"), "_value_", "_value_[protein_gtr.phase_results['phase']]"), protein_gtr.current_gtr_fit, protein_gtr.final_phase, TRUE); + +protein_gtr.startTimer (protein_gtr.timers, protein_gtr.final_phase); +protein_gtr.current_gtr_fit = protein_gtr.fitGTRtoFileList (utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/protein_gtr.phase_results['phase']"), "_value_", "_value_[protein_gtr.phase_results['phase']]"), + protein_gtr.current_gtr_fit, + protein_gtr.final_phase, + TRUE); + + +protein_gtr.stopTimer (protein_gtr.timers, protein_gtr.final_phase); + + + /* Save the JSON */ +protein_gtr.stopTimer (protein_gtr.timers, "Total time"); +protein_gtr.analysis_results[terms.json.timers] = protein_gtr.timers; io.SpoolJSON(protein_gtr.analysis_results, protein_gtr.json_file); diff --git a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit_helper.ibf b/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit_helper.ibf index f8fb782ad..51594f5d1 100644 --- a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit_helper.ibf +++ b/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit_helper.ibf @@ -64,6 +64,21 @@ function protein_gtr.save_options() { +lfunction protein_gtr.startTimer(timers, key) { /* Start (or restart) the timer named key: stores the current Time(1) reading in timers[key] under terms.timers.timer, replacing any existing entry for key. */ + timers[key] = { + utility.getGlobalValue("terms.timers.timer"): Time(1), + }; + +} +lfunction protein_gtr.stopTimer(timers, key) { /* Stop the timer named key: overwrites the stored start reading with Time(1) minus that reading, i.e. the elapsed amount. Reads timers[key], so the same key is expected to have been passed to protein_gtr.startTimer first. */ + (timers[key])[utility.getGlobalValue("terms.timers.timer")] = Time(1) - (timers[key])[utility.getGlobalValue("terms.timers.timer")]; +} + + + + + + /* Model definitions, in particular for models with rate variation */ //------------------------------------------------------------------------------------------------------------------------ @@ -293,8 +308,14 @@ function protein_gtr.fitGTRtoFileList (current_results, previous_values, phase, 
trees, protein_gtr.rev_model_branch_lengths, previous_values, - None + {terms.run_options.retain_lf_object : TRUE} ); + + + lf_id = protein_gtr.rev.mle[terms.likelihood_function]; + Export(protein_gtr.finalphase_LF, ^lf_id); + protein_gtr.rev.mle - terms.likelihood_function; + fprintf(protein_gtr.final_likelihood_function, protein_gtr.finalphase_LF); } @@ -381,7 +402,6 @@ function protein_gtr.UpdateBLWithREV (filename, rates, branch_lengths) { * @return Dictionary containing summed LogL values from branch length optimizations and the phase index for this iteration */ function protein_gtr.run_gtr_iteration_branch_lengths () { - protein_gtr.fit_phase += 1; protein_gtr.queue = mpi.CreateQueue ({ "Headers" : utility.GetListOfLoadedModules ("libv3/") , "Functions" : @@ -395,7 +415,8 @@ function protein_gtr.run_gtr_iteration_branch_lengths () { "Variables" : {{ "protein_gtr.shared_EFV", "protein_gtr.rev_model_branch_lengths", - "protein_gtr.filename_to_index" + "protein_gtr.filename_to_index", + "protein_gtr.phase_key" }} }); @@ -414,8 +435,6 @@ function protein_gtr.run_gtr_iteration_branch_lengths () { - protein_gtr.phase_key = protein_gtr.bl_phase_prefix + protein_gtr.fit_phase; - io.ReportProgressMessageMD ("Protein GTR Fitter", protein_gtr.phase_key, "Retuning branch lengths (" + protein_gtr.phase_key + ")"); for (file_index = 0; file_index < protein_gtr.file_list_count; file_index += 1) { From 4de881ceab3052b2b4dfe247bdf6a4db8a002a16 Mon Sep 17 00:00:00 2001 From: "Stephanie J. 
Spielman" Date: Thu, 12 Oct 2017 15:07:33 -0400 Subject: [PATCH 09/19] Added GDD variation to proteinfitter and added REV variation templates into REV.bf actually --- .../ProteinAnalyses/ProteinGTRFit.bf | 29 +++++--- .../ProteinAnalyses/ProteinGTRFit_helper.ibf | 66 +++++++++++-------- .../libv3/models/protein/REV.bf | 19 ++++++ 3 files changed, 77 insertions(+), 37 deletions(-) diff --git a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf b/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf index 2a80c4538..7b9893ec9 100644 --- a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf +++ b/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf @@ -24,7 +24,7 @@ LoadFunctionLibrary("ProteinGTRFit_helper.ibf"); utility.ToggleEnvVariable ("NORMALIZE_SEQUENCE_NAMES", 1); utility.ToggleEnvVariable ("PRODUCE_OPTIMIZATION_LOG", 1); -//utility.ToggleEnvVariable ("OPTIMIZATION_PRECISION", 1); // Uncomment for testing to make it all run faster. +utility.ToggleEnvVariable ("OPTIMIZATION_PRECISION", 1); // NOTE(review): testing-only setting (to make it all run faster) is ENABLED by this change; comment it out again before production runs.
@@ -86,21 +86,28 @@ protein_gtr.tolerance = io.PromptUser ("\n>Provide a tolerance level for converg protein_gtr.baseline_model = io.SelectAnOption (models.protein.empirical_models, "Select an empirical protein model to use for optimizing the provided branch lengths (we recommend LG):"); // Prompt for rate variation -protein_gtr.use_rate_variation = io.SelectAnOption( protein_gtr.rate_variation_options, "Would you like to optimize branch lengths with rate variation (uses a four-category gamma)?"); +protein_gtr.use_rate_variation = io.SelectAnOption( protein_gtr.rate_variation_options, "Would you like to optimize branch lengths with rate variation?"); protein_gtr.save_options(); -if (protein_gtr.use_rate_variation == "Yes"){ - protein_gtr.baseline_model_name = protein_gtr.baseline_model + "+F, with Gamma rates"; +if (protein_gtr.use_rate_variation == "Gamma"){ + protein_gtr.baseline_model_name = protein_gtr.baseline_model + "+F, with 4 category Gamma rates"; protein_gtr.baseline_model_desc = "protein_gtr.Baseline.ModelDescription.withGamma"; protein_gtr.rev_model_branch_lengths = "protein_gtr.REV.ModelDescription.withGamma"; -} else{ - protein_gtr.baseline_model_name = protein_gtr.baseline_model + "+F"; - protein_gtr.baseline_model_desc = "protein_gtr.Baseline.ModelDescription"; - protein_gtr.rev_model_branch_lengths = "protein_gtr.REV.ModelDescription"; +} +else { + if (protein_gtr.use_rate_variation == "GDD"){ + protein_gtr.baseline_model_name = protein_gtr.baseline_model + "+F, with 4 category GDD rates"; + protein_gtr.baseline_model_desc = "protein_gtr.Baseline.ModelDescription.withGDD4"; + protein_gtr.rev_model_branch_lengths = "protein_gtr.REV.ModelDescription.withGDD4"; + } + else { + protein_gtr.baseline_model_name = protein_gtr.baseline_model + "+F"; + protein_gtr.baseline_model_desc = "protein_gtr.Baseline.ModelDescription"; + protein_gtr.rev_model_branch_lengths = "protein_gtr.REV.ModelDescription"; + } } -//protein_gtr.shared_EFV = {20,1}; 
/********************************************************************************************************************/ @@ -112,15 +119,17 @@ protein_gtr.queue = mpi.CreateQueue ({ utility.getGlobalValue("terms.mpi.Header utility.getGlobalValue("terms.mpi.Functions") : { {"models.protein.REV.ModelDescription.withGamma", + "models.protein.REV.ModelDescription.withGDD4", "protein_gtr.REV.ModelDescription", "protein_gtr.REV.ModelDescription.withGamma", + "protein_gtr.REV.ModelDescription.withGDD4", "protein_gtr.REV.ModelDescription.freqs", "protein_gtr.Baseline.ModelDescription.withGamma", + "protein_gtr.Baseline.ModelDescription.withGDD4", "protein_gtr.Baseline.ModelDescription", "protein_gtr.fitBaselineToFile" } }, - utility.getGlobalValue("terms.mpi.Variables") : {{ "protein_gtr.shared_EFV", "protein_gtr.baseline_model_desc", diff --git a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit_helper.ibf b/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit_helper.ibf index 51594f5d1..f415473dc 100644 --- a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit_helper.ibf +++ b/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit_helper.ibf @@ -1,6 +1,9 @@ /*************** Functions used in ProteinGTRFit.bf ******************/ protein_gtr.convergence_options = {{"LogL", "Assess REV fit convergence by comparing log likelihood scores"}, {"RMSE", "[Recommended] Assess REV fit convergence by comparing RMSE between fitted matrices."}}; -protein_gtr.rate_variation_options = {{"Yes", "Use a four-category discrete gamma distribution when optimizing branch lengths."}, {"No", "Do not consider rate variation when optimizing branch lengths."}}; +protein_gtr.rate_variation_options = {{"Gamma", "Use a four-category discrete gamma distribution when optimizing branch lengths."}, + {"GDD", "Use a four-category general discrete distribution when optimizing branch lengths."}, + {"No", "Do not consider rate variation when optimizing branch lengths."} + }; function 
protein_gtr.save_options() { protein_gtr.analysis_results[utility.getGlobalValue("terms.json.options")] = {utility.getGlobalValue("protein_gtr.options.convergence_type"): protein_gtr.convergence_type, @@ -100,49 +103,58 @@ function protein_gtr.Baseline.ModelDescription.withGamma(type){ return def; } - /** - * @name models.protein.REV.ModelDescription.withGamma - * @description Define REV model with four-category gamma rate variation + * @name protein_gtr.Baseline.ModelDescription.withGDD4 + * @description Define baseline (standard matrix) model and 4bin General discrete rate variation */ -lfunction models.protein.REV.ModelDescription.withGamma (type) { - def = models.protein.REV.ModelDescription (type); - def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.Gamma.factory ({utility.getGlobalValue("terms.rate_variation.bins") : 4}); - return def; -}; +function protein_gtr.Baseline.ModelDescription.withGDD4(type){ + def = protein_gtr.Baseline.ModelDescription(type); + def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.GDD.factory ({utility.getGlobalValue("terms.rate_variation.bins"): 4}); + return def; +} + /** - * @name protein_gtr.REV.ModelDescription.withGamma - * @description Define a REV model with Gamma rate variation + * @name protein_gtr.REV.ModelDescription.freqs + * @description Define REV model frequencies as empirical */ -function protein_gtr.REV.ModelDescription.withGamma (type) { - result = models.protein.REV.ModelDescription.withGamma(type); - if (Type (protein_gtr.shared_EFV) == "Matrix") { - result [terms.model.frequency_estimator] = "protein_gtr.REV.ModelDescription.freqs"; - } - return result; +function protein_gtr.REV.ModelDescription.freqs (model, namespace, datafilter) { + model[terms.efv_estimate] = protein_gtr.shared_EFV; + model[terms.model.efv_estimate_name] = terms.frequencies.predefined; + (model[terms.parameters])[terms.model.empirical] = 0; + return model; } + /** * @name 
protein_gtr.REV.ModelDescription * @description Define a REV model with constant site rates */ function protein_gtr.REV.ModelDescription (type) { - result = models.protein.REV.ModelDescription(type); + def = models.protein.REV.ModelDescription(type); if (Type (protein_gtr.shared_EFV) == "Matrix") { - result [terms.model.frequency_estimator] = "protein_gtr.REV.ModelDescription.freqs"; + def [terms.model.frequency_estimator] = "protein_gtr.REV.ModelDescription.freqs"; } - return result; + return def; } + /** - * @name protein_gtr.REV.ModelDescription.freqs - * @description Define REV model frequencies as empirical + * @name protein_gtr.REV.ModelDescription.withGamma + * @description Define a REV model with Gamma rate variation */ -function protein_gtr.REV.ModelDescription.freqs (model, namespace, datafilter) { - model[terms.efv_estimate] = protein_gtr.shared_EFV; - model[terms.model.efv_estimate_name] = terms.frequencies.predefined; - (model[terms.parameters])[terms.model.empirical] = 0; - return model; +function protein_gtr.REV.ModelDescription.withGamma (type) { + def = models.protein.REV.ModelDescription(type); + def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.Gamma.factory ({utility.getGlobalValue("terms.rate_variation.bins"): 4}); + return def; } +/** + * @name protein_gtr.REV.ModelDescription.withGDD4 + * @description Define a REV model with 4bin General discrete rate variation + */ +function protein_gtr.REV.ModelDescription.withGDD4(type){ + def = models.protein.REV.ModelDescription.withGDD4(type); + def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.GDD.factory ({utility.getGlobalValue("terms.rate_variation.bins"): 4}); + return def; +} //------------------------------------------------------------------------------------------------------------------------ diff --git a/res/TemplateBatchFiles/libv3/models/protein/REV.bf b/res/TemplateBatchFiles/libv3/models/protein/REV.bf index 
bee88855d..3c668da84 100644 --- a/res/TemplateBatchFiles/libv3/models/protein/REV.bf +++ b/res/TemplateBatchFiles/libv3/models/protein/REV.bf @@ -73,7 +73,26 @@ lfunction models.protein.REV._DefineQ(model_dict, namespace) { return model_dict; } +/** + * @name models.protein.REV.ModelDescription.withGamma + * @description Define REV model with four-category gamma rate variation + */ +lfunction models.protein.REV.ModelDescription.withGamma (type) { + def = models.protein.REV.ModelDescription (type); + def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.Gamma.factory ({utility.getGlobalValue("terms.rate_variation.bins") : 4}); + return def; +}; + +/** + * @name models.protein.REV.ModelDescription.withGDD4 + * @description Define REV model with 4bin General discrete rate variation + */ +lfunction models.protein.REV.ModelDescription.withGDD4 (type) { + def = models.protein.REV.ModelDescription (type); + def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.GDD.factory ({utility.getGlobalValue("terms.rate_variation.bins") : 4}); + return def; +}; /** * @name models.protein.REV.DefineQMatrix From a906e1288095fc44b3a5b850416b6dd29ccecc14 Mon Sep 17 00:00:00 2001 From: sjspielman Date: Thu, 12 Oct 2017 15:27:41 -0400 Subject: [PATCH 10/19] Commented out optimization precision line used for testing only --- res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf b/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf index 7b9893ec9..7c5db505e 100644 --- a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf +++ b/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf @@ -24,7 +24,7 @@ LoadFunctionLibrary("ProteinGTRFit_helper.ibf"); utility.ToggleEnvVariable ("NORMALIZE_SEQUENCE_NAMES", 1); utility.ToggleEnvVariable ("PRODUCE_OPTIMIZATION_LOG", 1); -utility.ToggleEnvVariable 
("OPTIMIZATION_PRECISION", 1); // Uncomment for testing to make it all run faster. +//utility.ToggleEnvVariable ("OPTIMIZATION_PRECISION", 1); // Uncomment for testing to make it all run faster. From 3afa77e4c1510417fea902d60851303cf4998966 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Sat, 14 Oct 2017 10:55:40 -0400 Subject: [PATCH 11/19] RELrates.bf is now the batchfile for relative rates analyses. Prompt and tests have been updated accordingly. ProteinGTRFit is now in TemplateBatchFiles as well --- .../{ProteinAnalyses => }/ProteinGTRFit.bf | 0 .../ProteinGTRFit_helper.ibf | 0 .../{relative_rates_scaler.bf => RELrates.bf} | 86 +++++++++++++++---- res/TemplateBatchFiles/files.lst | 2 +- tests/hbltests/libv3/ProteinGTRFit.wbf | 8 +- tests/hbltests/libv3/RELrates.wbf | 49 +++++++++++ .../libv3/relative_nucleotide_rates.wbf | 6 -- tests/hbltests/libv3/relative_prot_rates.wbf | 7 -- 8 files changed, 125 insertions(+), 33 deletions(-) rename res/TemplateBatchFiles/{ProteinAnalyses => }/ProteinGTRFit.bf (100%) rename res/TemplateBatchFiles/{ProteinAnalyses => }/ProteinGTRFit_helper.ibf (100%) rename res/TemplateBatchFiles/{relative_rates_scaler.bf => RELrates.bf} (79%) create mode 100644 tests/hbltests/libv3/RELrates.wbf delete mode 100644 tests/hbltests/libv3/relative_nucleotide_rates.wbf delete mode 100644 tests/hbltests/libv3/relative_prot_rates.wbf diff --git a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf b/res/TemplateBatchFiles/ProteinGTRFit.bf similarity index 100% rename from res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf rename to res/TemplateBatchFiles/ProteinGTRFit.bf diff --git a/res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit_helper.ibf b/res/TemplateBatchFiles/ProteinGTRFit_helper.ibf similarity index 100% rename from res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit_helper.ibf rename to res/TemplateBatchFiles/ProteinGTRFit_helper.ibf diff --git a/res/TemplateBatchFiles/relative_rates_scaler.bf 
b/res/TemplateBatchFiles/RELrates.bf similarity index 79% rename from res/TemplateBatchFiles/relative_rates_scaler.bf rename to res/TemplateBatchFiles/RELrates.bf index 48d77dc7c..cf5984a7e 100644 --- a/res/TemplateBatchFiles/relative_rates_scaler.bf +++ b/res/TemplateBatchFiles/RELrates.bf @@ -11,7 +11,7 @@ LoadFunctionLibrary("libv3/tasks/estimators.bf"); LoadFunctionLibrary("libv3/tasks/trees.bf"); LoadFunctionLibrary("libv3/tasks/mpi.bf"); LoadFunctionLibrary("libv3/convenience/math.bf"); - +LoadFunctionLibrary("libv3/models/rate_variation.bf"); LoadFunctionLibrary("libv3/models/DNA.bf"); LoadFunctionLibrary("libv3/models/DNA/GTR.bf"); @@ -45,34 +45,89 @@ relative_rates.analysis_type = io.SelectAnOption ({{relative_rates.protein_type if (relative_rates.analysis_type == relative_rates.protein_type) { - relative_rates.model_name = io.SelectAnOption (models.protein.empirical_models, + relative_rates.baseline_model = io.SelectAnOption (models.protein.empirical_models, "Select a protein model:"); + + relative_rates.use_rate_variation = io.SelectAnOption( {{"Gamma", "Use a four-category discrete gamma distribution when optimizing branch lengths."}, + {"GDD", "Use a four-category general discrete distribution when optimizing branch lengths."}, + {"No", "Do not consider rate variation when optimizing branch lengths."} + }, + "Optimize branch lengths with rate variation?"); // "Yes", "No" relative_rates.plusF = io.SelectAnOption ({{"Yes", "Use empirical (+F) amino-acid frequencies ."}, {"No", "Use default amino-acid frequencies."}}, "Use a +F model for initial branch length optimization?"); - // Set up model generator and name as +F or not. 
if (relative_rates.plusF == "Yes"){ - relative_rates.model_generator = models.protein.empirical.plusF_generators[relative_rates.model_name]; - relative_rates.model_name = relative_rates.model_name + "+F"; + relative_rates.generators = models.protein.empirical.plusF_generators; } else { - relative_prot_rates.model_generator = models.protein.empirical.default_generators[relative_rates.model_name]; + relative_rates.generators = models.protein.empirical.default_generators; } } else { - relative_rates.model_name = io.SelectAnOption (models.DNA.models, + relative_rates.baseline_model = io.SelectAnOption (models.DNA.models, "Select a nucleotide model:"); - relative_rates.model_generator = models.DNA.generators[relative_rates.model_name]; + relative_rates.use_rate_variation = io.SelectAnOption( {{"Gamma", "Use a four-category discrete gamma distribution when optimizing branch lengths."}, + {"GDD", "Use a four-category general discrete distribution when optimizing branch lengths."}, + {"No", "Do not consider rate variation when optimizing branch lengths."} + }, + "Optimize branch lengths with rate variation?"); + + relative_rates.generators = models.DNA.generators; + relative_rates.plusF = "No"; +} + + + + + +function relative_rates.Baseline.ModelDescription(type){ + def = Call( relative_rates.generators[relative_rates.baseline_model], type); + return def; +} + +function relative_rates.Baseline.ModelDescription.withGamma(type){ + def = relative_rates.Baseline.ModelDescription(type); + def [terms.model.rate_variation] = rate_variation.types.Gamma.factory ({terms.rate_variation.bins : 4}); + return def; +} +function relative_rates.Baseline.ModelDescription.withGDD4(type){ + def = relative_rates.Baseline.ModelDescription(type); + def [terms.model.rate_variation] = rate_variation.types.GDD.factory ({terms.rate_variation.bins : 4}); + return def; +} + + +relative_rates.baseline_model_name = relative_rates.baseline_model; +if (relative_rates.plusF == "Yes"){ + 
relative_rates.baseline_model_name = relative_rates.baseline_model_name + "+F"; +} + +if (relative_rates.use_rate_variation == "Gamma"){ + relative_rates.baseline_model_name = relative_rates.baseline_model_name + " with 4 category Gamma rates"; + relative_rates.baseline_model_desc = "relative_rates.Baseline.ModelDescription.withGamma"; +} +else { + if (relative_rates.use_rate_variation == "GDD"){ + relative_rates.baseline_model_name = relative_rates.baseline_model_name + " with 4 category GDD rates"; + relative_rates.baseline_model_desc = "relative_rates.Baseline.ModelDescription.withGDD4"; + } + else { + relative_rates.baseline_model_name = relative_rates.baseline_model_name; + relative_rates.baseline_model_desc = "relative_rates.Baseline.ModelDescription"; + } } +/**************************************************************/ + + + + /*******************************************************************************************************************/ /***************************************** LOAD DATASET **********************************************************/ SetDialogPrompt ("Specify a multiple sequence alignment file"); -relative_rates.alignment_info = alignments.ReadNucleotideDataSet ("relative_rates.dataset", None); - - +relative_rates.alignment_info = alignments.ReadNucleotideDataSet ("relative_rates.dataset", NOne); name_mapping = relative_rates.alignment_info[utility.getGlobalValue("terms.data.name_mapping")]; if (None == name_mapping) { @@ -104,15 +159,16 @@ relative_rates.filter_names = utility.Map (relative_rates.filter_specification, relative_rates.alignment_wide_MLES = estimators.FitSingleModel_Ext ( relative_rates.filter_names, relative_rates.trees, - relative_rates.model_generator, + relative_rates.baseline_model_desc, None, None); + + estimators.fixSubsetOfEstimates(relative_rates.alignment_wide_MLES, relative_rates.alignment_wide_MLES[terms.global]); io.ReportProgressMessageMD ("relative_rates", "overall", ">Fitted an alignment-wide model. 
**Log-L = " + relative_rates.alignment_wide_MLES [terms.fit.log_likelihood] + "**."); - /** Set up the table to display to the screen */ @@ -124,7 +180,7 @@ relative_rates.table_output_options = {terms.table_options.header : TRUE, terms. relative_rates.site_patterns = alignments.Extract_site_patterns (relative_rates.filter_names[0]); // set-up model for site-level fitting in the next couple of lines -relative_rates.site_model = model.generic.DefineModel(relative_rates.model_generator, +relative_rates.site_model = model.generic.DefineModel(relative_rates.baseline_model_desc, "relative_rates_site_model_instance", { "0": parameters.Quote(terms.global), }, @@ -203,7 +259,7 @@ io.SpoolJSON ({ terms.json.input : {terms.json.file: relative_rates.alignment_in terms.json.tree_string: (tree_definition[0])[terms.trees.newick_with_lengths]}, terms.json.analysis : relative_rates.analysis_description, terms.json.relative_site_rates : relative_rates.rate_estimates, - terms.json.global: {terms.json.model: relative_rates.model_name, + terms.json.global: {terms.json.model: relative_rates.baseline_model_name, //terms.json.branch_lengths: relative_rates.alignment_wide_MLES[terms.branch_length], terms.json.tree_string: (relative_rates.alignment_wide_MLES[terms.fit.trees])[0], terms.json.log_likelihood: relative_rates.alignment_wide_MLES[terms.fit.log_likelihood]} diff --git a/res/TemplateBatchFiles/files.lst b/res/TemplateBatchFiles/files.lst index bd92aa56e..226cf7ed6 100644 --- a/res/TemplateBatchFiles/files.lst +++ b/res/TemplateBatchFiles/files.lst @@ -11,7 +11,7 @@ "FEL-Contrast","Use a FEL method to test which sites in a gene may be associated with adaptation to a different environment.","SelectionAnalyses/FEL-contrast.bf"; "","Evolutionary rates on non-coding data.","!Relative evolutionary rate inference"; -"Relative rates","Infer relative evolutionary rates on a nucleotide or protein alignment, in the spirit of Rate4Site (PMID: 12169533).","relative_rates_scaler.bf"; 
+"RELrates","Infer relative evolutionary rates on a nucleotide or protein alignment, in the spirit of Rate4Site (PMID: 12169533).","RELrates.bf"; "","Perform a maximum likelihood analysis on a single file given a single tree.","!Basic Analyses"; "ACD","Analyse codon data with a variery of standard models using given tree.","AnalyzeCodonData.bf"; diff --git a/tests/hbltests/libv3/ProteinGTRFit.wbf b/tests/hbltests/libv3/ProteinGTRFit.wbf index e6544b77c..ffef8dfed 100644 --- a/tests/hbltests/libv3/ProteinGTRFit.wbf +++ b/tests/hbltests/libv3/ProteinGTRFit.wbf @@ -1,8 +1,8 @@ LoadFunctionLibrary("libv3/UtilityFunctions.bf"); LoadFunctionLibrary("libv3/IOFunctions.bf"); -OPTIMIZATION_TIME_HARD_LIMIT=1; - +utility.ToggleEnvVariable ("OPTIMIZATION_PRECISION", 1); +utility.ToggleEnvVariable ("OPTIMIZATION_TIME_HARD_LIMIT", 1); function ensureFullPath (path) { if ((path $ "/")[0]!= 0){ @@ -29,12 +29,12 @@ fprintf(list_path_final, CLEAR_FILE, ""); utility.ForEach(file_list2, "_value_", "writeNewPath(_value_)"); -LoadFunctionLibrary("ProteinAnalyses/ProteinGTRFit.bf", { +LoadFunctionLibrary("ProteinGTRFit.bf", { "0": PATH_TO_CURRENT_BF + "data/protgtr_fitter_lines.txt", "1": "LogL", // use logL convergence "2": "0.1", // stopping "3": "WAG", // use WAG for baseline - "4": "Yes", // use a gamma for rate variation + "4": "Gamma", // use a gamma for rate variation }); diff --git a/tests/hbltests/libv3/RELrates.wbf b/tests/hbltests/libv3/RELrates.wbf new file mode 100644 index 000000000..2edf896ff --- /dev/null +++ b/tests/hbltests/libv3/RELrates.wbf @@ -0,0 +1,49 @@ +LoadFunctionLibrary("libv3/UtilityFunctions.bf"); +utility.ToggleEnvVariable ("OPTIMIZATION_PRECISION", 1); +utility.ToggleEnvVariable ("OPTIMIZATION_TIME_HARD_LIMIT", 1); + + +LoadFunctionLibrary("RELrates.bf", { + "0": "Protein", + "1": "WAG", + "2": "Gamma", + "3": "Yes", + "4": PATH_TO_CURRENT_BF + "data/dat.prot", + "5": "Y" +}); + + +LoadFunctionLibrary("RELrates.bf", { + "0": "Protein", + "1": "WAG", + "2": 
"GDD", + "3": "No", + "4": PATH_TO_CURRENT_BF + "data/dat.prot", + "5": "Y" +}); + + +LoadFunctionLibrary("RELrates.bf", { + "0": "Protein", + "1": "WAG", + "2": "No", + "3": "No", + "4": PATH_TO_CURRENT_BF + "data/dat.prot", + "5": "Y" +}); + +LoadFunctionLibrary("RELrates.bf", { + "0": "Nucleotide", + "1": "GTR", + "2": "Gamma", + "3": PATH_TO_CURRENT_BF + "data/dat.nuc", + "4": "Y" +}); + +LoadFunctionLibrary("RELrates.bf", { + "0": "Nucleotide", + "1": "GTR", + "2": "No", + "3": PATH_TO_CURRENT_BF + "data/dat.nuc", + "4": "Y" +}); \ No newline at end of file diff --git a/tests/hbltests/libv3/relative_nucleotide_rates.wbf b/tests/hbltests/libv3/relative_nucleotide_rates.wbf deleted file mode 100644 index 0704e6ca3..000000000 --- a/tests/hbltests/libv3/relative_nucleotide_rates.wbf +++ /dev/null @@ -1,6 +0,0 @@ -LoadFunctionLibrary("relative_nucleotide_rates.bf", { - "0": PATH_TO_CURRENT_BF + "data/dat.nuc", - "1": "Y", - "2": "GTR" -}); - diff --git a/tests/hbltests/libv3/relative_prot_rates.wbf b/tests/hbltests/libv3/relative_prot_rates.wbf deleted file mode 100644 index fb4f6064b..000000000 --- a/tests/hbltests/libv3/relative_prot_rates.wbf +++ /dev/null @@ -1,7 +0,0 @@ -LoadFunctionLibrary("ProteinAnalyses/relative_prot_rates.bf", { - "0": PATH_TO_CURRENT_BF + "data/dat.prot", - "1": "Y", - "2": "LG", // fit - "3": "Yes" // yes +F -}); - From 10000c5c15200d0f9dd9aa7c94ad4d2706480686 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Sat, 14 Oct 2017 11:45:30 -0400 Subject: [PATCH 12/19] Improvements to RELrates JSON. 
Critical bug fix to AA model normalization where rates were undefined where target EFV=0 --- res/TemplateBatchFiles/RELrates.bf | 21 ++++++++++++++----- .../libv3/models/protein/empirical.bf | 14 +++++++++---- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/res/TemplateBatchFiles/RELrates.bf b/res/TemplateBatchFiles/RELrates.bf index cf5984a7e..753c0abc5 100644 --- a/res/TemplateBatchFiles/RELrates.bf +++ b/res/TemplateBatchFiles/RELrates.bf @@ -163,7 +163,7 @@ relative_rates.alignment_wide_MLES = estimators.FitSingleModel_Ext ( None, None); - + estimators.fixSubsetOfEstimates(relative_rates.alignment_wide_MLES, relative_rates.alignment_wide_MLES[terms.global]); @@ -179,14 +179,15 @@ relative_rates.table_output_options = {terms.table_options.header : TRUE, terms. relative_rates.site_patterns = alignments.Extract_site_patterns (relative_rates.filter_names[0]); -// set-up model for site-level fitting in the next couple of lines -relative_rates.site_model = model.generic.DefineModel(relative_rates.baseline_model_desc, +// set-up model for site-level fitting in the next couple of lines, where rv turned off +relative_rates.site_model = model.generic.DefineModel("relative_rates.Baseline.ModelDescription", "relative_rates_site_model_instance", { "0": parameters.Quote(terms.global), }, relative_rates.filter_names[0], None); - + + relative_rates.site_model_mapping = {"relative_rates_site_model_instance" : relative_rates.site_model}; @@ -251,6 +252,15 @@ io.ReportProgressMessageMD ("relative_rates", "Stats", "* **Median**: " + Forma io.ReportProgressMessageMD ("relative_rates", "Stats", "* **Std.Dev**: " + Format (relative_rates.stats[terms.math.stddev], 6, 2)); io.ReportProgressMessageMD ("relative_rates", "Stats", "* **95% Range**: [" + Format (relative_rates.stats[terms.math._2.5], 5,2) + "," + Format (relative_rates.stats[terms.math._97.5], 5,2) + "]"); + +if (relative_rates.use_rate_variation == "No"){ + relative_rates.storerv = "None"; +} else { + 
relative_rates.storerv = utility.Map( + utility.Map (relative_rates.alignment_wide_MLES[utility.getGlobalValue("terms.global")], "_value_", ' {terms.fit.MLE : _value_[terms.fit.MLE]}'), + "_value_", + "_value_[terms.fit.MLE]"); +} tree_definition = utility.Map (relative_rates.partitions_and_trees, "_partition_", '_partition_[terms.data.tree]'); io.SpoolJSON ({ terms.json.input : {terms.json.file: relative_rates.alignment_info[terms.data.file], @@ -260,7 +270,8 @@ io.SpoolJSON ({ terms.json.input : {terms.json.file: relative_rates.alignment_in terms.json.analysis : relative_rates.analysis_description, terms.json.relative_site_rates : relative_rates.rate_estimates, terms.json.global: {terms.json.model: relative_rates.baseline_model_name, - //terms.json.branch_lengths: relative_rates.alignment_wide_MLES[terms.branch_length], + terms.model.rate_variation: relative_rates.storerv, + terms.efv_estimate: (relative_rates.alignment_wide_MLES[utility.getGlobalValue("terms.efv_estimate")])["VALUEINDEXORDER"][0], terms.json.tree_string: (relative_rates.alignment_wide_MLES[terms.fit.trees])[0], terms.json.log_likelihood: relative_rates.alignment_wide_MLES[terms.fit.log_likelihood]} }, diff --git a/res/TemplateBatchFiles/libv3/models/protein/empirical.bf b/res/TemplateBatchFiles/libv3/models/protein/empirical.bf index 1e179b13e..f716c46be 100644 --- a/res/TemplateBatchFiles/libv3/models/protein/empirical.bf +++ b/res/TemplateBatchFiles/libv3/models/protein/empirical.bf @@ -143,7 +143,6 @@ lfunction models.protein.empirical._NormalizeEmpiricalRates(model_dict, namespac Q[i][j] = Q[i][j] / norm; } } - // Now convert it BACK TO hyphy dictionary with frequencies divided out. // // ************** This sets the new empirical rates. 
************** // @@ -152,11 +151,16 @@ lfunction models.protein.empirical._NormalizeEmpiricalRates(model_dict, namespac new_empirical_rates[alphabet[l1]] = {}; for (l2 = l1 + 1; l2 < dim; l2 += 1) { - nof_rate = Q [l1][l2] / EFV[l2]; + + if (EFV[l2] == 0){ + nof_rate = 0.; + } + else { + nof_rate = Q [l1][l2] / EFV[l2]; + } (new_empirical_rates[alphabet[l1]])[alphabet[l2]] = nof_rate; } } - model_dict[ utility.getGlobalValue("terms.model.empirical_rates")] = new_empirical_rates; return model_dict; @@ -198,7 +202,9 @@ function models.protein.empirical.DefineQMatrix (modelSpec, namespace) { // ADDED FOR EMPIRICAL MODELS __empirical_rates = modelSpec[terms.model.empirical_rates]; - + + + __global_cache = {}; if (None != __rate_variation) { From 6cfc546abca9a03aaa05b0058c8f3b5e4a776617 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 16 Oct 2017 13:36:59 -0400 Subject: [PATCH 13/19] RELrates renamed to way better name LEISR to avoid confusing with REL methods --- res/TemplateBatchFiles/LEISR.bf | 351 ++++++++++++++++++ res/TemplateBatchFiles/RELrates.bf | 351 ------------------ res/TemplateBatchFiles/files.lst | 2 +- .../libv3/{RELrates.wbf => LEISR.wbf} | 10 +- 4 files changed, 357 insertions(+), 357 deletions(-) create mode 100644 res/TemplateBatchFiles/LEISR.bf delete mode 100644 res/TemplateBatchFiles/RELrates.bf rename tests/hbltests/libv3/{RELrates.wbf => LEISR.wbf} (81%) diff --git a/res/TemplateBatchFiles/LEISR.bf b/res/TemplateBatchFiles/LEISR.bf new file mode 100644 index 000000000..e6a3df4e4 --- /dev/null +++ b/res/TemplateBatchFiles/LEISR.bf @@ -0,0 +1,351 @@ +RequireVersion("2.3.5"); + +LoadFunctionLibrary("libv3/UtilityFunctions.bf"); +LoadFunctionLibrary("libv3/IOFunctions.bf"); +LoadFunctionLibrary("libv3/stats.bf"); +LoadFunctionLibrary("libv3/all-terms.bf"); + +LoadFunctionLibrary("libv3/tasks/ancestral.bf"); +LoadFunctionLibrary("libv3/tasks/alignments.bf"); +LoadFunctionLibrary("libv3/tasks/estimators.bf"); 
+LoadFunctionLibrary("libv3/tasks/trees.bf");
+LoadFunctionLibrary("libv3/tasks/mpi.bf");
+LoadFunctionLibrary("libv3/convenience/math.bf");
+LoadFunctionLibrary("libv3/models/rate_variation.bf");
+// nucleotide and protein substitution-model libraries
+LoadFunctionLibrary("libv3/models/DNA.bf");
+LoadFunctionLibrary("libv3/models/DNA/GTR.bf");
+LoadFunctionLibrary("libv3/models/DNA/HKY85.bf");
+LoadFunctionLibrary("libv3/models/DNA/JC69.bf");
+LoadFunctionLibrary("libv3/models/protein.bf");
+LoadFunctionLibrary("libv3/models/protein/empirical.bf");
+
+
+/*------------------------------------------------------------------------------*/
+
+utility.ToggleEnvVariable ("NORMALIZE_SEQUENCE_NAMES", 1);
+// Analysis metadata: shown by the banner call below and stored under terms.json.analysis in the output JSON.
+leisr.analysis_description = {
+    terms.io.info: "LEISR (Likelihood Estimation of Individual Site Rates) infer relative amino-acid or nucleotide rates from a fixed nucleotide or amino-acid alignment and tree. Relative site-specific substitution rates are
+    inferred by first optimizing alignment-wide branch lengths, and then inferring a site-specific uniform tree scaler",
+    terms.io.version: "0.1alpha",
+    terms.io.reference: "@TBD. Analysis based on Rate4Site method, : Pupko, T., Bell, R. E., Mayrose, I., Glaser, F. & Ben-Tal, N. Rate4Site: an algorithmic tool for the identification of functional regions in proteins by surface mapping of evolutionary determinants within their homologues. Bioinformatics 18, S71–S77 (2002).",
+    terms.io.authors: "Sergei L Kosakovsky Pond and Stephanie J Spielman",
+    terms.io.contact: "{spond,stephanie.spielman}@temple.edu"
+};
+
+io.DisplayAnalysisBanner(leisr.analysis_description);
+
+/***************************************** MODEL SELECTION **********************************************************/
+// Prompt order matters: the test driver for this script supplies its answers by position ("0", "1", "2", ...).
+leisr.protein_type = "Protein";
+leisr.nucleotide_type = "Nucleotide";
+leisr.analysis_type = io.SelectAnOption ({{leisr.protein_type , "Infer relative rates from a protein (amino-acid) alignment"}, {leisr.nucleotide_type, "Infer relative rates from a nucleotide alignment"}},
+    "Select your analysis type:");
+
+// Protein branch asks: model, rate variation, +F.  Nucleotide branch asks: model, rate variation (plusF is forced to "No").
+if (leisr.analysis_type == leisr.protein_type) {
+    leisr.baseline_model = io.SelectAnOption (models.protein.empirical_models,
+        "Select a protein model:");
+
+    leisr.use_rate_variation = io.SelectAnOption( {{"Gamma", "Use a four-category discrete gamma distribution when optimizing branch lengths."},
+        {"GDD", "Use a four-category general discrete distribution when optimizing branch lengths."},
+        {"No", "Do not consider rate variation when optimizing branch lengths."}
+        },
+        "Optimize branch lengths with rate variation?");
+    // "Yes", "No"
+    leisr.plusF = io.SelectAnOption ({{"Yes", "Use empirical (+F) amino-acid frequencies ."}, {"No", "Use default amino-acid frequencies."}},
+        "Use a +F model for initial branch length optimization?");
+    if (leisr.plusF == "Yes"){
+        leisr.generators = models.protein.empirical.plusF_generators;
+    }
+    else {
+        leisr.generators = models.protein.empirical.default_generators;
+    }
+}
+else {
+
+    leisr.baseline_model = io.SelectAnOption (models.DNA.models,
+        "Select a nucleotide model:");
+    leisr.use_rate_variation = io.SelectAnOption( {{"Gamma", "Use a four-category discrete gamma distribution when optimizing branch lengths."},
+        {"GDD", "Use a four-category general discrete distribution when optimizing branch lengths."},
+        {"No", "Do not consider rate variation when optimizing branch lengths."}
+        },
+        "Optimize branch lengths with rate variation?");
+
+    leisr.generators = models.DNA.generators;
+    leisr.plusF = "No";
+}
+
+// Model-description callbacks; each takes `type` and returns a model description.
+//   leisr.Baseline.ModelDescription           : forwards `type` to the generator stored in leisr.generators for the selected model.
+//   leisr.Baseline.ModelDescription.withGamma : same description with terms.model.rate_variation set from the Gamma factory (4 bins).
+//   leisr.Baseline.ModelDescription.withGDD4  : same description with terms.model.rate_variation set from the GDD factory (4 bins).
+function leisr.Baseline.ModelDescription(type){
+    def = Call( leisr.generators[leisr.baseline_model], type);
+    return def;
+}
+
+function leisr.Baseline.ModelDescription.withGamma(type){
+    def = leisr.Baseline.ModelDescription(type);
+    def [terms.model.rate_variation] = rate_variation.types.Gamma.factory ({terms.rate_variation.bins : 4});
+    return def;
+}
+function leisr.Baseline.ModelDescription.withGDD4(type){
+    def = leisr.Baseline.ModelDescription(type);
+    def [terms.model.rate_variation] = rate_variation.types.GDD.factory ({terms.rate_variation.bins : 4});
+    return def;
+}
+
+// leisr.baseline_model_name: label written to the JSON output; leisr.baseline_model_desc: name of the callback used for the alignment-wide fit.
+leisr.baseline_model_name = leisr.baseline_model;
+if (leisr.plusF == "Yes"){
+    leisr.baseline_model_name = leisr.baseline_model_name + "+F";
+}
+
+if (leisr.use_rate_variation == "Gamma"){
+    leisr.baseline_model_name = leisr.baseline_model_name + " with 4 category Gamma rates";
+    leisr.baseline_model_desc = "leisr.Baseline.ModelDescription.withGamma";
+}
+else {
+    if (leisr.use_rate_variation == "GDD"){
+        leisr.baseline_model_name = leisr.baseline_model_name + " with 4 category GDD rates";
+        leisr.baseline_model_desc = "leisr.Baseline.ModelDescription.withGDD4";
+    }
+    else {
+        leisr.baseline_model_name = leisr.baseline_model_name;
+        leisr.baseline_model_desc = "leisr.Baseline.ModelDescription";
+    }
+}
+/**************************************************************/
+
+
+
+
+/*******************************************************************************************************************/
+
+
+/***************************************** LOAD DATASET **********************************************************/
+SetDialogPrompt ("Specify a multiple sequence alignment file");
+leisr.alignment_info = alignments.ReadNucleotideDataSet ("leisr.dataset", None);
+// No file path is passed above. NOTE(review): presumably the reader then prompts for the file — confirm in libv3/tasks/alignments.bf.
+name_mapping = leisr.alignment_info[utility.getGlobalValue("terms.data.name_mapping")];
+if (None == name_mapping) {
+    name_mapping = {};
+    utility.ForEach (alignments.GetSequenceNames ("leisr.dataset"), "_value_", "`&name_mapping`[_value_] = _value_");
+}
+leisr.partitions_and_trees = trees.LoadAnnotatedTreeTopology.match_partitions (leisr.alignment_info[utility.getGlobalValue("terms.data.partitions")], name_mapping);
+leisr.partition_count = Abs (leisr.partitions_and_trees);
+
+io.CheckAssertion ("leisr.partition_count==1", "This analysis can only handle a single partition");
+
+
+io.ReportProgressMessageMD ("relative_rates", "Data", "Input alignment description");
+io.ReportProgressMessageMD ("relative_rates", "Data", "Loaded **" +
+    leisr.alignment_info [terms.data.sequences] + "** sequences, **" +
+    leisr.alignment_info [terms.data.sites] + "** sites, and **" + leisr.partition_count + "** partitions from \`" + leisr.alignment_info [terms.data.file] + "\`");
+leisr.filter_specification = alignments.DefineFiltersForPartitions (leisr.partitions_and_trees, "leisr.dataset" , "leisr.filter.", leisr.alignment_info);
+/*******************************************************************************************************************/
+
+
+
+/***************************************** INFERENCE **********************************************************/
+
+
+io.ReportProgressMessageMD ("relative_rates", "overall", "Obtaining alignment-wide branch-length estimates");
+
+leisr.trees = utility.Map (leisr.partitions_and_trees, "_value_", "_value_[terms.data.tree]"); // value => value['tree']
+leisr.filter_names = utility.Map (leisr.filter_specification, "_value_", "_value_[terms.data.name]"); // value => value['name']
+leisr.alignment_wide_MLES = estimators.FitSingleModel_Ext (
+    leisr.filter_names,
+    leisr.trees,
+    leisr.baseline_model_desc,
+    None,
+    None);
+
+
+// NOTE(review): presumably pins the fitted global parameters so the per-site fits do not re-estimate them — confirm in libv3/tasks/estimators.bf.
+estimators.fixSubsetOfEstimates(leisr.alignment_wide_MLES, leisr.alignment_wide_MLES[terms.global]);
+
+io.ReportProgressMessageMD ("relative_rates", "overall", ">Fitted an alignment-wide model. **Log-L = " + leisr.alignment_wide_MLES [terms.fit.log_likelihood] + "**.");
+
+/**
+    Set up the table to display to the screen
+*/
+
+// The header flag below is read, and then cleared, by leisr.store_results when the first result row is printed.
+leisr.table_screen_output = {{"Site", "Rel. rate (MLE)", "95% profile likelihood CI"}};
+leisr.table_output_options = {terms.table_options.header : TRUE, terms.table_options.minimum_column_width : 16, terms.table_options.align : "center"};
+
+leisr.site_patterns = alignments.Extract_site_patterns (leisr.filter_names[0]);
+
+// set-up model for site-level fitting in the next couple of lines, where rv turned off
+leisr.site_model = model.generic.DefineModel("leisr.Baseline.ModelDescription",
+    "relative_rates_site_model_instance", {
+        "0": parameters.Quote(terms.global),
+    },
+    leisr.filter_names[0],
+    None);
+
+
+// Maps the model instance name to its definition; passed to estimators.ApplyExistingEstimates and to every queued job.
+leisr.site_model_mapping = {"relative_rates_site_model_instance" : leisr.site_model};
+
+// leisr.site_tree is created from the information in leisr.trees[0]
+// and populated with (the default) model
+model.ApplyModelToTree( "leisr.site_tree", leisr.trees[0], {terms.default : leisr.site_model}, None);
+
+// create a site filter; this is an ugly hack for the time being
+// alignments.serialize_site_filter returns HBL code as string in
+// which the function `__make_filter` is defined.
+ExecuteCommands (alignments.serialize_site_filter (
+    leisr.filter_names[0],
+    ((leisr.site_patterns[0])[terms.data.sites])[0]));
+// Bootstrap: build the filter from the first site of pattern 0 so that the likelihood function below can be declared.
+__make_filter ("leisr.site_filter");
+
+LikelihoodFunction leisr.site_likelihood = (leisr.site_filter, leisr.site_tree);
+// Name of the global variable that is declared below and passed to ApplyExistingEstimates as the proportional scaler.
+leisr.site_model_scaler_name = "leisr.site_rate_estimate";
+// Filled by leisr.store_results, keyed by (_site_index_ + 1).
+leisr.rate_estimates = {};
+
+/**
+    this will store site estimates, which will then be dumped to JSON
+*/
+
+parameters.DeclareGlobal (leisr.site_model_scaler_name, None);
+
+estimators.ApplyExistingEstimates ("leisr.site_likelihood", leisr.site_model_mapping, leisr.alignment_wide_MLES,
+    {"0" : leisr.site_model_scaler_name} // proportional scaler
+    );
+
+// Queue description: the likelihood function, model, loaded libv3 modules and the scaler-name variable listed for the queue.
+leisr.queue = mpi.CreateQueue ({terms.mpi.LikelihoodFunctions: {{"leisr.site_likelihood"}},
+    terms.mpi.Models : {{"leisr.site_model"}},
+    terms.mpi.Headers : utility.GetListOfLoadedModules ("libv3/"),
+    terms.mpi.Variables : {{"leisr.site_model_scaler_name"}}
+    });
+
+/* run the main loop over all unique site pattern combinations */
+utility.ForEachPair (leisr.site_patterns, "_pattern_", "_pattern_info_",
+    '
+        mpi.QueueJob (leisr.queue, "leisr.handle_a_site", {"0" : "leisr.site_likelihood",
+            "1" : alignments.serialize_site_filter
+                ((leisr.filter_specification[0])[terms.data.name],
+                (_pattern_info_[terms.data.sites])[0]),
+            "2" : _pattern_info_,
+            "3" : leisr.site_model_mapping
+            },
+            "leisr.store_results");
+    '
+);
+
+mpi.QueueComplete (leisr.queue);
+// Collect the MLE of every stored site estimate; NOTE(review): "0+_value_" presumably coerces each entry to a number — confirm.
+leisr.site_rates = utility.Map( utility.Values(utility.Map (leisr.rate_estimates, "_value_", "_value_[terms.fit.MLE]")), "_value_", "0+_value_");
+leisr.stats = math.GatherDescriptiveStats(leisr.site_rates);
+
+io.ReportProgressMessageMD ("relative_rates", "Stats", "Rate distribution summary");
+io.ReportProgressMessageMD ("relative_rates", "Stats", "* **Mean**: " + Format (leisr.stats[terms.math.mean], 6, 2));
+io.ReportProgressMessageMD ("relative_rates", "Stats", "* **Median**: " + Format (leisr.stats[terms.math.median], 6, 2));
+io.ReportProgressMessageMD ("relative_rates", "Stats", "* **Std.Dev**: " + Format (leisr.stats[terms.math.stddev], 6, 2));
+io.ReportProgressMessageMD ("relative_rates", "Stats", "* **95% Range**: [" + Format (leisr.stats[terms.math._2.5], 5,2) + "," + Format (leisr.stats[terms.math._97.5], 5,2) + "]");
+
+// leisr.storerv: the string "None" without rate variation; otherwise the MLE of each entry of the fit's terms.global record.
+if (leisr.use_rate_variation == "No"){
+    leisr.storerv = "None";
+} else {
+    leisr.storerv = utility.Map(
+        utility.Map (leisr.alignment_wide_MLES[utility.getGlobalValue("terms.global")], "_value_", ' {terms.fit.MLE : _value_[terms.fit.MLE]}'),
+        "_value_",
+        "_value_[terms.fit.MLE]");
+}
+// The inner Map above wraps each MLE in a one-key dictionary and the outer Map unwraps it again.
+tree_definition = utility.Map (leisr.partitions_and_trees, "_partition_", '_partition_[terms.data.tree]');
+io.SpoolJSON ({ terms.json.input : {terms.json.file: leisr.alignment_info[terms.data.file],
+    terms.json.sequences: leisr.alignment_info[terms.data.sequences],
+    terms.json.sites: leisr.alignment_info[terms.data.sites],
+    terms.json.tree_string: (tree_definition[0])[terms.trees.newick_with_lengths]},
+    terms.json.analysis : leisr.analysis_description,
+    terms.json.relative_site_rates : leisr.rate_estimates,
+    terms.json.global: {terms.json.model: leisr.baseline_model_name,
+        terms.model.rate_variation: leisr.storerv,
+        terms.efv_estimate: (leisr.alignment_wide_MLES[utility.getGlobalValue("terms.efv_estimate")])["VALUEINDEXORDER"][0],
+        terms.json.tree_string: (leisr.alignment_wide_MLES[terms.fit.trees])[0],
+        terms.json.log_likelihood: leisr.alignment_wide_MLES[terms.fit.log_likelihood]}
+    },
+    leisr.alignment_info[terms.data.file] + ".site-rates.json");
+
+// The second argument above is the output path: the alignment file path with ".site-rates.json" appended.
+//----------------------------------------------------------------------------------------
+// HANDLERS
+//----------------------------------------------------------------------------------------
+
+// fit a rate at a single site
+
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+
+lfunction leisr.handle_a_site (lf, filter_data, pattern_info, model_mapping) {
+    // lf: name of the likelihood function; filter_data: HBL code that defines __make_filter;
+    // pattern_info: site-pattern record (its is_constant entry is read here); model_mapping is not used in this body.
+    GetString (lfInfo, ^lf,-1);
+    ExecuteCommands (filter_data);
+    // rebuild the likelihood function's first data filter (name taken from lfInfo) for this site
+    __make_filter ((lfInfo["Datafilters"])[0]);
+    utility.SetEnvVariable ("USE_LAST_RESULTS", TRUE);
+
+    if (pattern_info [utility.getGlobalValue("terms.data.is_constant")]) {
+        // the MLE for a constant site is 0;
+        // only the CI is non-trivial
+        ^(utility.getGlobalValue("leisr.site_model_scaler_name")) = 0;
+
+    } else {
+        // variable site: start the scaler at 1 and optimize the likelihood function
+        ^(utility.getGlobalValue("leisr.site_model_scaler_name")) = 1;
+        Optimize (results, ^lf);
+    }
+    // returns the 0.95 profile CI of the scaler as computed by parameters.GetProfileCI
+    return parameters.GetProfileCI (utility.getGlobalValue("leisr.site_model_scaler_name"), lf, 0.95);
+}
+
+
+// handle result processing
+
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+
+
+lfunction leisr.store_results (node, result, arguments) {
+    pattern_info = arguments [2];
+    // result: value returned by leisr.handle_a_site for this pattern; arguments: the job's argument dictionary
+    // (entry 2 is the pattern record); node is not used in this body.
+    if ((^'leisr.table_output_options')[utility.getGlobalValue("terms.table_options.header")]) {
+        // first call only: print the section title and the table header, then clear the header flag
+        io.ReportProgressMessageMD ("relative_rates", "sites", "Site rate estimates and associated 95% profile likelihood estimates\n");
+
+        fprintf (stdout,
+            io.FormatTableRow (^'leisr.table_screen_output',^'leisr.table_output_options'));
+        (^'leisr.table_output_options')[utility.getGlobalValue("terms.table_options.header")] = FALSE;
+    }
+
+    // every site listed for the pattern gets the same result, stored under (_site_index_ + 1) and printed as one row
+    utility.ForEach (pattern_info[utility.getGlobalValue("terms.data.sites")], "_site_index_",
+        "
+            leisr.rate_estimates [_site_index_+1] = `&result`;
+            result_row = {1,3};
+            result_row [0] = '' + (_site_index_ + 1);
+            result_row [1] = Format((`&result`)[terms.fit.MLE],6,3);
+            result_row [2] = Format((`&result`)[terms.lower_bound],6,3) + ' :' +Format((`&result`)[terms.upper_bound],6,3);
+            fprintf (stdout,
+                io.FormatTableRow (result_row,leisr.table_output_options));
+        "
+    );
+    // No value is computed here. NOTE(review): the queue presumably ignores the callback's return value — confirm in libv3/tasks/mpi.bf.
+    return None;
+
+}
+
+
+
diff --git a/res/TemplateBatchFiles/RELrates.bf
b/res/TemplateBatchFiles/RELrates.bf deleted file mode 100644 index 753c0abc5..000000000 --- a/res/TemplateBatchFiles/RELrates.bf +++ /dev/null @@ -1,351 +0,0 @@ -RequireVersion("2.3.5"); - -LoadFunctionLibrary("libv3/UtilityFunctions.bf"); -LoadFunctionLibrary("libv3/IOFunctions.bf"); -LoadFunctionLibrary("libv3/stats.bf"); -LoadFunctionLibrary("libv3/all-terms.bf"); - -LoadFunctionLibrary("libv3/tasks/ancestral.bf"); -LoadFunctionLibrary("libv3/tasks/alignments.bf"); -LoadFunctionLibrary("libv3/tasks/estimators.bf"); -LoadFunctionLibrary("libv3/tasks/trees.bf"); -LoadFunctionLibrary("libv3/tasks/mpi.bf"); -LoadFunctionLibrary("libv3/convenience/math.bf"); -LoadFunctionLibrary("libv3/models/rate_variation.bf"); - -LoadFunctionLibrary("libv3/models/DNA.bf"); -LoadFunctionLibrary("libv3/models/DNA/GTR.bf"); -LoadFunctionLibrary("libv3/models/DNA/HKY85.bf"); -LoadFunctionLibrary("libv3/models/DNA/JC69.bf"); -LoadFunctionLibrary("libv3/models/protein.bf"); -LoadFunctionLibrary("libv3/models/protein/empirical.bf"); - - -/*------------------------------------------------------------------------------*/ - -utility.ToggleEnvVariable ("NORMALIZE_SEQUENCE_NAMES", 1); - -relative_rates.analysis_description = { - terms.io.info: "RELrates: Infer relative amino-acid or nucleotide rates from a fixed nucleotide or amino-acid alignment and tree. Relative site-specific substitution rates are - inferred by first optimizing alignment-wide branch lengths, and then inferring a site-specific uniform tree scaler", - terms.io.version: "0.1alpha", - terms.io.reference: "@TBD. Analysis based on Rate4Site method, : Pupko, T., Bell, R. E., Mayrose, I., Glaser, F. & Ben-Tal, N. Rate4Site: an algorithmic tool for the identification of functional regions in proteins by surface mapping of evolutionary determinants within their homologues. 
Bioinformatics 18, S71–S77 (2002).", - terms.io.authors: "Sergei L Kosakovsky Pond and Stephanie J Spielman", - terms.io.contact: "{spond,stephanie.spielman}@temple.edu" -}; - -io.DisplayAnalysisBanner(relative_rates.analysis_description); - -/***************************************** MODEL SELECTION **********************************************************/ - -relative_rates.protein_type = "Protein"; -relative_rates.nucleotide_type = "Nucleotide"; -relative_rates.analysis_type = io.SelectAnOption ({{relative_rates.protein_type , "Infer relative rates from a protein (amino-acid) alignment"}, {relative_rates.nucleotide_type, "Infer relative rates from a nucleotide alignment"}}, - "Select your analysis type:"); - - -if (relative_rates.analysis_type == relative_rates.protein_type) { - relative_rates.baseline_model = io.SelectAnOption (models.protein.empirical_models, - "Select a protein model:"); - - relative_rates.use_rate_variation = io.SelectAnOption( {{"Gamma", "Use a four-category discrete gamma distribution when optimizing branch lengths."}, - {"GDD", "Use a four-category general discrete distribution when optimizing branch lengths."}, - {"No", "Do not consider rate variation when optimizing branch lengths."} - }, - "Optimize branch lengths with rate variation?"); - // "Yes", "No" - relative_rates.plusF = io.SelectAnOption ({{"Yes", "Use empirical (+F) amino-acid frequencies ."}, {"No", "Use default amino-acid frequencies."}}, - "Use a +F model for initial branch length optimization?"); - if (relative_rates.plusF == "Yes"){ - relative_rates.generators = models.protein.empirical.plusF_generators; - } - else { - relative_rates.generators = models.protein.empirical.default_generators; - } -} -else { - - relative_rates.baseline_model = io.SelectAnOption (models.DNA.models, - "Select a nucleotide model:"); - relative_rates.use_rate_variation = io.SelectAnOption( {{"Gamma", "Use a four-category discrete gamma distribution when optimizing branch lengths."}, - {"GDD", 
"Use a four-category general discrete distribution when optimizing branch lengths."}, - {"No", "Do not consider rate variation when optimizing branch lengths."} - }, - "Optimize branch lengths with rate variation?"); - - relative_rates.generators = models.DNA.generators; - relative_rates.plusF = "No"; -} - - - - - -function relative_rates.Baseline.ModelDescription(type){ - def = Call( relative_rates.generators[relative_rates.baseline_model], type); - return def; -} - -function relative_rates.Baseline.ModelDescription.withGamma(type){ - def = relative_rates.Baseline.ModelDescription(type); - def [terms.model.rate_variation] = rate_variation.types.Gamma.factory ({terms.rate_variation.bins : 4}); - return def; -} -function relative_rates.Baseline.ModelDescription.withGDD4(type){ - def = relative_rates.Baseline.ModelDescription(type); - def [terms.model.rate_variation] = rate_variation.types.GDD.factory ({terms.rate_variation.bins : 4}); - return def; -} - - -relative_rates.baseline_model_name = relative_rates.baseline_model; -if (relative_rates.plusF == "Yes"){ - relative_rates.baseline_model_name = relative_rates.baseline_model_name + "+F"; -} - -if (relative_rates.use_rate_variation == "Gamma"){ - relative_rates.baseline_model_name = relative_rates.baseline_model_name + " with 4 category Gamma rates"; - relative_rates.baseline_model_desc = "relative_rates.Baseline.ModelDescription.withGamma"; -} -else { - if (relative_rates.use_rate_variation == "GDD"){ - relative_rates.baseline_model_name = relative_rates.baseline_model_name + " with 4 category GDD rates"; - relative_rates.baseline_model_desc = "relative_rates.Baseline.ModelDescription.withGDD4"; - } - else { - relative_rates.baseline_model_name = relative_rates.baseline_model_name; - relative_rates.baseline_model_desc = "relative_rates.Baseline.ModelDescription"; - } -} -/**************************************************************/ - - - - 
-/*******************************************************************************************************************/ - - -/***************************************** LOAD DATASET **********************************************************/ -SetDialogPrompt ("Specify a multiple sequence alignment file"); -relative_rates.alignment_info = alignments.ReadNucleotideDataSet ("relative_rates.dataset", NOne); - -name_mapping = relative_rates.alignment_info[utility.getGlobalValue("terms.data.name_mapping")]; -if (None == name_mapping) { - name_mapping = {}; - utility.ForEach (alignments.GetSequenceNames ("relative_rates.dataset"), "_value_", "`&name_mapping`[_value_] = _value_"); -} -relative_rates.partitions_and_trees = trees.LoadAnnotatedTreeTopology.match_partitions (relative_rates.alignment_info[utility.getGlobalValue("terms.data.partitions")], name_mapping); -relative_rates.partition_count = Abs (relative_rates.partitions_and_trees); - -io.CheckAssertion ("relative_rates.partition_count==1", "This analysis can only handle a single partition"); - - -io.ReportProgressMessageMD ("relative_rates", "Data", "Input alignment description"); -io.ReportProgressMessageMD ("relative_rates", "Data", "Loaded **" + - relative_rates.alignment_info [terms.data.sequences] + "** sequences, **" + - relative_rates.alignment_info [terms.data.sites] + "** sites, and **" + relative_rates.partition_count + "** partitions from \`" + relative_rates.alignment_info [terms.data.file] + "\`"); -relative_rates.filter_specification = alignments.DefineFiltersForPartitions (relative_rates.partitions_and_trees, "relative_rates.dataset" , "relative_rates.filter.", relative_rates.alignment_info); -/*******************************************************************************************************************/ - - - -/***************************************** INFERENCE **********************************************************/ - - -io.ReportProgressMessageMD ("relative_rates", "overall", "Obtaining 
alignment-wide branch-length estimates"); - -relative_rates.trees = utility.Map (relative_rates.partitions_and_trees, "_value_", "_value_[terms.data.tree]"); // value => value['tree'] -relative_rates.filter_names = utility.Map (relative_rates.filter_specification, "_value_", "_value_[terms.data.name]"); // value => value['name'] -relative_rates.alignment_wide_MLES = estimators.FitSingleModel_Ext ( - relative_rates.filter_names, - relative_rates.trees, - relative_rates.baseline_model_desc, - None, - None); - - - -estimators.fixSubsetOfEstimates(relative_rates.alignment_wide_MLES, relative_rates.alignment_wide_MLES[terms.global]); - -io.ReportProgressMessageMD ("relative_rates", "overall", ">Fitted an alignment-wide model. **Log-L = " + relative_rates.alignment_wide_MLES [terms.fit.log_likelihood] + "**."); - -/** - Set up the table to display to the screen -*/ - - -relative_rates.table_screen_output = {{"Site", "Rel. rate (MLE)", "95% profile likelihood CI"}}; -relative_rates.table_output_options = {terms.table_options.header : TRUE, terms.table_options.minimum_column_width : 16, terms.table_options.align : "center"}; - -relative_rates.site_patterns = alignments.Extract_site_patterns (relative_rates.filter_names[0]); - -// set-up model for site-level fitting in the next couple of lines, where rv turned off -relative_rates.site_model = model.generic.DefineModel("relative_rates.Baseline.ModelDescription", - "relative_rates_site_model_instance", { - "0": parameters.Quote(terms.global), - }, - relative_rates.filter_names[0], - None); - - - -relative_rates.site_model_mapping = {"relative_rates_site_model_instance" : relative_rates.site_model}; - -// relative_rates.site_tree is created from the information in relative_rates.trees[0] -// and populated with (the default) model -model.ApplyModelToTree( "relative_rates.site_tree", relative_rates.trees[0], {terms.default : relative_rates.site_model}, None); - -// create a site filter; this is an ugly hack for the time being 
-// alignments.serialize_site_filter returns HBL code as string in -// which the function `__make_filter` is defined. -ExecuteCommands (alignments.serialize_site_filter ( - relative_rates.filter_names[0], - ((relative_rates.site_patterns[0])[terms.data.sites])[0])); - -__make_filter ("relative_rates.site_filter"); - -LikelihoodFunction relative_rates.site_likelihood = (relative_rates.site_filter, relative_rates.site_tree); - -relative_rates.site_model_scaler_name = "relative_rates.site_rate_estimate"; - -relative_rates.rate_estimates = {}; - -/** - this will store site estimates, which will then be dumped to JSON -*/ - -parameters.DeclareGlobal (relative_rates.site_model_scaler_name, None); - -estimators.ApplyExistingEstimates ("relative_rates.site_likelihood", relative_rates.site_model_mapping, relative_rates.alignment_wide_MLES, - {"0" : relative_rates.site_model_scaler_name} // proportional scaler - ); - - -relative_rates.queue = mpi.CreateQueue ({terms.mpi.LikelihoodFunctions: {{"relative_rates.site_likelihood"}}, - terms.mpi.Models : {{"relative_rates.site_model"}}, - terms.mpi.Headers : utility.GetListOfLoadedModules ("libv3/"), - terms.mpi.Variables : {{"relative_rates.site_model_scaler_name"}} - }); - -/* run the main loop over all unique site pattern combinations */ -utility.ForEachPair (relative_rates.site_patterns, "_pattern_", "_pattern_info_", - ' - mpi.QueueJob (relative_rates.queue, "relative_rates.handle_a_site", {"0" : "relative_rates.site_likelihood", - "1" : alignments.serialize_site_filter - ((relative_rates.filter_specification[0])[terms.data.name], - (_pattern_info_[terms.data.sites])[0]), - "2" : _pattern_info_, - "3" : relative_rates.site_model_mapping - }, - "relative_rates.store_results"); - ' -); - -mpi.QueueComplete (relative_rates.queue); - -relative_rates.site_rates = utility.Map( utility.Values(utility.Map (relative_rates.rate_estimates, "_value_", "_value_[terms.fit.MLE]")), "_value_", "0+_value_"); -relative_rates.stats = 
math.GatherDescriptiveStats(relative_rates.site_rates); - -io.ReportProgressMessageMD ("relative_rates", "Stats", "Rate distribution summary"); -io.ReportProgressMessageMD ("relative_rates", "Stats", "* **Mean**: " + Format (relative_rates.stats[terms.math.mean], 6, 2)); -io.ReportProgressMessageMD ("relative_rates", "Stats", "* **Median**: " + Format (relative_rates.stats[terms.math.median], 6, 2)); -io.ReportProgressMessageMD ("relative_rates", "Stats", "* **Std.Dev**: " + Format (relative_rates.stats[terms.math.stddev], 6, 2)); -io.ReportProgressMessageMD ("relative_rates", "Stats", "* **95% Range**: [" + Format (relative_rates.stats[terms.math._2.5], 5,2) + "," + Format (relative_rates.stats[terms.math._97.5], 5,2) + "]"); - - -if (relative_rates.use_rate_variation == "No"){ - relative_rates.storerv = "None"; -} else { - relative_rates.storerv = utility.Map( - utility.Map (relative_rates.alignment_wide_MLES[utility.getGlobalValue("terms.global")], "_value_", ' {terms.fit.MLE : _value_[terms.fit.MLE]}'), - "_value_", - "_value_[terms.fit.MLE]"); -} - -tree_definition = utility.Map (relative_rates.partitions_and_trees, "_partition_", '_partition_[terms.data.tree]'); -io.SpoolJSON ({ terms.json.input : {terms.json.file: relative_rates.alignment_info[terms.data.file], - terms.json.sequences: relative_rates.alignment_info[terms.data.sequences], - terms.json.sites: relative_rates.alignment_info[terms.data.sites], - terms.json.tree_string: (tree_definition[0])[terms.trees.newick_with_lengths]}, - terms.json.analysis : relative_rates.analysis_description, - terms.json.relative_site_rates : relative_rates.rate_estimates, - terms.json.global: {terms.json.model: relative_rates.baseline_model_name, - terms.model.rate_variation: relative_rates.storerv, - terms.efv_estimate: (relative_rates.alignment_wide_MLES[utility.getGlobalValue("terms.efv_estimate")])["VALUEINDEXORDER"][0], - terms.json.tree_string: (relative_rates.alignment_wide_MLES[terms.fit.trees])[0], - 
terms.json.log_likelihood: relative_rates.alignment_wide_MLES[terms.fit.log_likelihood]} - }, - relative_rates.alignment_info[terms.data.file] + ".site-rates.json"); - - -//---------------------------------------------------------------------------------------- -// HANDLERS -//---------------------------------------------------------------------------------------- - -// fit a rate at a single site - -//---------------------------------------------------------------------------------------- -//---------------------------------------------------------------------------------------- - -lfunction relative_rates.handle_a_site (lf, filter_data, pattern_info, model_mapping) { - - - GetString (lfInfo, ^lf,-1); - ExecuteCommands (filter_data); - - __make_filter ((lfInfo["Datafilters"])[0]); - utility.SetEnvVariable ("USE_LAST_RESULTS", TRUE); - - if (pattern_info [utility.getGlobalValue("terms.data.is_constant")]) { - // the MLE for a constant site is 0; - // only the CI is non-trivial - ^(utility.getGlobalValue("relative_rates.site_model_scaler_name")) = 0; - - } else { - - ^(utility.getGlobalValue("relative_rates.site_model_scaler_name")) = 1; - Optimize (results, ^lf); - } - - return parameters.GetProfileCI (utility.getGlobalValue("relative_rates.site_model_scaler_name"), lf, 0.95); -} - - -// handle result processing - -//---------------------------------------------------------------------------------------- -//---------------------------------------------------------------------------------------- - - -lfunction relative_rates.store_results (node, result, arguments) { - pattern_info = arguments [2]; - - - if ((^'relative_rates.table_output_options')[utility.getGlobalValue("terms.table_options.header")]) { - - io.ReportProgressMessageMD ("relative_rates", "sites", "Site rate estimates and associated 95% profile likelihood estimates\n"); - - fprintf (stdout, - io.FormatTableRow (^'relative_rates.table_screen_output',^'relative_rates.table_output_options')); - 
(^'relative_rates.table_output_options')[utility.getGlobalValue("terms.table_options.header")] = FALSE; - } - - - utility.ForEach (pattern_info[utility.getGlobalValue("terms.data.sites")], "_site_index_", - " - relative_rates.rate_estimates [_site_index_+1] = `&result`; - result_row = {1,3}; - result_row [0] = '' + (_site_index_ + 1); - result_row [1] = Format((`&result`)[terms.fit.MLE],6,3); - result_row [2] = Format((`&result`)[terms.lower_bound],6,3) + ' :' +Format((`&result`)[terms.upper_bound],6,3); - fprintf (stdout, - io.FormatTableRow (result_row,relative_rates.table_output_options)); - " - ); - - return rate_statistics; - -} - - - diff --git a/res/TemplateBatchFiles/files.lst b/res/TemplateBatchFiles/files.lst index 226cf7ed6..6141516eb 100644 --- a/res/TemplateBatchFiles/files.lst +++ b/res/TemplateBatchFiles/files.lst @@ -11,7 +11,7 @@ "FEL-Contrast","Use a FEL method to test which sites in a gene may be associated with adaptation to a different environment.","SelectionAnalyses/FEL-contrast.bf"; "","Evolutionary rates on non-coding data.","!Relative evolutionary rate inference"; -"RELrates","Infer relative evolutionary rates on a nucleotide or protein alignment, in the spirit of Rate4Site (PMID: 12169533).","RELrates.bf"; +"LEISR","[LEISR] Infer relative evolutionary rates on a nucleotide or protein alignment, in a spirit similar to Rate4Site (PMID: 12169533).","LEISR.bf"; "","Perform a maximum likelihood analysis on a single file given a single tree.","!Basic Analyses"; "ACD","Analyse codon data with a variery of standard models using given tree.","AnalyzeCodonData.bf"; diff --git a/tests/hbltests/libv3/RELrates.wbf b/tests/hbltests/libv3/LEISR.wbf similarity index 81% rename from tests/hbltests/libv3/RELrates.wbf rename to tests/hbltests/libv3/LEISR.wbf index 2edf896ff..9d4bbc8d9 100644 --- a/tests/hbltests/libv3/RELrates.wbf +++ b/tests/hbltests/libv3/LEISR.wbf @@ -3,7 +3,7 @@ utility.ToggleEnvVariable ("OPTIMIZATION_PRECISION", 1); 
utility.ToggleEnvVariable ("OPTIMIZATION_TIME_HARD_LIMIT", 1); -LoadFunctionLibrary("RELrates.bf", { +LoadFunctionLibrary("LEISR.bf", { "0": "Protein", "1": "WAG", "2": "Gamma", @@ -13,7 +13,7 @@ LoadFunctionLibrary("RELrates.bf", { }); -LoadFunctionLibrary("RELrates.bf", { +LoadFunctionLibrary("LEISR.bf", { "0": "Protein", "1": "WAG", "2": "GDD", @@ -23,7 +23,7 @@ LoadFunctionLibrary("RELrates.bf", { }); -LoadFunctionLibrary("RELrates.bf", { +LoadFunctionLibrary("LEISR.bf", { "0": "Protein", "1": "WAG", "2": "No", @@ -32,7 +32,7 @@ LoadFunctionLibrary("RELrates.bf", { "5": "Y" }); -LoadFunctionLibrary("RELrates.bf", { +LoadFunctionLibrary("LEISR.bf", { "0": "Nucleotide", "1": "GTR", "2": "Gamma", @@ -40,7 +40,7 @@ LoadFunctionLibrary("RELrates.bf", { "4": "Y" }); -LoadFunctionLibrary("RELrates.bf", { +LoadFunctionLibrary("LEISR.bf", { "0": "Nucleotide", "1": "GTR", "2": "No", From 7be037b181974ada30b06e97c0ef23dd869f096d Mon Sep 17 00:00:00 2001 From: Steven Weaver Date: Mon, 16 Oct 2017 13:38:25 -0400 Subject: [PATCH 14/19] adding ProteinGTRFit and RELrates tests to integration test --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 857961515..f09866086 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,6 +26,8 @@ env: - METHOD_TEST=tests/hbltests/libv3/RELAX.wbf - METHOD_TEST=tests/hbltests/libv3/aBSREL.wbf - METHOD_TEST=tests/hbltests/libv3/BUSTED.wbf + - METHOD_TEST=tests/hbltests/libv3/ProteinGTRFit.wbf + - METHOD_TEST=tests/hbltests/libv3/Relrates.wbf language: c++ compiler: From 786905e2e7f47aec1cec2a5ad31bf50c5d221c5b Mon Sep 17 00:00:00 2001 From: Steven Weaver Date: Mon, 16 Oct 2017 13:43:06 -0400 Subject: [PATCH 15/19] updating relrates name --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f09866086..b9113cc2b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,7 +27,7 @@ env: - METHOD_TEST=tests/hbltests/libv3/aBSREL.wbf - 
METHOD_TEST=tests/hbltests/libv3/BUSTED.wbf - METHOD_TEST=tests/hbltests/libv3/ProteinGTRFit.wbf - - METHOD_TEST=tests/hbltests/libv3/Relrates.wbf + - METHOD_TEST=tests/hbltests/libv3/LEISR.wbf language: c++ compiler: From 44e5b88a68e5eed02650a7256d81a181276fdd86 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 16 Oct 2017 16:15:31 -0400 Subject: [PATCH 16/19] Added mtMAM model of AA evolution --- .../libv3/models/protein.bf | 3 +- .../libv3/models/protein/empirical.bf | 387 +++++++++++++----- 2 files changed, 293 insertions(+), 97 deletions(-) diff --git a/res/TemplateBatchFiles/libv3/models/protein.bf b/res/TemplateBatchFiles/libv3/models/protein.bf index f7b261b77..dd00da4cd 100644 --- a/res/TemplateBatchFiles/libv3/models/protein.bf +++ b/res/TemplateBatchFiles/libv3/models/protein.bf @@ -13,7 +13,8 @@ models.protein.alphabet = {{"A","C","D","E","F","G","H","I","K","L","M","N","P", models.protein.empirical_models = {{"LG", "Empirical model of protein evolution from Le and Gascuel (2008). Ref: https://doi.org/10.1093/molbev/msn067"}, {"WAG", "Empirical model of protein evolution from Whelan and Goldman (2001). Ref: https://doi.org/10.1093/oxfordjournals.molbev.a003851"}, {"JTT", "Empirical model of protein evolution from Jones, Taylor, and Thornton (1996). Ref: https://doi.org/10.1093/bioinformatics/8.3.275"}, - {"JC69", "Empirical model of protein evolution with equal exchangeability rates among all amino acids, also known as JC69."}}; + {"JC69", "Empirical model of protein evolution with equal exchangeability rates among all amino acids, also known as JC69."}, + {"mtMAM", "Empirical model of protein evolution for mammalian mitochondrial genomes from Yang N, Nielsen R, and Hasegawa M. (1998). 
Ref: http://dx.doi.org/10.1093/oxfordjournals.molbev.a025888"}}; models.protein.dimensions = 20; diff --git a/res/TemplateBatchFiles/libv3/models/protein/empirical.bf b/res/TemplateBatchFiles/libv3/models/protein/empirical.bf index f716c46be..3957c79f9 100644 --- a/res/TemplateBatchFiles/libv3/models/protein/empirical.bf +++ b/res/TemplateBatchFiles/libv3/models/protein/empirical.bf @@ -9,12 +9,14 @@ LoadFunctionLibrary("../../all-terms.bf"); models.protein.empirical.default_generators = {"LG": "models.protein.LG.ModelDescription", "WAG": "models.protein.WAG.ModelDescription", "JTT": "models.protein.JTT.ModelDescription", - "JC69": "models.protein.JC69.ModelDescription"}; + "JC69": "models.protein.JC69.ModelDescription", + "mtMAM": "models.protein.mtMAM.ModelDescription"}; models.protein.empirical.plusF_generators = {"LG": "models.protein.LGF.ModelDescription", "WAG": "models.protein.WAGF.ModelDescription", "JTT": "models.protein.JTTF.ModelDescription", - "JC69": "models.protein.JC69F.ModelDescription"}; + "JC69": "models.protein.JC69F.ModelDescription", + "mtMAM": "models.protein.mtMAMF.ModelDescription"}; /** @module models.protein.empirical */ @@ -376,7 +378,7 @@ function models.protein.JC69.ModelDescription(type) { /** * @name models.protein.JC69F.ModelDescription - * @description Create the baseline schema (dictionary) for the WJC69AG+F model of protein evolution + * @description Create the baseline schema (dictionary) for the JC69+F model of protein evolution * @returns {Dictionary} model description * @param {String} type */ @@ -388,6 +390,37 @@ function models.protein.JC69F.ModelDescription(type) { } + +/**************************************** mtMAM functions *************************************/ + + + /** + * @name models.protein.mtMAM.ModelDescription + * @description Create the baseline schema (dictionary) for the mtMAM model of protein evolution + * @returns {Dictionary} model description + * @param {String} type + */ +function 
models.protein.mtMAM.ModelDescription(type) { + models.protein.mtMAM.ModelDescription.model_definition = models.protein.empirical.ModelDescription(type); + models.protein.mtMAM.ModelDescription.model_definition [terms.model.empirical_rates] = models.protein.mtMAM.Rij; + models.protein.mtMAM.ModelDescription.model_definition [terms.model.frequency_estimator] = "models.protein.mtMAM.frequencies"; + return models.protein.mtMAM.ModelDescription.model_definition; +} + +/** + * @name models.protein.mtMAMF.ModelDescription + * @description Create the baseline schema (dictionary) for the mtMAM+F model of protein evolution + * @returns {Dictionary} model description + * @param {String} type + */ +function models.protein.mtMAMF.ModelDescription(type) { + models.protein.mtMAMF.ModelDescription.model_definition = models.protein.mtMAM.ModelDescription(type); + models.protein.mtMAMF.ModelDescription.model_definition [terms.model.frequency_estimator] = "frequencies.empirical.protein"; + models.protein.mtMAMF.ModelDescription.model_definition [terms.model.efv_estimate_name] = utility.getGlobalValue("terms.frequencies._20x1"); + return models.protein.mtMAMF.ModelDescription.model_definition; +} + + /*=============================================================================================*/ /** Below this section are all of the empirical matrices and frequency vectors, including Rij **/ @@ -429,7 +462,7 @@ lfunction models.protein.WAG.frequencies (model, namespace, datafilter) { } -/* Define a dictionary of amino-acid exchangeability rates for the WAG model of protein evolution. Note that this dictionary has been **normalized** using the WAG default frequencies. */ +/* Define a dictionary of amino-acid exchangeability rates for the WAG model of protein evolution. 
*/ models.protein.WAG.Rij = { "A":{ "C":0.02081175, @@ -702,7 +735,7 @@ lfunction models.protein.LG.frequencies (model, namespace, datafilter) { return model; } -/* Define a dictionary of amino-acid exchangeability rates for the LG model of protein evolution. Note that this dictionary has been **normalized** using the LG default frequencies. */ +/* Define a dictionary of amino-acid exchangeability rates for the LG model of protein evolution. */ models.protein.LG.Rij = { "A":{ "C":0.03522956, @@ -974,7 +1007,7 @@ function models.protein.JTT.frequencies (model, namespace, datafilter) { return model; } -/* Define a dictionary of amino-acid exchangeability rates for the JTT model of protein evolution. Note that this dictionary has been **normalized** using the JTT default frequencies. */ +/* Define a dictionary of amino-acid exchangeability rates for the JTT model of protein evolution. */ models.protein.JTT.Rij = { "A":{ "C":0.01164983, @@ -1245,7 +1278,7 @@ lfunction models.protein.JC69.frequencies (model, namespace, datafilter) { return model; } -/* Define a dictionary of equal amino-acid exchangeability rates for the JC69 model of protein evolution. Note that this dictionary has been **normalized** using the JC69 default frequencies. */ +/* Define a dictionary of equal amino-acid exchangeability rates for the JC69 model of protein evolution. 
*/ models.protein.JC69.Rij = { "A": {"C":0.05, @@ -1460,95 +1493,257 @@ models.protein.JC69.Rij = { -/************************************ R_ij matrices ***************************************/ -// -// -// models.protein.empirical.WAG.empirical_R = {{0., 1.027039999999999953, 7.389980000000000437e-01, 1.58285000000000009, 2.104939999999999867e-01, 1.416719999999999979, 3.169540000000000135e-01, 1.933350000000000068e-01, 9.062649999999999872e-01, 3.979150000000000187e-01, 8.934959999999999569e-01, 5.098479999999999679e-01, 1.438549999999999995, 9.085980000000000167e-01, 5.515710000000000335e-01, 3.370789999999999953, 2.121109999999999829, 2.006009999999999849, 1.131329999999999975e-01, 2.407350000000000045e-01}, -// {1.027039999999999953, 0., 3.029489999999999961e-02, 2.135199999999999945e-02, 3.980199999999999849e-01, 3.066740000000000022e-01, 2.489719999999999989e-01, 1.701350000000000084e-01, 7.403389999999999971e-02, 3.842869999999999897e-01, 3.904819999999999958e-01, 2.652559999999999918e-01, 1.094040000000000012e-01, 9.881790000000000018e-02, 5.281909999999999661e-01, 1.407659999999999911, 5.129839999999999955e-01, 1.002140000000000031, 7.170699999999999852e-01, 5.438330000000000108e-01}, -// {7.389980000000000437e-01, 3.029489999999999961e-02, 0., 6.174159999999999648, 4.673039999999999833e-02, 8.655840000000000201e-01, 9.306759999999999478e-01, 3.943699999999999983e-02, 4.79854999999999976e-01, 8.480469999999999675e-02, 1.03753999999999999e-01, 5.429420000000000357, 4.239840000000000275e-01, 6.167829999999999702e-01, 1.473039999999999905e-01, 1.071760000000000046, 3.74865999999999977e-01, 1.523349999999999982e-01, 1.297669999999999935e-01, 3.257109999999999728e-01}, -// {1.58285000000000009, 2.135199999999999945e-02, 6.174159999999999648, 0., 8.113389999999999491e-02, 5.677170000000000272e-01, 5.700250000000000039e-01, 1.273950000000000082e-01, 2.584429999999999783, 1.542630000000000112e-01, 3.151240000000000152e-01, 9.471979999999999844e-01, 
6.823550000000000448e-01, 5.469470000000000276, 4.391570000000000196e-01, 7.049389999999999823e-01, 8.227649999999999686e-01, 5.887310000000000043e-01, 1.565570000000000017e-01, 1.963030000000000053e-01}, -// {2.104939999999999867e-01, 3.980199999999999849e-01, 4.673039999999999833e-02, 8.113389999999999491e-02, 0., 4.993100000000000316e-02, 6.793709999999999471e-01, 1.059469999999999912, 8.88359999999999983e-02, 2.115169999999999995, 1.190630000000000077, 9.616210000000000035e-02, 1.614440000000000042e-01, 9.992080000000000406e-02, 1.027109999999999967e-01, 5.459310000000000551e-01, 1.719030000000000002e-01, 6.498920000000000252e-01, 1.529640000000000111, 6.454279999999999795}, -// {1.416719999999999979, 3.066740000000000022e-01, 8.655840000000000201e-01, 5.677170000000000272e-01, 4.993100000000000316e-02, 0., 2.494099999999999928e-01, 3.045010000000000078e-02, 3.735580000000000012e-01, 6.130370000000000263e-02, 1.741000000000000048e-01, 1.125559999999999894, 2.435700000000000087e-01, 3.30052000000000012e-01, 5.846649999999999903e-01, 1.341820000000000013, 2.25833000000000006e-01, 1.872469999999999968e-01, 3.36982999999999977e-01, 1.036040000000000016e-01}, -// {3.169540000000000135e-01, 2.489719999999999989e-01, 9.306759999999999478e-01, 5.700250000000000039e-01, 6.793709999999999471e-01, 2.494099999999999928e-01, 0., 1.381900000000000073e-01, 8.904320000000000013e-01, 4.994620000000000171e-01, 4.041409999999999725e-01, 3.956290000000000084, 6.961979999999999835e-01, 4.294109999999999872, 2.137150000000000105, 7.401689999999999658e-01, 4.733069999999999777e-01, 1.183580000000000049e-01, 2.625689999999999968e-01, 3.873439999999999994}, -// {1.933350000000000068e-01, 1.701350000000000084e-01, 3.943699999999999983e-02, 1.273950000000000082e-01, 1.059469999999999912, 3.045010000000000078e-02, 1.381900000000000073e-01, 0., 3.238320000000000087e-01, 3.170970000000000066, 4.257460000000000022, 5.542359999999999509e-01, 9.992879999999999818e-02, 1.139170000000000044e-01, 
1.869790000000000063e-01, 3.194400000000000017e-01, 1.458159999999999901, 7.821299999999999919, 2.124830000000000052e-01, 4.201699999999999879e-01}, -// {9.062649999999999872e-01, 7.403389999999999971e-02, 4.79854999999999976e-01, 2.584429999999999783, 8.88359999999999983e-02, 3.735580000000000012e-01, 8.904320000000000013e-01, 3.238320000000000087e-01, 0., 2.575549999999999784e-01, 9.342759999999999954e-01, 3.012010000000000076, 5.568959999999999466e-01, 3.894899999999999807, 5.351420000000000066, 9.671300000000000452e-01, 1.386980000000000102, 3.054339999999999833e-01, 1.375049999999999883e-01, 1.332639999999999936e-01}, -// {3.979150000000000187e-01, 3.842869999999999897e-01, 8.480469999999999675e-02, 1.542630000000000112e-01, 2.115169999999999995, 6.130370000000000263e-02, 4.994620000000000171e-01, 3.170970000000000066, 2.575549999999999784e-01, 0., 4.854020000000000223, 1.31528000000000006e-01, 4.158439999999999914e-01, 8.694889999999999564e-01, 4.976709999999999745e-01, 3.447390000000000176e-01, 3.266220000000000234e-01, 1.800340000000000051, 6.653090000000000392e-01, 3.986179999999999723e-01}, -// {8.934959999999999569e-01, 3.904819999999999958e-01, 1.03753999999999999e-01, 3.151240000000000152e-01, 1.190630000000000077, 1.741000000000000048e-01, 4.041409999999999725e-01, 4.257460000000000022, 9.342759999999999954e-01, 4.854020000000000223, 0., 1.982210000000000083e-01, 1.713290000000000091e-01, 1.545260000000000078, 6.83162000000000047e-01, 4.939049999999999829e-01, 1.516119999999999912, 2.058450000000000113, 5.157059999999999977e-01, 4.284370000000000123e-01}, -// {5.098479999999999679e-01, 2.652559999999999918e-01, 5.429420000000000357, 9.471979999999999844e-01, 9.616210000000000035e-02, 1.125559999999999894, 3.956290000000000084, 5.542359999999999509e-01, 3.012010000000000076, 1.31528000000000006e-01, 1.982210000000000083e-01, 0., 1.950810000000000044e-01, 1.543639999999999901, 6.353459999999999663e-01, 3.974229999999999929, 2.030060000000000198, 
1.962460000000000038e-01, 7.191670000000000007e-02, 1.086000000000000076}, -// {1.438549999999999995, 1.094040000000000012e-01, 4.239840000000000275e-01, 6.823550000000000448e-01, 1.614440000000000042e-01, 2.435700000000000087e-01, 6.961979999999999835e-01, 9.992879999999999818e-02, 5.568959999999999466e-01, 4.158439999999999914e-01, 1.713290000000000091e-01, 1.950810000000000044e-01, 0., 9.333719999999999795e-01, 6.794890000000000096e-01, 1.613280000000000047, 7.953839999999999799e-01, 3.148869999999999725e-01, 1.394050000000000011e-01, 2.160459999999999881e-01}, -// {9.085980000000000167e-01, 9.881790000000000018e-02, 6.167829999999999702e-01, 5.469470000000000276, 9.992080000000000406e-02, 3.30052000000000012e-01, 4.294109999999999872, 1.139170000000000044e-01, 3.894899999999999807, 8.694889999999999564e-01, 1.545260000000000078, 1.543639999999999901, 9.333719999999999795e-01, 0., 3.035499999999999865, 1.028869999999999951, 8.579280000000000239e-01, 3.01281000000000021e-01, 2.157370000000000121e-01, 2.277099999999999957e-01}, -// {5.515710000000000335e-01, 5.281909999999999661e-01, 1.473039999999999905e-01, 4.391570000000000196e-01, 1.027109999999999967e-01, 5.846649999999999903e-01, 2.137150000000000105, 1.869790000000000063e-01, 5.351420000000000066, 4.976709999999999745e-01, 6.83162000000000047e-01, 6.353459999999999663e-01, 6.794890000000000096e-01, 3.035499999999999865, 0., 1.224189999999999889, 5.544130000000000447e-01, 2.518489999999999895e-01, 1.163920000000000066, 3.81533000000000011e-01}, -// {3.370789999999999953, 1.407659999999999911, 1.071760000000000046, 7.049389999999999823e-01, 5.459310000000000551e-01, 1.341820000000000013, 7.401689999999999658e-01, 3.194400000000000017e-01, 9.671300000000000452e-01, 3.447390000000000176e-01, 4.939049999999999829e-01, 3.974229999999999929, 1.613280000000000047, 1.028869999999999951, 1.224189999999999889, 0., 4.378020000000000245, 2.327390000000000014e-01, 5.237420000000000408e-01, 7.869930000000000536e-01}, -// 
{2.121109999999999829, 5.129839999999999955e-01, 3.74865999999999977e-01, 8.227649999999999686e-01, 1.719030000000000002e-01, 2.25833000000000006e-01, 4.733069999999999777e-01, 1.458159999999999901, 1.386980000000000102, 3.266220000000000234e-01, 1.516119999999999912, 2.030060000000000198, 7.953839999999999799e-01, 8.579280000000000239e-01, 5.544130000000000447e-01, 4.378020000000000245, 0., 1.388230000000000075, 1.108640000000000042e-01, 2.911480000000000179e-01}, -// {2.006009999999999849, 1.002140000000000031, 1.523349999999999982e-01, 5.887310000000000043e-01, 6.498920000000000252e-01, 1.872469999999999968e-01, 1.183580000000000049e-01, 7.821299999999999919, 3.054339999999999833e-01, 1.800340000000000051, 2.058450000000000113, 1.962460000000000038e-01, 3.148869999999999725e-01, 3.01281000000000021e-01, 2.518489999999999895e-01, 2.327390000000000014e-01, 1.388230000000000075, 0., 3.653689999999999993e-01, 3.147300000000000098e-01}, -// {1.131329999999999975e-01, 7.170699999999999852e-01, 1.297669999999999935e-01, 1.565570000000000017e-01, 1.529640000000000111, 3.36982999999999977e-01, 2.625689999999999968e-01, 2.124830000000000052e-01, 1.375049999999999883e-01, 6.653090000000000392e-01, 5.157059999999999977e-01, 7.191670000000000007e-02, 1.394050000000000011e-01, 2.157370000000000121e-01, 1.163920000000000066, 5.237420000000000408e-01, 1.108640000000000042e-01, 3.653689999999999993e-01, 0., 2.48539000000000021}, -// {2.407350000000000045e-01, 5.438330000000000108e-01, 3.257109999999999728e-01, 1.963030000000000053e-01, 6.454279999999999795, 1.036040000000000016e-01, 3.873439999999999994, 4.201699999999999879e-01, 1.332639999999999936e-01, 3.986179999999999723e-01, 4.284370000000000123e-01, 1.086000000000000076, 2.160459999999999881e-01, 2.277099999999999957e-01, 3.81533000000000011e-01, 7.869930000000000536e-01, 2.911480000000000179e-01, 3.147300000000000098e-01, 2.48539000000000021, 0.}}; -// -// models.protein.empirical.LG.empirical_R = {{0., 
2.489084000000000074, 3.951439999999999952e-01, 1.038545000000000051, 2.537010000000000098e-01, 2.066040000000000099, 3.588580000000000103e-01, 1.49829999999999991e-01, 5.365180000000000504e-01, 3.95336999999999994e-01, 1.124034999999999895, 2.768180000000000085e-01, 1.177651000000000003, 9.698940000000000339e-01, 4.250929999999999986e-01, 4.727181999999999995, 2.139501000000000097, 2.547870000000000079, 1.807169999999999888e-01, 2.18958999999999987e-01}, -// {2.489084000000000074, 0., 6.255600000000000049e-02, 3.49899999999999994e-03, 1.105250999999999983, 5.69265000000000021e-01, 6.405429999999999735e-01, 3.206269999999999953e-01, 1.326600000000000001e-02, 5.940069999999999517e-01, 8.936800000000000299e-01, 5.287680000000000158e-01, 7.538200000000000456e-02, 8.48079999999999945e-02, 5.345509999999999984e-01, 2.784478000000000009, 1.143480000000000052, 1.959290999999999894, 6.701279999999999459e-01, 1.165532000000000012}, -// {3.951439999999999952e-01, 6.255600000000000049e-02, 0., 5.243870000000000253, 1.741600000000000092e-02, 8.449259999999999549e-01, 9.271139999999999937e-01, 1.068999999999999985e-02, 2.829590000000000161e-01, 1.507600000000000086e-02, 2.554800000000000126e-02, 5.076149000000000022, 3.944559999999999733e-01, 5.233860000000000179e-01, 1.239539999999999947e-01, 1.240275000000000016, 4.258600000000000163e-01, 3.796700000000000075e-02, 2.98899999999999999e-02, 1.351070000000000049e-01}, -// {1.038545000000000051, 3.49899999999999994e-03, 5.243870000000000253, 0., 1.881100000000000133e-02, 3.488470000000000182e-01, 4.238810000000000078e-01, 4.426499999999999879e-02, 1.807177000000000033, 6.967299999999999882e-02, 1.737350000000000005e-01, 5.417119999999999713e-01, 4.194089999999999763e-01, 4.128591000000000122, 3.639700000000000157e-01, 6.119729999999999892e-01, 6.04544999999999999e-01, 2.450340000000000018e-01, 7.785200000000000453e-02, 1.200370000000000048e-01}, -// {2.537010000000000098e-01, 1.105250999999999983, 1.741600000000000092e-02, 
1.881100000000000133e-02, 0., 8.958599999999999897e-02, 6.821390000000000509e-01, 1.112727000000000022, 2.391799999999999829e-02, 2.592691999999999997, 1.798853000000000035, 8.952499999999999347e-02, 9.44640000000000063e-02, 3.585499999999999798e-02, 5.272199999999999803e-02, 3.618190000000000017e-01, 1.650010000000000088e-01, 6.546830000000000149e-01, 2.457120999999999889, 7.803901999999999894}, -// {2.066040000000000099, 5.69265000000000021e-01, 8.449259999999999549e-01, 3.488470000000000182e-01, 8.958599999999999897e-02, 0., 3.114839999999999831e-01, 8.704999999999999197e-03, 2.966360000000000108e-01, 4.426100000000000173e-02, 1.395379999999999954e-01, 1.437645000000000062, 1.969609999999999972e-01, 2.679590000000000027e-01, 3.901919999999999833e-01, 1.739989999999999926, 1.29836000000000007e-01, 7.670100000000000529e-02, 2.684909999999999797e-01, 5.467899999999999844e-02}, -// {3.588580000000000103e-01, 6.405429999999999735e-01, 9.271139999999999937e-01, 4.238810000000000078e-01, 6.821390000000000509e-01, 3.114839999999999831e-01, 0., 1.088820000000000066e-01, 6.972639999999999949e-01, 3.663170000000000037e-01, 4.424719999999999764e-01, 4.509237999999999857, 5.088510000000000533e-01, 4.813505000000000145, 2.426600999999999786, 9.900120000000000031e-01, 5.842619999999999481e-01, 1.190129999999999938e-01, 5.970539999999999736e-01, 5.306834000000000273}, -// {1.49829999999999991e-01, 3.206269999999999953e-01, 1.068999999999999985e-02, 4.426499999999999879e-02, 1.112727000000000022, 8.704999999999999197e-03, 1.088820000000000066e-01, 0., 1.590689999999999882e-01, 4.145067000000000057, 4.273607000000000156, 1.915030000000000066e-01, 7.828100000000000336e-02, 7.285400000000000209e-02, 1.269909999999999928e-01, 6.410499999999999532e-02, 1.033738999999999963, 6.491069999999999895e-01, 1.116599999999999954e-01, 2.325230000000000075e-01}, -// {5.365180000000000504e-01, 1.326600000000000001e-02, 2.829590000000000161e-01, 1.807177000000000033, 2.391799999999999829e-02, 
2.966360000000000108e-01, 6.972639999999999949e-01, 1.590689999999999882e-01, 0., 1.375000000000000111e-01, 6.566039999999999655e-01, 2.145077999999999818, 3.903220000000000023e-01, 3.23429399999999978, 6.326067000000000107, 7.486829999999999874e-01, 1.136862999999999957, 1.852020000000000055e-01, 4.990599999999999897e-02, 1.319319999999999937e-01}, -// {3.95336999999999994e-01, 5.940069999999999517e-01, 1.507600000000000086e-02, 6.967299999999999882e-02, 2.592691999999999997, 4.426100000000000173e-02, 3.663170000000000037e-01, 4.145067000000000057, 1.375000000000000111e-01, 0., 6.312357999999999691, 6.842700000000000171e-02, 2.490600000000000036e-01, 5.824570000000000025e-01, 3.018480000000000052e-01, 1.822870000000000046e-01, 3.029359999999999831e-01, 1.702744999999999953, 6.196319999999999606e-01, 2.996480000000000254e-01}, -// {1.124034999999999895, 8.936800000000000299e-01, 2.554800000000000126e-02, 1.737350000000000005e-01, 1.798853000000000035, 1.395379999999999954e-01, 4.424719999999999764e-01, 4.273607000000000156, 6.566039999999999655e-01, 6.312357999999999691, 0., 3.710040000000000004e-01, 9.984899999999999332e-02, 1.672568999999999972, 4.8413299999999998e-01, 3.469599999999999906e-01, 2.020366000000000106, 1.898717999999999906, 6.961749999999999883e-01, 4.813060000000000116e-01}, -// {2.768180000000000085e-01, 5.287680000000000158e-01, 5.076149000000000022, 5.417119999999999713e-01, 8.952499999999999347e-02, 1.437645000000000062, 4.509237999999999857, 1.915030000000000066e-01, 2.145077999999999818, 6.842700000000000171e-02, 3.710040000000000004e-01, 0., 1.617869999999999864e-01, 1.695751999999999926, 7.518780000000000463e-01, 4.00835800000000031, 2.000678999999999874, 8.36879999999999985e-02, 4.537599999999999967e-02, 6.120250000000000412e-01}, -// {1.177651000000000003, 7.538200000000000456e-02, 3.944559999999999733e-01, 4.194089999999999763e-01, 9.44640000000000063e-02, 1.969609999999999972e-01, 5.088510000000000533e-01, 7.828100000000000336e-02, 
3.903220000000000023e-01, 2.490600000000000036e-01, 9.984899999999999332e-02, 1.617869999999999864e-01, 0., 6.242940000000000156e-01, 3.32533000000000023e-01, 1.338132000000000099, 5.71467999999999976e-01, 2.965010000000000145e-01, 9.513099999999999334e-02, 8.961299999999999821e-02}, -// {9.698940000000000339e-01, 8.48079999999999945e-02, 5.233860000000000179e-01, 4.128591000000000122, 3.585499999999999798e-02, 2.679590000000000027e-01, 4.813505000000000145, 7.285400000000000209e-02, 3.23429399999999978, 5.824570000000000025e-01, 1.672568999999999972, 1.695751999999999926, 6.242940000000000156e-01, 0., 2.807907999999999848, 1.223827999999999916, 1.080135999999999985, 2.103319999999999912e-01, 2.361989999999999923e-01, 2.573360000000000092e-01}, -// {4.250929999999999986e-01, 5.345509999999999984e-01, 1.239539999999999947e-01, 3.639700000000000157e-01, 5.272199999999999803e-02, 3.901919999999999833e-01, 2.426600999999999786, 1.269909999999999928e-01, 6.326067000000000107, 3.018480000000000052e-01, 4.8413299999999998e-01, 7.518780000000000463e-01, 3.32533000000000023e-01, 2.807907999999999848, 0., 8.581509999999999971e-01, 5.789870000000000294e-01, 1.708870000000000111e-01, 5.936069999999999958e-01, 3.144399999999999973e-01}, -// {4.727181999999999995, 2.784478000000000009, 1.240275000000000016, 6.119729999999999892e-01, 3.618190000000000017e-01, 1.739989999999999926, 9.900120000000000031e-01, 6.410499999999999532e-02, 7.486829999999999874e-01, 1.822870000000000046e-01, 3.469599999999999906e-01, 4.00835800000000031, 1.338132000000000099, 1.223827999999999916, 8.581509999999999971e-01, 0., 6.472279000000000337, 9.836899999999999811e-02, 2.488619999999999999e-01, 4.005469999999999864e-01}, -// {2.139501000000000097, 1.143480000000000052, 4.258600000000000163e-01, 6.04544999999999999e-01, 1.650010000000000088e-01, 1.29836000000000007e-01, 5.842619999999999481e-01, 1.033738999999999963, 1.136862999999999957, 3.029359999999999831e-01, 2.020366000000000106, 
2.000678999999999874, 5.71467999999999976e-01, 1.080135999999999985, 5.789870000000000294e-01, 6.472279000000000337, 0., 2.188158000000000047, 1.408250000000000057e-01, 2.45841000000000004e-01}, -// {2.547870000000000079, 1.959290999999999894, 3.796700000000000075e-02, 2.450340000000000018e-01, 6.546830000000000149e-01, 7.670100000000000529e-02, 1.190129999999999938e-01, 6.491069999999999895e-01, 1.852020000000000055e-01, 1.702744999999999953, 1.898717999999999906, 8.36879999999999985e-02, 2.965010000000000145e-01, 2.103319999999999912e-01, 1.708870000000000111e-01, 9.836899999999999811e-02, 2.188158000000000047, 0., 1.895100000000000118e-01, 2.493130000000000068e-01}, -// {1.807169999999999888e-01, 6.701279999999999459e-01, 2.98899999999999999e-02, 7.785200000000000453e-02, 2.457120999999999889, 2.684909999999999797e-01, 5.970539999999999736e-01, 1.116599999999999954e-01, 4.990599999999999897e-02, 6.196319999999999606e-01, 6.961749999999999883e-01, 4.537599999999999967e-02, 9.513099999999999334e-02, 2.361989999999999923e-01, 5.936069999999999958e-01, 2.488619999999999999e-01, 1.408250000000000057e-01, 1.895100000000000118e-01, 0., 3.151815000000000033}, -// {2.18958999999999987e-01, 1.165532000000000012, 1.351070000000000049e-01, 1.200370000000000048e-01, 7.803901999999999894, 5.467899999999999844e-02, 5.306834000000000273, 2.325230000000000075e-01, 1.319319999999999937e-01, 2.996480000000000254e-01, 4.813060000000000116e-01, 6.120250000000000412e-01, 8.961299999999999821e-02, 2.573360000000000092e-01, 3.144399999999999973e-01, 4.005469999999999864e-01, 2.45841000000000004e-01, 2.493130000000000068e-01, 3.151815000000000033, 0.}}; -// -// models.protein.empirical.JTT.empirical_R = {{0., 5.744780000000000442e-01, 8.274449999999999861e-01, 1.06668099999999999, 1.382929999999999993e-01, 1.740159000000000011, 2.199699999999999989e-01, 3.616840000000000055e-01, 3.694370000000000154e-01, 3.10006999999999977e-01, 4.693950000000000067e-01, 5.579669999999999908e-01, 
1.959599000000000091, 5.567250000000000254e-01, 5.316779999999999839e-01, 3.887094999999999967, 4.582564999999999777, 2.924160999999999788, 8.432900000000000118e-02, 1.394920000000000049e-01}, -// {5.744780000000000442e-01, 0., 1.056249999999999967e-01, 5.390699999999999659e-02, 6.783350000000000213e-01, 5.463890000000000136e-01, 7.249980000000000313e-01, 1.505589999999999984e-01, 4.900899999999999701e-02, 1.645929999999999893e-01, 4.092020000000000102e-01, 3.133110000000000062e-01, 1.23652999999999999e-01, 9.130399999999999627e-02, 1.019843000000000055, 2.155330999999999886, 4.698229999999999906e-01, 6.213229999999999587e-01, 1.104181000000000079, 2.114851999999999954}, -// {8.274449999999999861e-01, 1.056249999999999967e-01, 0., 7.766556999999999711, 3.25220000000000023e-02, 1.272434000000000065, 1.032342000000000093, 1.159680000000000016e-01, 2.824659999999999949e-01, 6.148599999999999899e-02, 1.900010000000000032e-01, 5.549529999999999852, 1.271639999999999993e-01, 5.216460000000000541e-01, 1.548990000000000089e-01, 5.89268000000000014e-01, 4.251590000000000091e-01, 3.152610000000000134e-01, 5.746600000000000319e-02, 4.539520000000000222e-01}, -// {1.06668099999999999, 5.390699999999999659e-02, 7.766556999999999711, 0., 4.38289999999999999e-02, 1.115631999999999957, 2.437680000000000125e-01, 1.117729999999999974e-01, 1.731684000000000001, 9.748500000000000221e-02, 1.750839999999999896e-01, 5.781150000000000455e-01, 1.91993999999999998e-01, 3.417705999999999911, 3.184830000000000161e-01, 3.124489999999999768e-01, 3.315839999999999899e-01, 4.652709999999999901e-01, 1.143809999999999966e-01, 6.345199999999999452e-02}, -// {1.382929999999999993e-01, 6.783350000000000213e-01, 3.25220000000000023e-02, 4.38289999999999999e-02, 0., 5.021199999999999969e-02, 4.534279999999999977e-01, 7.770899999999999475e-01, 2.452100000000000113e-02, 2.500293999999999794, 4.361809999999999854e-01, 7.348100000000000465e-02, 1.484830000000000039e-01, 4.568300000000000138e-02, 
6.531399999999999706e-02, 9.439710000000000045e-01, 1.389039999999999997e-01, 5.9347799999999995e-01, 5.379220000000000113e-01, 5.484236000000000111}, -// {1.740159000000000011, 5.463890000000000136e-01, 1.272434000000000065, 1.115631999999999957, 5.021199999999999969e-02, 0., 2.016959999999999864e-01, 5.376899999999999735e-02, 2.698400000000000243e-01, 6.949199999999999822e-02, 1.303789999999999949e-01, 7.733130000000000281e-01, 2.080809999999999882e-01, 2.312939999999999996e-01, 1.359652000000000083, 1.874295999999999962, 3.16861999999999977e-01, 4.701400000000000023e-01, 5.441799999999999971e-01, 5.249999999999999806e-02}, -// {2.199699999999999989e-01, 7.249980000000000313e-01, 1.032342000000000093, 2.437680000000000125e-01, 4.534279999999999977e-01, 2.016959999999999864e-01, 0., 1.817880000000000051e-01, 5.250960000000000072e-01, 5.405710000000000237e-01, 3.296600000000000086e-01, 4.025777999999999857, 1.141961000000000004, 5.684079999999999799, 3.210671000000000053, 7.434579999999999522e-01, 4.773549999999999738e-01, 1.218270000000000047e-01, 1.281930000000000014e-01, 5.848399999999999821}, -// {3.616840000000000055e-01, 1.505589999999999984e-01, 1.159680000000000016e-01, 1.117729999999999974e-01, 7.770899999999999475e-01, 5.376899999999999735e-02, 1.817880000000000051e-01, 0., 2.025619999999999921e-01, 2.335138999999999854, 4.831666000000000238, 4.910030000000000228e-01, 9.858000000000000096e-02, 7.827000000000000624e-02, 2.39194999999999991e-01, 4.051190000000000069e-01, 2.553805999999999798, 9.533943000000000723, 1.345099999999999907e-01, 3.034450000000000203e-01}, -// {3.694370000000000154e-01, 4.900899999999999701e-02, 2.824659999999999949e-01, 1.731684000000000001, 2.452100000000000113e-02, 2.698400000000000243e-01, 5.250960000000000072e-01, 2.025619999999999921e-01, 0., 1.464810000000000001e-01, 6.245810000000000528e-01, 2.529516999999999793, 2.163450000000000095e-01, 2.966731999999999925, 6.529255000000000031, 4.744780000000000109e-01, 
9.656409999999999716e-01, 1.240659999999999957e-01, 8.913400000000000489e-02, 8.790399999999999603e-02}, -// {3.10006999999999977e-01, 1.645929999999999893e-01, 6.148599999999999899e-02, 9.748500000000000221e-02, 2.500293999999999794, 6.949199999999999822e-02, 5.405710000000000237e-01, 2.335138999999999854, 1.464810000000000001e-01, 0., 3.856905999999999946, 1.372889999999999944e-01, 1.060503999999999891, 7.090039999999999676e-01, 3.722610000000000086e-01, 5.925110000000000099e-01, 2.725139999999999785e-01, 1.761438999999999977, 5.303240000000000176e-01, 2.410940000000000027e-01}, -// {4.693950000000000067e-01, 4.092020000000000102e-01, 1.900010000000000032e-01, 1.750839999999999896e-01, 4.361809999999999854e-01, 1.303789999999999949e-01, 3.296600000000000086e-01, 4.831666000000000238, 6.245810000000000528e-01, 3.856905999999999946, 0., 3.30720000000000014e-01, 1.642149999999999999e-01, 4.569010000000000016e-01, 4.310450000000000115e-01, 2.855639999999999845e-01, 2.114727999999999941, 3.03853300000000015, 2.01334000000000013e-01, 1.89870000000000011e-01}, -// {5.579669999999999908e-01, 3.133110000000000062e-01, 5.549529999999999852, 5.781150000000000455e-01, 7.348100000000000465e-02, 7.733130000000000281e-01, 4.025777999999999857, 4.910030000000000228e-01, 2.529516999999999793, 1.372889999999999944e-01, 3.30720000000000014e-01, 0., 1.218039999999999956e-01, 7.688340000000000174e-01, 4.510950000000000237e-01, 5.057964000000000127, 2.351310999999999929, 1.645250000000000046e-01, 2.769999999999999893e-02, 7.006930000000000103e-01}, -// {1.959599000000000091, 1.23652999999999999e-01, 1.271639999999999993e-01, 1.91993999999999998e-01, 1.484830000000000039e-01, 2.080809999999999882e-01, 1.141961000000000004, 9.858000000000000096e-02, 2.163450000000000095e-01, 1.060503999999999891, 1.642149999999999999e-01, 1.218039999999999956e-01, 0., 1.608125999999999944, 7.104890000000000372e-01, 2.788406000000000162, 1.176960999999999924, 2.115609999999999991e-01, 
6.996499999999999941e-02, 1.138500000000000068e-01}, -// {5.567250000000000254e-01, 9.130399999999999627e-02, 5.216460000000000541e-01, 3.417705999999999911, 4.568300000000000138e-02, 2.312939999999999996e-01, 5.684079999999999799, 7.827000000000000624e-02, 2.966731999999999925, 7.090039999999999676e-01, 4.569010000000000016e-01, 7.688340000000000174e-01, 1.608125999999999944, 0., 3.021994999999999987, 5.488070000000000448e-01, 5.238249999999999851e-01, 1.797709999999999864e-01, 1.72205999999999998e-01, 2.547449999999999992e-01}, -// {5.316779999999999839e-01, 1.019843000000000055, 1.548990000000000089e-01, 3.184830000000000161e-01, 6.531399999999999706e-02, 1.359652000000000083, 3.210671000000000053, 2.39194999999999991e-01, 6.529255000000000031, 3.722610000000000086e-01, 4.310450000000000115e-01, 4.510950000000000237e-01, 7.104890000000000372e-01, 3.021994999999999987, 0., 1.00155100000000008, 6.502820000000000267e-01, 1.71995000000000009e-01, 1.257961000000000107, 2.356010000000000049e-01}, -// {3.887094999999999967, 2.155330999999999886, 5.89268000000000014e-01, 3.124489999999999768e-01, 9.439710000000000045e-01, 1.874295999999999962, 7.434579999999999522e-01, 4.051190000000000069e-01, 4.744780000000000109e-01, 5.925110000000000099e-01, 2.855639999999999845e-01, 5.057964000000000127, 2.788406000000000162, 5.488070000000000448e-01, 1.00155100000000008, 0., 4.777646999999999977, 4.085320000000000062e-01, 3.109270000000000089e-01, 6.286079999999999446e-01}, -// {4.582564999999999777, 4.698229999999999906e-01, 4.251590000000000091e-01, 3.315839999999999899e-01, 1.389039999999999997e-01, 3.16861999999999977e-01, 4.773549999999999738e-01, 2.553805999999999798, 9.656409999999999716e-01, 2.725139999999999785e-01, 2.114727999999999941, 2.351310999999999929, 1.176960999999999924, 5.238249999999999851e-01, 6.502820000000000267e-01, 4.777646999999999977, 0., 1.143979999999999997, 8.05560000000000026e-02, 2.01093999999999995e-01}, -// {2.924160999999999788, 
6.213229999999999587e-01, 3.152610000000000134e-01, 4.652709999999999901e-01, 5.9347799999999995e-01, 4.701400000000000023e-01, 1.218270000000000047e-01, 9.533943000000000723, 1.240659999999999957e-01, 1.761438999999999977, 3.03853300000000015, 1.645250000000000046e-01, 2.115609999999999991e-01, 1.797709999999999864e-01, 1.71995000000000009e-01, 4.085320000000000062e-01, 1.143979999999999997, 0., 2.396969999999999934e-01, 1.65473000000000009e-01}, -// {8.432900000000000118e-02, 1.104181000000000079, 5.746600000000000319e-02, 1.143809999999999966e-01, 5.379220000000000113e-01, 5.441799999999999971e-01, 1.281930000000000014e-01, 1.345099999999999907e-01, 8.913400000000000489e-02, 5.303240000000000176e-01, 2.01334000000000013e-01, 2.769999999999999893e-02, 6.996499999999999941e-02, 1.72205999999999998e-01, 1.257961000000000107, 3.109270000000000089e-01, 8.05560000000000026e-02, 2.396969999999999934e-01, 0., 7.47889000000000026e-01}, -// {1.394920000000000049e-01, 2.114851999999999954, 4.539520000000000222e-01, 6.345199999999999452e-02, 5.484236000000000111, 5.249999999999999806e-02, 5.848399999999999821, 3.034450000000000203e-01, 8.790399999999999603e-02, 2.410940000000000027e-01, 1.89870000000000011e-01, 7.006930000000000103e-01, 1.138500000000000068e-01, 2.547449999999999992e-01, 2.356010000000000049e-01, 6.286079999999999446e-01, 2.01093999999999995e-01, 1.65473000000000009e-01, 7.47889000000000026e-01, 0.}}; -// -// models.protein.empirical.JC69.empirical_R = {{0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.}, -// {1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.}, -// {1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.}, -// {1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.}, -// {1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.}, -// {1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.}, 
-// {1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.}, -// {1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.}, -// {1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.}, -// {1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.}, -// {1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1.}, -// {1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1.}, -// {1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1.}, -// {1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1.}, -// {1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1.}, -// {1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1.}, -// {1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1.}, -// {1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1.}, -// {1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1.}, -// {1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0.}}; -// -// -// +//==================================================================================================================// + + + +/** + * @name models.protein.mtMAM.frequencies + * @param {Dictionary} Baseline mtMAM model + * @returns {Dictionary} Updated mtMAM model with empirical frequencies + * @description Define the empirical amino acid frequencies associated with the mtMAM model of protein evolution + */ +lfunction models.protein.mtMAM.frequencies (model, namespace, datafilter) { + model[utility.getGlobalValue("terms.efv_estimate")] = + {{ 0.0692} + { 0.0065} + { 0.0186} + { 0.0236} + { 0.0611} + { 0.0557} + { 0.0277} + { 0.0905} + { 0.0221} + { 0.1675} + { 0.0561} + { 0.04} + { 0.0536} + { 0.0238} + { 0.0184} + { 0.0725} + { 0.087} + { 0.0428} + { 0.0293} + { 0.034} + 
}; + + model[utility.getGlobalValue("terms.model.efv_estimate_name")] = utility.getGlobalValue("terms.frequencies.predefined"); + (model[utility.getGlobalValue("terms.parameters")])[utility.getGlobalValue("terms.model.empirical")] = 0; + return model; +} + +/* Define a dictionary of amino-acid exchangeability rates for the mtMAM model of protein evolution. */ +models.protein.mtMAM.Rij = { + "A": + {"C": 0.0, + "D": 0.11, + "E": 0.0, + "F": 0.0, + "G": 0.78, + "H": 0.08, + "I": 0.75, + "K": 0.0, + "L": 0.21, + "M": 0.76, + "N": 0.02, + "P": 0.53, + "Q": 0.0, + "R": 0.32, + "S": 3.42, + "T": 6.81, + "V": 3.98, + "W": 0.05, + "Y": 0.0}, + "C": + {"D": 0.0, + "E": 0.0, + "F": 0.07, + "G": 0.0, + "H": 3.05, + "I": 0.41, + "K": 0.0, + "L": 0.27, + "M": 0.0, + "N": 0.0, + "P": 0.0, + "Q": 0.0, + "R": 1.86, + "S": 3.47, + "T": 1.14, + "V": 0.0, + "W": 0.65, + "Y": 5.3}, + "D": + {"E": 5.69, + "F": 0.05, + "G": 0.79, + "H": 0.11, + "I": 0.0, + "K": 0.0, + "L": 0.0, + "M": 0.0, + "N": 8.64, + "P": 0.02, + "Q": 0.49, + "R": 0.0, + "S": 0.16, + "T": 0.0, + "V": 0.1, + "W": 0.0, + "Y": 0.0}, + "E": + {"F": 0.0, + "G": 0.22, + "H": 0.22, + "I": 0.0, + "K": 2.15, + "L": 0.0, + "M": 0.0, + "N": 0.0, + "P": 0.0, + "Q": 2.74, + "R": 0.0, + "S": 0.21, + "T": 0.04, + "V": 0.2, + "W": 0.0, + "Y": 0.0}, + "F": + {"G": 0.0, + "H": 0.0, + "I": 0.57, + "K": 0.0, + "L": 2.46, + "M": 0.11, + "N": 0.06, + "P": 0.17, + "Q": 0.0, + "R": 0.0, + "S": 0.9, + "T": 0.08, + "V": 0.06, + "W": 0.0, + "Y": 6.82}, + "G": + {"H": 0.0, + "I": 0.0, + "K": 0.0, + "L": 0.0, + "M": 0.0, + "N": 0.47, + "P": 0.0, + "Q": 0.0, + "R": 0.18, + "S": 1.12, + "T": 0.0, + "V": 0.05, + "W": 0.0, + "Y": 0.01}, + "H": + {"I": 0.0, + "K": 0.0, + "L": 0.26, + "M": 0.0, + "N": 4.58, + "P": 0.53, + "Q": 5.5, + "R": 2.32, + "S": 0.2, + "T": 0.01, + "V": 0.0, + "W": 0.0, + "Y": 15.25}, + "I": + {"K": 0.06, + "L": 2.32, + "M": 3.78, + "N": 0.19, + "P": 0.05, + "Q": 0.0, + "R": 0.0, + "S": 0.0, + "T": 3.6, + "V": 22.2, + 
"W": 0.0, + "Y": 0.16}, + "K": {"L": 0.04, + "M": 0.59, + "N": 4.08, + "P": 0.18, + "Q": 2.42, + "R": 0.5, + "S": 0.65, + "T": 0.5, + "V": 0.0, + "W": 0.0, + "Y": 0.67}, + "L": + {"M": 6.09, + "N": 0.0, + "P": 0.43, + "Q": 0.2, + "R": 0.06, + "S": 0.74, + "T": 0.34, + "V": 1.0, + "W": 0.12, + "Y": 0.25}, + "M": + {"N": 0.21, + "P": 0.0, + "Q": 0.22, + "R": 0.0, + "S": 0.47, + "T": 6.91, + "V": 8.32, + "W": 0.13, + "Y": 0.0}, + "N": + {"P": 0.33, + "Q": 0.08, + "R": 0.04, + "S": 4.46, + "T": 1.1, + "V": 0.0, + "W": 0.06, + "Y": 1.56}, + "P": + {"Q": 0.51, + "R": 0.09, + "S": 2.02, + "T": 0.78, + "V": 0.0, + "W": 0.07, + "Y": 0.08}, + "Q": + {"R": 2.46, + "S": 0.3, + "T": 0.0, + "V": 0.33, + "W": 0.0, + "Y": 0.54}, + "R": + {"S": 0.03, + "T": 0.0, + "V": 0.0, + "W": 0.16, + "Y": 0.0}, + "S": + {"T": 6.14, + "V": 0.0, + "W": 0.17, + "Y": 1.07}, + "T": + {"V": 2.37, + "W": 0.0, + "Y": 0.0}, + "V": + {"W": 0.0, + "Y": 0.0}, + "W": + {"Y": 0.14} +}; + From 0147bc78e166358295e254b27f54292369ae82fc Mon Sep 17 00:00:00 2001 From: "Stephanie J. 
Spielman" Date: Mon, 16 Oct 2017 16:28:59 -0400 Subject: [PATCH 17/19] Updated code and data for ProteinFitter.wbf to achieve faster testing --- tests/hbltests/libv3/ProteinGTRFit.wbf | 6 +- .../data/protgtr_fitter_alignments/aa1.fasta | 37 ++++ .../data/protgtr_fitter_alignments/aa2.fasta | 37 ++++ .../data/protgtr_fitter_alignments/aa3.fasta | 37 ++++ .../data/protgtr_fitter_alignments/prot1.dat | 120 ----------- .../data/protgtr_fitter_alignments/prot2.dat | 76 ------- .../data/protgtr_fitter_alignments/prot3.dat | 184 ---------------- .../data/protgtr_fitter_alignments/prot4.dat | 110 ---------- .../data/protgtr_fitter_alignments/prot5.dat | 200 ------------------ .../libv3/data/protgtr_fitter_lines_raw.txt | 8 +- 10 files changed, 117 insertions(+), 698 deletions(-) create mode 100644 tests/hbltests/libv3/data/protgtr_fitter_alignments/aa1.fasta create mode 100644 tests/hbltests/libv3/data/protgtr_fitter_alignments/aa2.fasta create mode 100644 tests/hbltests/libv3/data/protgtr_fitter_alignments/aa3.fasta delete mode 100644 tests/hbltests/libv3/data/protgtr_fitter_alignments/prot1.dat delete mode 100644 tests/hbltests/libv3/data/protgtr_fitter_alignments/prot2.dat delete mode 100644 tests/hbltests/libv3/data/protgtr_fitter_alignments/prot3.dat delete mode 100644 tests/hbltests/libv3/data/protgtr_fitter_alignments/prot4.dat delete mode 100644 tests/hbltests/libv3/data/protgtr_fitter_alignments/prot5.dat diff --git a/tests/hbltests/libv3/ProteinGTRFit.wbf b/tests/hbltests/libv3/ProteinGTRFit.wbf index ffef8dfed..91b5c2855 100644 --- a/tests/hbltests/libv3/ProteinGTRFit.wbf +++ b/tests/hbltests/libv3/ProteinGTRFit.wbf @@ -31,10 +31,10 @@ utility.ForEach(file_list2, "_value_", "writeNewPath(_value_)"); LoadFunctionLibrary("ProteinGTRFit.bf", { "0": PATH_TO_CURRENT_BF + "data/protgtr_fitter_lines.txt", - "1": "LogL", // use logL convergence - "2": "0.1", // stopping + "1": "RMSE", // use rmse convergence + "2": "1", // stopping "3": "WAG", // use WAG for baseline - 
"4": "Gamma", // use a gamma for rate variation + "4": "No", // use no rate variation }); diff --git a/tests/hbltests/libv3/data/protgtr_fitter_alignments/aa1.fasta b/tests/hbltests/libv3/data/protgtr_fitter_alignments/aa1.fasta new file mode 100644 index 000000000..a36e45339 --- /dev/null +++ b/tests/hbltests/libv3/data/protgtr_fitter_alignments/aa1.fasta @@ -0,0 +1,37 @@ +>t8 +GDTAADSYLDLVSMPFDVLGLRLRLGALIMAPYFVNGSARKLITWALAYSRVGIPGHGKK +EHPLYGDYTWCGMEQTFQNQGYGHMTAIRVGWPSAADRTA +>t9 +ADTATSAYLVAVSTGGETLGSRRANHNEVTTQYFDGSEGRKNFIWALMYLKVGFCEHGKK +ERPIFGQHNSCAIGTVIEKGNNAAYNDTECGTAKHRGSDA +>t6 +GSTGADTYLDLVSMPFNVLGLRLRLNCLVMAPYFVNGAAKKLFVWSLAYARVGIPGHGKK +DHPLYGNYSWCGMGQIFQNQGYGHLTAIRVGWPSAADRTA +>t7 +ADTATTIYQTAVSTNWETLGLRRANHNEVTTQYFDGPEGEKLFIWALLYLKVAFCEHGKK +RRPIFGQHDAVAIGHVVERENNAASNDPECGLALHVGCDA +>t4 +AETATSTYLVAVSTNWETLGLRRANHNEVTTRYFDGSEGPKNFIWALMTLKVGFCAHGKK +ERPIIGLHNSCGIGPVIEKSNNLAINRTACGAASHVGSDA +>t5 +GSTAADSYLDLVSMPFDVLGLQLMLGALVMAPYFVHGSTRKLFKWALAYSRVGVPGHGKK +EHPLYGNYTWCGMGQIFQNQGYGHMTAIRVGWPSAADRQA +>t2 +GSTGAGMYPDCISTPEDVLGLSLALDARVRGKFLSDKRARRLLVWSLAYAMVGTIGHPKI +EHPLLGDFSWCNLGHVYESSDYQHGTAVRCGWPSDPERNA +>t3 +PRTGRDSYIGVISMDINVLSLALADHIETMDQILNKKEGQSLFIWGLIKARVGLTGHGKK +ERSIFGDFDACTLATAYSKEDFAVTDANRCNYESAPAGDA +>t1 +ENTSTNKYFEWVSTDFDTLGLKIANIVEIISYYFDRNQGNKLIIWGLVYTRVGLCGHGKK +NQPFFGDYTECGIGTIFRKEEFAANTQSRCGFRHAPGSAA +>t10 +IDPSTNAYLTGVSENVENLGLKKATANEVALQYFEAPAGEKLFIMTLLYLKVESMTNAKR +FDPIFGVHDSVAMAHLVEKENNAAGLDSECGLKSIVGCDP +>t11 +GSTGAGTWPDCVSTPDEVLGVSLALDARVRGLFLSDARARRYLIWYLAYARVGLVGHPKV +EHPLFGDFSWCNVGHVFESSDYQHATTVRCGWPSEPPRNA +>t12 +ARTPTDGYFEVVSTDTDTLGLALANQVEVMTQYFDRQQGTKLFIWGLIYARVGVTGHGKK +NRPIFGDYTHCGMATVYNKEDFAASTASRHGFPSTPGSDA 
+(t1:0.1408208979,((((t4:0.09180754325,t9:0.07128012371):0.1313551269,(t10:0.4165821702,t7:0.04764077778):0.08831922431):0.6377018364,(t3:0.6987412621,t12:0.08350678342):0.04692053027):0.07418762848,(((t5:0.08652378838,t8:0.05359766493):0.103280468,t6:0.08857364883):0.250248048,(t2:0.074322189,t11:0.1835713):0.644012):0.4382723628):0.12478); \ No newline at end of file diff --git a/tests/hbltests/libv3/data/protgtr_fitter_alignments/aa2.fasta b/tests/hbltests/libv3/data/protgtr_fitter_alignments/aa2.fasta new file mode 100644 index 000000000..fc55ae0d1 --- /dev/null +++ b/tests/hbltests/libv3/data/protgtr_fitter_alignments/aa2.fasta @@ -0,0 +1,37 @@ +>t8 +AGAKSGEGGTASFYYSQTNLDVAKGVLFLMVPQGEWAKESIFSSLRCWQSVKESTILITH +RLIQLNRPDANYELAAGSIWAHYDQFLGGLETNAFYLTAI +>t9 +SGATSGEGSMWSLFYTKVEGELTLGQVHEKKPGAGTALRAFGDLERCLELLVPVIGAAID +RNSELTENAINVKGAAGLAHAAYASFTDIRERGGAWIALL +>t6 +AGAKSGEGGTAALYNSQTDLDVAKGVVFLMVPEGEWAKESVLSSLRCWQSVEESTVVITY +RLVELHRSDVNYELAAGSIWAHYNEFLASLEAAAFYLTAL +>t7 +SGATSAEGAMASLYYTKVEGELSLGEMHKKKPKAGTELRPLGDLERCLELLVPSIGAAID +RNSELSKNAINVALAAGLVHAAYASYTDIREHGGAWIALC +>t4 +SGATSGEGSMRSLFYCKVEGELSLGQVHDKKPGAGTALRAVGSLYRCLELLVPAIGAAID +RNSELTENAINVKLATGLAQAAYASFTNIRERGGAWIALL +>t5 +AAAKSGEGGTASFYYSQTELDVAKGVLFLMVPEGEWAKESVFSSLRCWQSVEESTILITH +RFVQLQRPDANYELAAGSIWAHYDQFLGGLETNAFYLTAV +>t2 +KGAVTYQGNTASLHYTKDDDQGSQGVVALGTPELDWAKKAHEIRLRGQQNVSASLTVQLD +NQEEFQKFEINYRRVSGFVVAYCSAYPSQVETSVYYMTLT +>t3 +TGETEGVGDTKDLYYGHTEGQLVAGVTHDQAEKVGTSGRACATLDSCVGSVVPSLSAWID +RTVALGKSEMRCELVDGYIYSDHQIFIKVIVVGALYISQL +>t1 +SGANAGEGDMVSLYFAWSDGEVAKGVVHEMVPKSGMEPRAPQTLVRCLQSVLPPPTAVVD +RIVELNRSELNYDLAAGLSYASYAEFLNVLETGAFYISVV +>t10 +SGKTSGDGAMASLYYTKVPGEDSLGEVHEKRGKSGTDMGAEGDHKKCLTLFLPSIGAAVD +RDTEFSKKDLNLVLAASLVHSAYAAIADIHEHGGYWTAVC +>t11 +KGAMTDEGNTAKLQYTTDNSQVSHGVVVLGTPELDWAKSAHEVRLAGEQNISPALTVQLD +NQEEFQKFEINYRSVSGFVRAQCSASPSQVQTSVYHMLCT +>t12 +SGATSGEGDMASLYYSKAEGEVAKAVVHELVPKSGTAPRAIETLDGCLQSVVPALSAVID +RTVELSPSELNCDMAAGLIYADYAAFINILETGALYIALL 
+(t1:0.1408208979,((((t4:0.09180754325,t9:0.07128012371):0.1313551269,(t10:0.4165821702,t7:0.04764077778):0.08831922431):0.6377018364,(t3:0.6987412621,t12:0.08350678342):0.04692053027):0.07418762848,(((t5:0.08652378838,t8:0.05359766493):0.103280468,t6:0.08857364883):0.250248048,(t2:0.074322189,t11:0.1835713):0.644012):0.4382723628):0.12478); \ No newline at end of file diff --git a/tests/hbltests/libv3/data/protgtr_fitter_alignments/aa3.fasta b/tests/hbltests/libv3/data/protgtr_fitter_alignments/aa3.fasta new file mode 100644 index 000000000..62bd5455e --- /dev/null +++ b/tests/hbltests/libv3/data/protgtr_fitter_alignments/aa3.fasta @@ -0,0 +1,37 @@ +>t8 +QPSNEIVLEVGNGIETCVYKADNSDATKFHVDEPHLEAKTPVTTGLDEIRQPSTTVSLLS +IEYSPHAAQRHSETVVISVNGKLQGGEDLYIIPRISCNMK +>t9 +QPKADLAPEVGTKLVNCAFKCSDQSGHRFHIDFSAEKLAPAQSRTLKSYLNPPFSVEVLP +LSFNPQKVQKAPDCIMVSAEGGFEGGAGAFTPFRFSCKLT +>t6 +QPSDELVLEVGNGTETCVFKADNSDATKFHIDEPLMEAKTPRTTGLDEIRKPSTSVDLLS +IEYTPHADQKHADTVVINVNGDLQGGEDLYIIPRISAHVK +>t7 +DPRTDLTPDVGTKLVNCAFKCSDQSGHRFRIDYPADKLAPEQTRSLKKFLNPEISIAELH +LSFSPQKIQKSPDCILISAEGGFEGGAGAFTALRWSCKFT +>t4 +QPKKDLAPPVGTKMVNCAFKCSDQSGHRFHIDYSAEKLAPSQTRITKKYLNPPFSVAVLP +LNFNPDKVQKSPDCIIISAEGGFEGGAGAFTSFRFSCKLA +>t5 +QPSNELVLEVGNGIQTCVFKADNSDATKFHVDEPHLEVKTPLTTGLDEIRQPSTRVSLLS +IEYSPHPAQQHSETVVTSVNGKLQGGEDLYIIPRISCNMK +>t2 +EPTFSLTLTVSIVMPICVLKEETGVAIVFHVDESALETNAQRTAGFKTLKSPATSAVLVP +VDYKPMTAQKHEDPALFKTQKDLDTADKFFVRTYISCNIE +>t3 +AAVVDLKVSVGHVPITCRLKCNDRFTKNFTVQASGLETADRKTISFAPLLGPDFTIAILP +LEWKPQESQEMPDAISVTAEGSFASGVKAFYSSSFSCNLV +>t1 +DPAVDLKLDIGSAVITCALSNDYRSATRFHLDKPALATPTAHTTAWNPLLAPDLSRAILS +LEYTPTKAQKAADTIAITIEGEYPGEKRAFAPTRYNQQFT +>t10 +NPRLDLTPDFGTPLVSCAFPASNQSGHRFEINYPPDRLAPEEQRSLKKFMNPEISITNLH +QSYNPEAIKKNPDCVLISAEGGFNGGAGAFTAFHLSCTFT +>t11 +EPTFDLTLTVSIVMPICVLKANSGVAIVYHVDESALETNAQRTAGFKTLKSPVTNVVLNP +VDYNPQTAQKHEDTALFRTQKDLDTADKFFVRTYIACNIA +>t12 +DPAVDLKLDVGRALVTCALKCDDRYAKRLHIDEGAVETATPQETALQPLLGPDFTVAILP +LEYTPQKAQEAPDTITITVEGSFSGGAKAFAASRFSCNLI 
+(t1:0.1408208979,((((t4:0.09180754325,t9:0.07128012371):0.1313551269,(t10:0.4165821702,t7:0.04764077778):0.08831922431):0.6377018364,(t3:0.6987412621,t12:0.08350678342):0.04692053027):0.07418762848,(((t5:0.08652378838,t8:0.05359766493):0.103280468,t6:0.08857364883):0.250248048,(t2:0.074322189,t11:0.1835713):0.644012):0.4382723628):0.12478); \ No newline at end of file diff --git a/tests/hbltests/libv3/data/protgtr_fitter_alignments/prot1.dat b/tests/hbltests/libv3/data/protgtr_fitter_alignments/prot1.dat deleted file mode 100644 index d29584397..000000000 --- a/tests/hbltests/libv3/data/protgtr_fitter_alignments/prot1.dat +++ /dev/null @@ -1,120 +0,0 @@ ->t49 -LPMKGPKVLVTGDVSGNPTTHSAKLDTVLYPNNKEDLGVNVSDKNVIPIQSERQRLLHTLLEIRKMCTHGDIILRLTSGEIIPIKLAEGGLEASMLNNRRVHMDGCEKDDQKLDILVKDVQIKYNQAVRIFGIASDFSCQDRSVAE ->t48 -LKIRDPNVLLSNLVTDDLVSKNAKLATVNRSKIKPEQGVVITVNRTVPKKGHRTEYELYYIEFLQVLYKTESVMLFAKSGRMPIRLDQGSVEDYYLKAKRHYLEGCRKIEGCVDMALRTLNENFDAIGKLIFLKSPYYILASAVLE ->t43 -YTLTEWKVLVTGDVGENAVNHRRPIASVLYHNAKEELKTSMSDSRVLPIGTEGDKLSMRNIAWRKITQHGSAILKIIAGVEIPIKLKKGFFQDILLSLKRYVIEGCQRDNQQLQILARNMQMKFNTTTRRFSLTHPFAATETFVTQ ->t42 -YAHNNFLQVIAGDFERKPDSSVEGLANTISEKWCQTQVRCLNGSHVIGILENKPGSDIILMPAKEMVFDNDTLLNLIEIVLPLKVIPRSIFFSHTHAAQQYYFMGEFKDDAKIGLIARALEPLIQVIYGRFGINFEYSIEGPAFGR ->t41 -YSQTQWAALVSGGVDQEPRQENKQFSELVHDDIRDDKDQKPETTSLCAHKCQKDQSSILIIAYKQMSRDQDSYLKLIQVHLAILKLKKGLISDLFLFNKRYYLKAEVGDEVRISLLVRSVKPLLSDTYRNFAIEIPYICENASTGE ->t40 -FPLREVELIVGGGLSHEPVGAYSHLEGVLFKSVTPDTSTTASDDNTVPQEPEPQELTMLLAGYFFLMHFGDNLAKLMEGGIIPMCFEEGGIEDTFLGTRRFHLEGLKRDPPPLDAVMRELRISFCDLVKICGIKLPYACTLTSALD ->t47 -GNKTDMVAIVAADVDNEPAENEKQYSSTGNREMTNRRFNTPEGGEICPFKARKQRLNLTPIPYKLIVKEPDKKYRMWDTTLSMVRKKNGILEYVILATQEYWFVNDFQEEVHLPLVENEFKPRFEESYKAWALNIVYHVEGSSTGA ->t46 -LEIKNPKTLVTGDIKAYPSGRSTDLDTSIFPPNIQALATNATDPQVVFIATGRQRVVYALAKLRIVIQHGDLILRLTRGEIIAIKLEESSITSSMLNNRRLYLEGCEKNDRNIDNMMKEVRIRYNQMVRIIAMASEYSCKTSAVSE ->t45 
-IQLRDPKVLLSGNVTSDTVSKSSKLVTVNRGNIIKDLNVVLSDSKTVVNEGDMTEFSLFYLEFLKVVRLGEAIFLFVKNGILAIKLEEGTAADLFLKTRSHFLNGCQREAPPVDVFIKTVQLNFDVLGAMLLLRSPYGILASRVLE ->t44 -IPIKDPKVLVTNNVSNDEVPHSSNLVTVNYQNINQEYGVNLSDSKTVPLQGERTEFTIFFLEFLKMVQHGDAIFRIIANSIIPIKLEHGGAEDVYLNTRRWHMDGCRREDPPVDVYVKDLQIQFDQMEKLFSIKSPYSVQATQALE ->t1 -YANTQHSLLVSGGVDDEPQRQGKQLLRLVNDNLRGDKSPTQLSTNLCAHKAQKDQFSILMIPYKQMRRYRDAYFKLIHVTLAIVKLKRGLTKDLLLYQRKYYLRAETKDEVRLSLLVRNVEPLLNNTYHNFQTEIPYICEYASSNE ->t14 -YTHTDFLRVVCGDVDEEPEQDGETNGQAANELFFENRVRCLRKEYLCTIAQVKPGRDIWIIPYKQIAVEANTLFKFIEATSDLVKITKQIMTNISLYNRKVNFLASVKDELEMNFIVRHLKPLENEFYKSFPMGIPYALEATAFGR ->t15 -FGLREPTLIVGAGLSDESPANTSHVVSVLYPIVRPATNKAAEDDPTMPTYDESDQLVTMILEWFCILHWTQGDLRILSGGIIQPCFEHGGREDTLIKTQLIKLNRYRREDPPLNALVKELKIGFADAVRDLGIRKPYACKLTSALD ->t16 -LPLKEPKVLVTGQVSDKPAAHEANMVKVLNGSSKDALGVDTDDERTVKIQSEQRGMRLFWLEFLELVQHGDAIVQMICGGEIRIRIEEGGLVQVLFNIRRYSINGGQADDPPLEPYSKDLQIKFTDMMRITGLASRHQCEVTQVLD ->t17 -YAQTQWSLLVSGGVDEEPNREGKKFTQLVNDNLGDDKNQTPLSTNLVSHKQQKDQFSIAIIPYKQMNRLRDAYFKLISVTLAILKLKKGLNKDLFLFHRRYYLRAEVKDEVRLSLLVRNVKPLLNNRYHSFATEIPYICEYASSNE ->t10 -FRASAQHTGFRTPFYKRPQSACRDKAEYYYGLAKTDIGVQTDDRRDELAEMQLKLHSLYYIDFLSIKSSGDEKFKKILSALIKIKLHESGLLETAINSARFKFFGHQKDPRPRDIFLEAFKLTIPSTYNKKSMAAFTRKLARVVLD ->t11 -LPLTNPRMLLVDEASANPTTHAANLDTVVYPNYEEDMGVDVSDYKVVPIQSPRQRLLHTLLEIRKLCRLGDIILRLTQGEIIPIKIEEGGLEAPMLNNRRLIMDGCERENEKFEIIVRDVEKKFNQMVRIFGLASDFACGDSSLVE ->t38 -HSRMKWTVFVTGSVEADPVIYSASMQEASYQQLNDAMGKTAVGSAAIVIETEDDKVTLENIPYRETDKRHESFMNVIMGQEMPIRLREGFLAQITFSDPKYYMDGNVKRDVKMNLLVQDLQPVLGTIFRRFDIRKPFYCSETTVKE ->t39 -CTHTEWVAIPAGDADDGPAEEGQAYNQTGDQNFQNQRVNTPEGSSICPFKARKQRMNLLPIPFKMAVKEHDKYYKIVEVAISMTKLKNGILEHVILSTQNYYFLNELQDEVHLPLIENEPKGKMEEKYRVWAINIFYLIDGAATGQ ->t36 -ISAKKQRNGLKVSYIKGPESDCTTKVSFYYGKAKARIGMDVDNSRDQKIALQIMSLTLFFVEYYKMRPLGDSETKRLMSSLLRIRANKGGLIELMINVERYQFFGYTKDEVQHNMFLRAYKMELPGIRYLRGMLAFASHIVSVELD ->t37 -YTHTEWVVTLAGDVDEDPRQNGESFGQTANQGLCNNRVRTLQGSSICPFQAQKDQMELLIIPYKQISKEHDCLFKMIEATIDIARFKKSILQNIALHNREYYYLAEMKDEVHLSFIVRHLKPLLREAYQAYAIGIPYIFEGAATGN 
->t34 -IPIKDPQKLVTNNVSQSELCYPSNLVTVNFQDINREYGVNLADSETLPLLGETTDFSIFFLEFLKLVQHGEAIMRIIANSLIVIKLQHGGREDVYLNSKRWHMDVCRTDDPPVGGYVRDLQVKFDSIDKLFDIKAPYNVLASPALE ->t35 -YSQNQWSVLVASGVDEEPRQEGKQFTELVHDNLRDDKDQTPLSTSLAAQKAQKDQFSILIIPYKQMSRHQDAYFKLLQVTHAILKLKNGLIKDLFLFNRRYYLQKHVEDEVRLSLLVRIVNVLLDNLYRNLASEIPYICQYAESTK ->t32 -YTHQQFIQTIAGDFDEKPEQSGESIASTANEVWCQNRFRCLNGSYVCGILQTKPGMDIIIMPYQQISTENNNILKLIEAVIPLYVLTRAILAQISYYSHEYYFMGDVRDDVHLNFIVRQLKPLIEELYKAFGMNFPYALEGTAFGR ->t33 -IKLKSPAILVVCSGSDAPCPQSRYLVAMEFYQLQKDLEVKLTSNEAVPTDAAATEVTLYYTEYLENVKQGERFIKLAAGGFCPIKLDEGRPPDSYLRAKRNEGNRCQRPTGLVKVFVNGLKDKNEKIARLFGFRAPSITTEGLDEE ->t30 -IPLRDPQVIITGNVTADELSRFSKLVTVKRGNIIKSLAIVMEDSRGAPVQGDMLEYTLIFLENIKVTRHAQQPFLFVKNGISPIRLEEGDATDIYLKERAYFASGSKRSVPPVDLVLKTVNLNFDRLGAIITLRKPYGILGTRVDE ->t31 -IPLRDPQVIITGTVTVEPLSKSAKLATVKRGCEIKDIAIVLQDTRPAPDQGDMVEYTIFYLEFIKVVKHGQQIILFVKNGILPIRLVEGFAADVFLKTRAYFASGRQRQVPPVDVVLKTAQFNFDKLGAIILLRSPFGILATRVAE ->t58 -LSSKEQNNGLRVSYIKGPEASCTAEVSIYYGKGKDRIGVDVDNSRDQKIALQIMSLTLYYVEYYKLRPTGESRSKRIMSSLLRIVAHRGQLIELMINVKRYEFFGYTKDEVYQNFFLRAYKMELPGIEYLEGLMSYASNIASLEID ->t59 -LKIREPKVSGSNMVADDLVTKNARIATVNKSKIKPEACVLITVNRTVPREATRTPYEMFYLAFLIAHYQTEAVLLFAKSGRMPIRLEQGSVQDYYLKAERHYLEACRKIEGCVDIAIRTLNMNFSPLGKYIILQSPYYILASPALE ->t53 -FRASAQNSGLRTPFIKLPQNACQSKVEWYYGMVKSKIGVDVDDHRDQKMQIQLRFHSLYYIDFFSIKSVGDSKPKKIRSALIKIKLHQKGLMETALNAKRFEIFGRQKDERPRDIFFQAYKLTIPSIYHKISVAAHNRKLARVVLN ->t12 -LEISNGKRVETPDIEANETGRSTDLDTSIFPPNKQAVGAKATDPQVTHIKTGRQRLSYPLAELRMVLQHGDLIIPIKRGEIISIGLENSRIEEVMLDNRRLFADECEKNEEKIDNILKDPRIRYKELIRLVGMASEYSCKASAVAE ->t50 -QAHKNFIQTISGDFDQKPTQAVEGITNTANEQWCQTRFRCLNGSYVVGILENQPGSDIVLMPYQKITNDKDNIIRLIKSVLPLKTMFRSIFSKITLNTQEYYFMGSVIDDVHIGKLVRQLEPLINEIYRQFGMNFEFSIEGPAYGR ->t51 -IPIKDPRVFVASSVSTDEIPHSSNIVTVNYDPINHAYGVGLADMMAVPMIGHRVNFTIFFLEFLKKPQHGDAIISLIANGIIPIKIEHAGSEDVYLNTKRWHMDGCHRDKPPRNSTVKDLKTRFNQLERFFNIKSQHCVQATSVLE ->t52 
-FGLSEPNSGIVGKVSKAPQACTSNKVKLIYGNCKTNIGVEVNEERNSPIQSQLKEVNFFFLEFLSMAELGNARLKLFLAAIIPLKLEEGGILDLLLNTKRYDMHGHQREDRPLDVFVKDFKIELSGYSRMSLANTEHQCKASSVLQ ->t13 -LPIYDPELIVGGASRDEPVPNPTFMYEVLVTKARPDLSSGYQSPNTPGPDNDPMELNIYLLAYFGLESYSELILNYIIGDFIPLMGEEQTFELMYSKTRRLKLVACRRDDLPVGVVCKDIDFGFCDLVRILGIRSSWGNQQVAVLD ->t54 -YSSLQLTKLVKGEVGQIEAEHGTLLAELRYDQAAKDAPKGIVGSDLTVFHKIQKKYGINIILFKVREQASECELFLTQGEVLTIRGTREFVNLLLYNAALYHINGTVQDDLRSSLKARGIHPTFDKSYPNQLESAPYLCVNAGTAA ->t55 -IRLKDPETFLSCSVVDDPIPKTTFLSTVSYEEVVKDVDVKLDDSGVVPNEAEQTDFTILYQNYIAVVKNGTGILRIIAGGFLSIKLEEARNDKSYINSQRAAMTTCQRPAELVDVFTKDAMICQEEPGKFIDLRRPNVTMPQLSED ->t56 -FTQQELIRNVMGNAGEKALRCGCGIGAAAGDTWCQDPAFALRSRYMCTVLQLKPGLEIILIEYDTIAFEFETLLSLIESTTAQKKHDRAILDAIRITTEEYYFLADVKDEVYLNFLVDAMRPVLNEIYKTFGVGFPYNIEATTFTK ->t57 -GPVEEPKVMLTGDAADDPRAAKSNVVKVWFPNAKDNIGVDVSEEKTVSIQSERKGFRFFFLEFLKLVQHGDAIVQSIAGGALAATIEEGDLKDVLLRTRKYYIDGGQRDDPPLELYVKNLEIKFDDYMRISGLYSEHESEVSQLLE ->t8 -YSSMQFTKLTTGEVGQEEAEHGALLAELRYEHAANDAPQQIVGADMKTFQTLQKSYSINILPFEELTQNLESELYMTQGQLLTMRRTEGFVNLLLFQHRKFHIEGDVTDDMRMSLMARGIHPSFNKSYPTQLKSAPFLCVNAGSSQ ->t9 -HSLNEWRMLVTASIGEDPVIHSAGTSEASFRQLQEEFNKAVAGSAMMPIQTEDTGFTENSIPYTETDKRHESFSGVIMGKDLPIRLKAGFLAKMVFSNRKYYMDGKLRDEVRSSIIVQGFNPILGSMYRRFSATKPFYCVDTGVRH ->t6 -LPLREPNVLLTGNVSDDPEAHASDVVKVLYPNAQDNIGVNVSGENTVKIQEERRGFRFFFLEFLEWVQHNDAVVQMIAGGEVPIKLQEGGLEDVILNSHRYGANGGERDDPPLDLFVKDLQVKFADCMTISGFASQHKCEVSQVLQ ->t18 -LRIRDPHTLVTNLTTDDLTSKNTKLVVVNRQKIRSDAGVAMTETRPVSKRGGRTEFELFVIDYLKVVYEGESIFAFAKSARFPVRLEQGRCEDGYLRANRQYLDGCRREEGPVDIFIKTLNMNFDQLGKIKFFRSPYALLLQRTLE ->t4 -FPLKEPKLLATGNSTNEPSAHNSNVTKVIYPDRRDKLGAEMADERTAEIAAERKGYNFFLLDLLEMPNHDDAILQMIEGGIMPIKLEEGSFRDVLLETRRYSMEGIQRDDEPLDINVEDLSISTADMLKISGPSSKHQAHVSSVAK ->t5 -IKLKDPKTLISCSLVDAHIPRSNYLRVMGYQEAVKEMEVKRSDSPVVPRDATQSEFIILYQDYIAVVKGSVQLLRIIAGPFAAIRLEEARGNKSFLKSQRAEINTSQQPAAIVDVFSKDEQLSAEELYKIIGIRSPHAIMPSLSEN ->t2 -FPLREPTLIAGAGLSDEAVATSSHLVSVLYPSVRPAQNTASSDDPTMPHNEEEEDLVSIILEWLYFLHWTEQVIKILHSSIIPLCFEHGGVEDTLINTQLITLNAYQRENPPLEALVKELTIGVCDAVRSLGIRKPFTCKLTSALE ->t3 
-QVYTQWRRTAAGAVDEFPISNDTDFEARANKGFCKYHVGILQASLICTIQLLKPPMSILIIPYKQIAKEFSTFFKFKQTTSKLAKITKSFLQDITLHYKKFYFWAKMKDELQSKFLARHLKPRLNEKYNLYAIGIPLLLEGASFGR ->t19 -FPLKNPKVMVTGDVSGNPSTHSSKLDTVIYPNNKEDLGVNVPGKNVVPIQSERQRLLHTLLEIEKMCHLGDIILSLSSGEIIPIKHAEGGLKASMANIRRVHMDGFEKPDEELDILVKDLQIRRQQMVRIFGIASDFGCKDAPVAE ->t29 -YSSMQFTVIVTGEMEQEEVEHGRLLAELAYDKLRSDCPKTTVGADMKTFQTTKNEFSIDIIPFKELSQNQESYLQMTKGNLLPLKATSGFVSEIIFHDRKFYITGDVQDDVRISLMVRNMRPLVNKAYRALTTSAPFICANAGTKQ ->t28 -CTHSEWVSITSGDVETEPFQGGKDYKSTRNQNVENKRVHSPSGSNMCPFTARKEQMVIVELLYKQILKEHDKYYEMIEATISMVNLKNGILEAVILTTQGYYFFRELQDEVQLPLIEKEVKPREEEKRRSWAINIVFHFEGACTGQ ->t7 -IKAKEPHHLATSSACESPVTHANHLVSLSFYHIQKGMECKIPDNKEVPTKGAANEFTLFYTEHLEMVKEGQYFIRIIGAGFLPYKLQQGAPPEVFLKANRNEHDTCQRGALLVDVCANGLKVKRDSIAKILAFRSPSVSIEGMNDE ->t21 -FPLRDPSILIGGAVSETPVPDSNYLSEVMYTSLKEDTAANVSNEDLLPPNREPMEADNYVLEYFHMLQHAEFLCNFLIAGIIPLGFSEASLENLYLNMTKFRLSACHHEDRPVDVSAMDIEIGYCNLVRMLMIRAPWGCQLTAVLD ->t20 -YTSEEWLRTLVGLVGEKPFQSGESIGHAAEETWCQNRKACLQGNYLMTAITAKPGLDILAIPYEEICTKFETILKLIEATAAYKELSRALLSRLPLWTEDYGFLRDVRDDVVLGFIMQELRPVLNEIYKAIKMGFPFTIEGGTFTR ->t23 -ISAKENTRGLRRRYIKGPEAQCTTKVSYFYAKAKARIGMDVDNSTDQRTCLQTMEFALYFLEFYKMKPSGPSQSKQIMGSSLRILSDKSGVISLMLSVDRYEFFGYTKDERTQNLFLKAYKIELPGILIMQGMLALATLIAAVYLL ->t22 -KVHTGWRKRVGGAVDEFPIQNDSSLEARANKGFFTNRTGLLQASLICTIQAQKPPMSILIIMYKQIATEFTTFFKFIESTSKLAKLTRAFLQDITLHNKKFNFWADMKDELHPKFLAAHLKPRLKERYNMYAIGIDLALEGAAFGK ->t25 -VNLMDWVAIFAADVVNETPENGKAYSSSGNKKMTNARVNTPEGGEICPFQAKKQKMNLVPIPYKVIVRESDKYYKMWDVTIHKVRKKNNIVEYVILTTQEYQFVNDLQKQVHLPLIENELKPRFEEKFKAWALTIVYHLQGASTGA ->t24 -SRHSDWVALPAGSTDDGEAEEGQAFNSSSEQSFQNQRVNTPEGNSICVLKSDAQRMNLLPIPFQMAVKEDDRYYKILEVVISMVKLKNGIMEHVILSSQDYYFLNELSDEVYLPMIEQELKGRLEEKYRTWAINIFYILDGAQTLQ ->t27 -LPLRNPRMLVVGEASANPTTHGANLDTVVYPNNEEDMAVDVADPKVVPIVSPRQRILHTLLEIRKVCRLGDIILRMTKGKIIPINVEEGGLEAPMLNNMRLIMDGCEMANEKLEIIVRDVDLNFNQLVRIFKLAKDFACGDSSLVE ->t26 -FGVQEPNAGIQGAFSEGPQAAVFEKVKLFHGNAVARTGVDVKRKRNQPYETQLKGFNIYYFRFISLRAFASKKLKFLVAAIIPLKLHEGGLLDLLINTEKYDYFKHKNDEKVLDLLIHSFQFEISGTERLSGIRSRHLCTAGTVIM - 
-((t29:0.05280573043,(t8:0.1707062623,t54:0.4563089757):0.4502145663):0.04604521945,(((((t1:0.2523925268,t17:0.04893670706):0.1426741928,t35:0.1623440633):0.03288153093,t41:0.2137606088):0.1984404844,((((t14:0.350833764,((t32:0.05632254801,(t42:0.4239481463,t50:0.3244275825):0.2995529481):0.3005377535,(t56:0.4754451768,t20:0.4486943493):0.2195548436):0.1336138019):0.2296215529,(t22:0.184670564,t3:0.1188588194):0.3157209915):0.1997793677,t37:0.07173703872):0.255687749,(((t39:0.1018017628,t24:0.3299387498):0.1679739462,(t25:0.2473670535,t47:0.1672787085):0.2609574371):0.09996401774,t28:0.3953334502):0.3279668446):0.4376199371):0.4736758792,((t38:0.4591847448,t9:0.3637869872):0.2299072992,(((((t27:0.1665153981,t11:0.05876074303):0.1815084708,(t46:0.08615306726,t12:0.4424950442):0.4195314075):0.04336102305,(t19:0.1793081591,t49:0.06875846462):0.02368280315):0.3821140797,(((((t6:0.2117033995,t57:0.4349819856):0.03198438578,t16:0.3610858557):0.08817989085,t4:0.498529807):0.1715543601,((t26:0.4253129638,((t58:0.1590869269,(t23:0.4590332308,t36:0.05938430695):0.154107886):0.3927315217,(t10:0.2527349893,t53:0.20671708):0.461401819):0.3939315621):0.2628853519,t52:0.2259437314):0.3334873459):0.09892685884,((((t51:0.3809899437,t34:0.2898624636):0.1051600474,t44:0.02460373248):0.1178056831,(((t7:0.3473253659,t33:0.3713593586):0.2971306988,(t5:0.2939948056,t55:0.2596151642):0.4792350665):0.3770696354,(((t30:0.3257943266,t31:0.184388829):0.2635684919,t45:0.153127592):0.09515162606,((t59:0.3470149091,t48:0.02510361683):0.3210526931,t18:0.3656459429):0.3970908411):0.2982931658):0.06325583487):0.2602164549,((t13:0.4262702915,t21:0.4880884429):0.3763355494,((t15:0.205492315,t2:0.1790341026):0.375460504,t40:0.3479060988):0.06837146013):0.486690789):0.05120347504):0.120613303):0.4450651569,t43:0.3919042494):0.4272981585):0.4669327336):0.4853668408); \ No newline at end of file diff --git a/tests/hbltests/libv3/data/protgtr_fitter_alignments/prot2.dat 
b/tests/hbltests/libv3/data/protgtr_fitter_alignments/prot2.dat deleted file mode 100644 index 93b7da4b8..000000000 --- a/tests/hbltests/libv3/data/protgtr_fitter_alignments/prot2.dat +++ /dev/null @@ -1,76 +0,0 @@ ->t1 -DGASETYETHNKEDWSDGCIVLLTLGIQEPENVNERVGINLRFQGLALMAATDLAIVENAIVLNYGIKKLMIG ->t14 -SAVSVGYAETGKMKTKTAPIPVFQLRSTVPYMSLDLVSVDFLELGIAKIWMVNLILVQVLVALRATLSRLRLK ->t15 -ESVEYVWDHRGKMDWNKGNCSICEMRAKPAACPADIFAIGVTLSDIDILQRDEIKKFESGALYEFSVRQIEID ->t16 -IKLSKGEQEKDKVDIASGSGNLVHLRDPRPSFVATLFNVSFQVQGIALMWVTQITLFDNEGVVKFGTRRLRLK ->t17 -DGAHTVFDNKNKEEWYTRCISIKAVGAKRPEEVEAVFAIALRLQGAELISDTEIDIIVNGIVLPYTIHKILIH ->t10 -DGRNREWEEKDKLDWTEGCIALLTFFSPKSQNVARRLGLMLRLEGIEMLSATKLALVDNGGVLHFGVRTIMIE ->t11 -ENAMVVVSERSRKEYPTGCIALMTLPTQKKENRHHTLAVNLNVQALPVIAGPALDMMDDGLVLDVVVRKPVIV ->t12 -KNAEFSTEFRGEMHWGRAAQVIKDIKAPRATLPSDLFGVGANVGSLVIIVRPKIEIYNNGAIKDYAMKYIKIQ ->t13 -AETSMGRLPSGTLEIQAGPLPLYQLKKATPSMSLDLVDVDFHTMGLALLWMVNLLLVDLVGVLKFGLRKLSLK ->t36 -AEVSMGGVPTGKMEIKSDPLPVFQLRNAVPYMSLDLVSVDFHAMGLARIWMVNLLLVNVVAALRITVRKLALK ->t37 -DKLAKSSENKNEKDWTAGDMALQTLKSPQPEMAADLSGINFQVEGIALLFVTQITLLDSGSVESFGVRKIQLE ->t34 -DLLYRTWECQGKMDWGEGKLALIISVCPQPQSVADLFAINARLEGINLLSCEEIALVDSGGVVHFGVRGILIE ->t35 -IELSKGEEEKDRVDIANGSGNLLHLKDPKPAMVANLFNVNFQVSGIALTWVTNITLFDNEGVVRFGTRRLKLK ->t18 -TELSKGEEPKDVQDIASGEGTLLSLKNPETELVADLFDVNFAVQGIVLLWVTNITLFDNEGVIAFGVRRIKIK ->t19 -DGVHRTFEEKDKLSWAEGCISLFAVGVPQPERLADRIGFDKRLDTIPQITDTRIQPVSNGIVLWYGVKRIAIE ->t32 -DGKRVVLEDKNREDKKTGCIAMLTLVQQSPEKVERNSTVNLRLQAAEVPPGSELKKLEDGILLHVGLRRLVLV ->t31 -AGEEFSWGFTGEMHFNKNTFAINDIRAPRACFPRNLVAIGGNAGSLNILTRDRIMIYDNGAIEDYAPIELEIQ ->t8 -DLLFKTWECQGKVDWGEGELALITSACPQPQSVADYFEINARLEGIMLLSSEEIALVDSGGVVHFGVRGILIE ->t9 -DGVHRTFEEKDRLSWAEGCLALFSAAAPQPEDLALRFGFQQRLETIPSMTNTEIKPVSNGILLWIGAKRIAID ->t6 -DGAHTEVENKAKDEWFTRCISLKAIQCKRPEELDSVFGLYIQLQKAELIDDTEITIIVRGIVLVFSLKKIMIS ->t7 -DGCHRTWDEKDKSDWSEGCLALITFFFPKSKNVAKRIGLMLRLDGISILMSTELALIDNGGVLHFGLKTIMIE ->t4 -KNAEFSMEFRGEMQFNRAAQMIADISAPRATLPSQLFATGANVGTLNILVRPKIDLWNNGAIKDYAMKKLKIQ ->t5 
-ADTSMGRVPSGRLEIQADPLPLQQLKNATPSMSLDLVDVDFHTLGLLLLWMVNLLLVDLVGVLKFGLRKLSLK ->t2 -TGANCAYEGQNNEEWSEGCIVILTLGIQGPEDVKPAVGIDLRLCGLFLLQNMDLLIVDGVIALNYGSKKMAIE ->t3 -DTLARGEEENDKLEITTGKLKLIQLKNREPKMMANLFNVNFQVQGIQLMWVTNIELFDDPGVVKCGVERVAIK ->t33 -DGAEVVLEDHNKKDWYLDAIVLLTVVQGEPEKVREASVVNLRLQCAQEPPGAELNILEDGLCVQVGLKKLVIE ->t29 -EGVEYVWDHRGKMDWNKGNCSICEIRAQQAAVPADIFGIGMRLADIDILTRDEIKKFENGAIEDFAVRELEIE ->t28 -DGKRVVIEDKPRKDLKTGYIAALKLIDLGQKQTAETSAVKLRLQTAEVPPGAKLDMLDDGILIHAGLRKLVLV ->t30 -DGAHTVVECKMEDEWFTRCISLKAVQCKRPEELDSVYGINIRLQGAELIDDTEITIIVNGIVLPYNLEKIMIH ->t21 -NGSLCAFEARINEEWSEAEIVVITLGIQGPEEVRRAVGINLRIAGLMLLQNLDIAIVDNVMQLNFGGKKKAIE ->t20 -NGLLCAFEGRGNEEWSEGCLVPMQLNIQGPEKVKRTVGINLLLKAIFILRTISITIASDVMALNYGSIKKAIE ->t23 -GSLVRGEEDKDKFEFVNGTMELINQKSRDPKMLSDLFNFNFQVDSIELMWTTNIVLFDQAGVVKFGQKDSALK ->t22 -MGENCAYESQNNESWSRACLVGLKLGIQKPEDPRKGVGLVLRLCGMYLLQNYDIAILDNVIACNYGITKQPYE ->t25 -EGAQVVLEEKDRKEWPSGCIALVTLPQQKKERAQQTTAVSLRLQEVEEIPGPELNAMDDALVLDTGLRNPVFC ->t24 -ENCHSVFEESDKQNWNEGALSLLSLAAPHPQNVSDRFGFQLKLEGITTMADVEIVLLQDGGVLRYGVTRAMIH ->t27 -GSLVRGEEDKTKLAFANGTIELLVNKRRDPRMFSDLFNFNLEVDSIELMWKTDIVLFDRAGVVHFGQKDSAMK ->t26 -DGKRVVIESKNRKDWDLGYIALLKLVETSPKKFAEDAAVNLRLQLGEVPPGAGIDMLEEEIVIHAGLRGLVLV - 
-((((t34:0.07550475613,t8:0.1661541371):0.4656987466,((((((t2:0.1288377236,(t21:0.1811405006,t20:0.4858660521):0.2240310247):0.03964987683,t22:0.4599127006):0.3359611631,t1:0.2363237946):0.2728351074,(((t11:0.4032538521,t25:0.2348158365):0.327462819,(t33:0.3799740688,((t28:0.2428439159,t26:0.3299351952):0.1542282197,t32:0.2837195101):0.1950211665):0.1196306065):0.3229062755,(t17:0.4100004928,(t6:0.1863123286,t30:0.1048943802):0.1123661269):0.3444500776):0.1683306926):0.2763354286,((t19:0.3096567021,t9:0.3329816794):0.3133107798,t24:0.4139463856):0.2179547601):0.01925154311,(t10:0.2766797507,t7:0.139320353):0.2086227393):0.2007250186):0.1953729873,(t37:0.215303967,((((t27:0.2903754513,t23:0.06236225095):0.3131607551,t3:0.1956412904):0.3597712269,(t18:0.3874756565,(t16:0.2171020633,t35:0.08968397858):0.1611236004):0.1602950482):0.05909167558,((t5:0.08690790882,t13:0.07878436548):0.1031973071,(t36:0.04197606065,t14:0.4876447841):0.3369911134):0.460927923):0.4784635461):0.4117354944):0.4197408466,((t29:0.03356232398,t15:0.1620883915):0.2348440097,(t31:0.4152694446,(t12:0.08822142891,t4:0.1750063027):0.3417265624):0.3107736763):0.4474694425); \ No newline at end of file diff --git a/tests/hbltests/libv3/data/protgtr_fitter_alignments/prot3.dat b/tests/hbltests/libv3/data/protgtr_fitter_alignments/prot3.dat deleted file mode 100644 index 498e4b48d..000000000 --- a/tests/hbltests/libv3/data/protgtr_fitter_alignments/prot3.dat +++ /dev/null @@ -1,184 +0,0 @@ ->t89 -SMGPELLCGVEQSRRWEESDINTVVIVLRWTQVKQVKDLLSTLVGFLAADNNRVLALFL ->t88 -NIFPDLNIGTESRLDHLHNDIENLELTFTYLNSQHWSNITTSIAYMWVMEGENEIHLLH ->t87 -SLFVEQILGTQITLTIQHMPVTELAISYKYKSASQSAQLKMNLALYLVQNGDKTIGLWR ->t86 -AMSAELILAGRIEIDLKAFVLLEIDQNLKYSRVQYVVRLRANFFRRWVNRGVNDIATWE ->t85 -QFVITNLLVTALRIEWQAFPIKDLKLLFEFQQVQLGLERNRDLKRNWVLSVAKRINLFY ->t84 -TLWLEELLDAKTTVFLAHEELSNLPIAYKQKMVTHKCNFEKEIEMNWALAGESEMGIWF ->t83 -NKFQGILANVQHKLDWKKYDMHDIELRTIYVNSHHAFELVQILDHTWSLANDLDIGLLG ->t82 
-CQIVEAIMGSELVIDLRHIQLNDMALFFRDNLSTYTTKYGSNLEEYWSLDGFTEIGIWY ->t81 -SVNAEDTVGTKITLDLVHLDISELEMTYKDKDGTHAARLKSELELAWVRDGNKQIGYLL ->t80 -ELTLESLFGDSSGITIEYIEMSETTISNQHKQACHSSKVDQDLAFFYEADVEKSIGPYK ->t72 -ELTREQLFGSSAAITLEYIDMTEVTISNQHKQACHDTRFDKELSFYYEADAATSIWIYL ->t73 -CEITEALFGSDLVIELRHIQLEDIALSFRPQSSTYTDKYGSNYDAYWTLYGFTRIGIWF ->t70 -CAGHEMNTPVVHRADWKEDELDALDIADRLQKLHSAANLKESLLGLAFRECNKVINKLY ->t71 -GILPDQILGTSLKIEWEHFEMNEIDLFLKYDDVLHEADFKTDLDMYWVQEGQDNIGLFL ->t76 -ALFAAQILGSKLTLDIQQINLTELHINYKYKNATHAARLKSGIEFYFEQNAEKNIGLWY ->t77 -SVVVELIIGLPITIQWEKIDDTDLDMQLKFYNVQYVVRLISNLIKWWVEKGTERIGLWY ->t74 -TVVEKMILGTPIMLDWDHIDISSLQMQFKYRHVNYKVRLKRNLIKWWVEEPQGEISSWL ->t75 -SVCEDLILGNPLILARYHIDPTPLQLQPKYEVPNYTFKYKYEIIKWWVDDSGGSILVYL ->t78 -SLVMSVILASRIRLEWSGLSIHELELVFQFNYVQQERRLRPNLTDWMVLTNSKQINILY ->t79 -TLAVEMILSAPISLEWRQFDIVDLDMKLKFSHVQYVTRLKSNILKWWVEKGPKNIGLWH ->t90 -ELVLAKLLGSKLALNIQEMVWTELCISFTHEQSREETKFEQDLAFYYEENVARKIGIWL ->t91 -SVIDELVLKDALWVHQFQIITCPLEKNASYSQWDAKVSTKNNLIRWQLTHAPGELRVWA ->t8 -NLFQDMELGTEYKIDWKKNGVAQLEIKIRIVASLHAPDYVQEMTQFWALDGERFIGLIY ->t9 -GILPDQILGDSLRIEWEHHEIDEIDVLMKYNDVAHAVKLKTDMDYYWVQEGHRSIGVFF ->t6 -CQLVEQILGAQLVLDLRHIELADLALRFRNQLSTYASKFNSNLEEYWALDDATELGIWY ->t7 -TVVDELLLTKPIFLCRYSIVTCPVSKKAEYKATNFKVYTQKSLISWKVKDASGEIMVWL ->t4 -RIAVSMLCAVRIRIDYAHLDVEDIDFVFQYNHAQHLIRLITNLTDWLVLAGGQEINLIY ->t5 -CMGPSLMMGTEHRRNLKDSDIETLEIPFRWTKVQKVANLITSFHKFIAQDNNRVLGLFY ->t2 -SLFDGLEVGVENKIDWKSQNVSDVEIKTKYAGSQKGADMSQILAALWALYSEIDIGLLG ->t3 -PFIPELTDSVGLRMDFDDRNLEEIDICYKYCKVSHVVELKEEYTLYWVKESHQTLGKFA ->t1 -VVVKDLILDTPIIKDRYHLDVAPLELQPRGKTNHKMYTMLYDILTWYVDNDGGAIMVLV ->t14 -GIIPDQKCGDAIRIEWEGHHIDELDLLMRYKDVPNAEKLENERAFYWVQEGRSSIGVFF ->t15 -QLVVSMILATPIRIDWQHIDITDLEMTAEFNHVPYTLRMKKNLQEWWTPQVAKQINLWH ->t16 -PFLPEGADSVALRQEFIEKGLEPIDYAYKYSQVNRMTDLREDLSLYWVFESNSSLGKLQ ->t17 -SVIDPTMLFEAIILKRARILNCPLMLNCNFDATDFKVQTKDKMAKWKASDALGEIRLYL ->t10 -TMFPDINLGTEHRLNWKHTDIEKLELTFRYTQVQSAANLITTLQHFWVMEGSRAIGLFY ->t11 -ELAAEYILNHPIELDMRRINISSMTHTIKFNHVQYVVRMKGNLLRWWLEQGSKSLDIWR 
->t12 -GLVPDKILGNSLRIDWEYMEMDEIDIFIKYDDVLYVVKVKTDLGLYWLQEGQKNINMFY ->t13 -EVSLEQIYGKSSAIQIEYLDAIEVVISDKHQQACRETRINQVITFYYTASGATTTWIYT ->t18 -SFLPEIIMDTNVQLSFPNNEILELELKLNYGDVSHAARLRNNLKYLWAPEGSQKLGLCA ->t19 -GLLPDQIWGKKIEMELEDFEMQEIDAFISFNDVLHEVDFKTDLDMYWVQEGQKNIGLYW ->t29 -PLLPAETQGVVIRIDWDDDQLDEIEISTLFKKVSRKVPLKEDIGFHFVFEGQQTTGIFA ->t28 -QFVITDLLVTAINIEWQAFPIKELKLLSDFQQTKIGLPRNRELKRQWVLSVAKSINLFF ->t21 -ALMLNMLLACPIKLDWMEFKMSDIKMIFEFHHVRYALEKKADEERKWVVRTAKSINLYL ->t20 -EGVEESLLGTPIKLDWFHMSIAPLAMAAKYVTADYKVRIKQILIKWWAVSNVEQILAWG ->t23 -CQIVQVLLGFELTIDLRHLQLEDMALSFRDQLSTYTSKYGSNLEEYWAGDGFTELGIWY ->t22 -RIVSDILLESELKIDWLHNDIEELELRFTYALSDRWTSIAASISYLWVTQADREIGSIY ->t25 -CTGPELSEPVHHQADWKESDLDSIDISKRLSKMHSRANIIATLLYMAFKECKQVLQVLY ->t24 -ALWLESLLGAKITAQIKSIELTDWPMAYKQKQASSLLHFDKDIKLNYALEGGTHIGIWF ->t27 -SIAVSMICAVRITIDYAHIDVVQIEFVFQYNHAQQTIRLKTGLIDWMVLRGGQQINLIY ->t26 -AVLQEIILIEAIILNRAMILTCPLKTSPNFNAVDFKVQTQNRIIIWRRRKALGEIHVFE ->t49 -SFLPEIIMDSGTKIDWGNNDIVELELKLNMGEVYHAVKLRQNLKYLWAPLGSRPFRLCA ->t48 -SILPDIILGAEFAFDWKHNNIAQQDLTFRYINTPHQSDLESSIAHLWLMEGDREIGFFY ->t43 -TMMPDITLGTRNRFAWKNHDIDNLKITYRYSQVKSAANLITSAQHFVVYEFPKKIGILY ->t42 -SILPDQIDLTNTRIDLADFDIQELEMTYTYGDMSHAVSIPNRLRWLWVQEGKRSIGLSF ->t41 -SFLPDQINGTNTRISFSDHDILELERTYDYADVGHAAKLKNNLRWFWVQEGQRKIGKCY ->t40 -GLITDQIWGTTLQMELDDFQMQEIDAMLRYNDVLHDADFKTFLDLYWVQEGQKSIPLLW ->t47 -SIFVEQILETGIVLDIRHIHLTELAISYKYKSKSESARLKHKLELYLVQDGDRKIGLLH ->t46 -TMMPDIMLGARKRFAWRNHDIENLEITFRYSRVQSAANLLTRAQHFWVYEFPKKIGIFY ->t45 -EGVEEKLLGTPLKLDWFHMDIANLSMAYKYCSVDYKVRIRRILIKWWAPEKIDELVAWG ->t44 -VLVILMQVIKPIAMDVAHMDIDELDMTFQIYHSCYALRLAEGLSKWWIKLGAKQVGIWT ->t38 -TLVVMMMVAKPIAIDWMHIDITDVEETFQFHHVVNTLRLKRGLQKWWMRAGTKQVGLWY ->t39 -VLLAMRTVTRPMGIDWAHIEIANLEVNSEFHYTVNALNLSEGLHKFWIKAVNRQVGIMV ->t36 -PFLDEEADEVALRQDYEDRGQEDIDIVFKYCEVNYMTELNEDLSLFWAHESRTSLGKFF ->t37 -NLFEGLEVGVENKIDWKEDHVSDVEIKTKYAGSGHAADVQRILAQMWMIESELDIGLIA ->t34 -HSVEQYILGTCKIYEYFHIDTSPLFMSYKNARVFYQLIIKRLLTKVWAAQNSSQLMSWM ->t35 
-ELATELILSSPIEIGFKSFELMELDMKLKFSQVQYVVRLRSSLFRFWVEKGANNIATWK ->t32 -VVMKECIVDTPIIQNKFFLEVEPLELQPRGLTNNRLYTMQYNFLPWYVGACEGAIMVIA ->t33 -SLLIEQTLGSKIADHIKHIELTELPPCYRRKRATHEARLEKEMEMYWVWDGATEIGLWF ->t30 -VVVKELILQTPIIQDRYHLDASPLELTPRGKTSNKMYTMQSDILTWYVDNTGGAVMVLV ->t31 -EIVLEQLLGSKLAIKIQLIVFATLSISFQHKQTRDESRFTQDLAIYYEEKKAKQIGIWE ->t58 -HGVKEAILGDPLILDYFHISTTPLAMSYKNARVDYKVILHRMLTKYWAAENSDQIISPM ->t59 -TVVDEVILDSPIIQDRYHIDVTPLEFQPRFKTSNKKFRFKYDLLKWYVDDGGGAIMVLV ->t50 -GFLPGQCLGTSLKIAWEAHELKELNLIIKYNDQMHEVDFETVLDLYWLQEGKRTIGLFF ->t51 -SIVTEVILGTAIIIDWDYIDIAMLQLQYEFLHANYSIRPKRELIKWWVKEGADLIGTCY ->t52 -ELYADQILGQKVTLAIQKIDITELKITYTYRDGANASKLKGALELYWVRKGSKDIGIWF ->t53 -SVINELVLKEALLLDQFALVYCPLQETENFNRWDVKLSTNNEYKRWSLTQLDGELRVWA ->t54 -EKFNGLLVGVEHKMDWKKHDMSDLELRTAYVNKRHAADLVQILNHRWSLEGDLDIGLIG ->t55 -PLLPEETQGVSLRITMDDRNIEEIDVTFRTGEVSYRVPLKDDMSFGFVYEGQATIGLYA ->t56 -GILPHSGLGDAIRFDREWSLLEESDLVTKYRNLTHAVRIKTEMNWYWVQEGQKSYGVFY ->t57 -RPHLQQILGTALTLRMQRMHLSEVSISTKYNDASQSSRMKSNLNLYFTQNGDRKIGLWF ->t69 -CAGHELSTPIHHRVNWKENDLDSLDISFTWSQVHSSANLILSLQYIAHRESNRVLKVLY ->t68 -SIVTEVILGTPIIMDWDHIDITRLQMQYEFRQVNYSLRPKKKIIKWWVEEGNDLIGTCY ->t65 -GILPAQLLGHGLRIARVHSLTEEVEFGTKYRDITHVMKIKKEMDYYWVQDGSKSYGVMF ->t64 -CAGHELSVPFQTHVSWKEGDLDMLDISFRWNKVHSAANQIEACQYLAHRENNQVLKNLY ->t67 -GLSEEFILLSPINLDWQRINISDMNYSIKFNHVEILVRLKSNIMDFWLEQGSKNIGIWQ ->t66 -AMWLETLKGAKITAEIKAIELPDWPMAYKQKQATSMVHFEKDIKLNWALDGGKHIGIWF ->t61 -DLFDGLEVGVQNKIGWKSRNVSAVEIQTKYAGSEKGADPSQILPALWARYSEADIGLLG ->t60 -KLVLEKLLGSKLRINLRFIELTKLSLSYQHKQSAHEEKFEQEMLTYWAENAASRLGIWL ->t63 -KLAGEMLLSYGIEIEFKSYELMEFDMTLKISSIQYVVRLAQNIFKWWEQKGPNNIDIWR ->t62 -CMGPELLCGVEHSRQWEESDINTLVICMKWTQVELVPDLVTTVQEYMASDNRRVLALFF - 
-(((((t12:0.4058124037,(t50:0.414135982,(t71:0.04773313274,(t19:0.2286879604,t40:0.3243552307):0.2733125932):0.1787166429):0.04285015783):0.1370684891,((t56:0.4261670952,t65:0.4030038211):0.4121931487,(t9:0.1199146688,t14:0.3420166821):0.07322587519):0.1571529547):0.1944887109,(((t29:0.4457360164,t55:0.2230271995):0.214336898,(t3:0.1830648972,(t16:0.3508802948,t36:0.3119602069):0.1913000666):0.3164669155):0.3836102735,((t42:0.3065726217,t41:0.2195015104):0.01644932335,(t18:0.1276024905,t49:0.3314598242):0.4223436749):0.3231788553):0.3244174687):0.3424886202,(((((((t63:0.4463281356,(t86:0.4487705319,t35:0.07755016872):0.1364631557):0.3311531787,(t67:0.223298337,t11:0.4563027303):0.2447354441):0.16468024,t79:0.08253695803):0.4153305012,t77:0.3297965309):0.1183197021,((t51:0.1997895308,t68:0.1783694155):0.30722894,((((t45:0.3006322082,t20:0.1922012186):0.1583428227,(t34:0.4481944773,t58:0.2268395659):0.2564666618):0.4452878688,((t75:0.4588891151,((t30:0.1336088778,(t32:0.4242691795,t1:0.1238576204):0.01996989314):0.4225660608,t59:0.0560505195):0.332614762):0.1523819584,(((t53:0.3959951265,t91:0.1985216851):0.3885259972,(t17:0.1919784702,t26:0.3439101944):0.3684804933):0.3416076366,t7:0.2035563438):0.4644157799):0.1666925311):0.1528237067,t74:0.1175228897):0.3958303248):0.2147541236):0.1536742888,(((t15:0.09298435405,((t85:0.05423440812,t28:0.2376628392):0.4166068675,t21:0.3255865238):0.3978607887):0.06766634134,((t4:0.2127887675,t27:0.204731762):0.3784428047,t78:0.4185280237):0.1666912341):0.09724573111,((t39:0.3804533784,t44:0.3386150737):0.2104973887,t38:0.0763741171):0.2933087598):0.04618073053):0.2999661779,(((t81:0.4181110752,t52:0.4753042268):0.121691231,(t76:0.4173459025,(t87:0.1415096635,(t47:0.2851307083,t57:0.3734432736):0.01812370295):0.1818875866):0.1879870618):0.02082100053,((((t84:0.435930954,(t66:0.2202056462,t24:0.1940830634):0.4075173674):0.3888876516,t33:0.3289482686):0.1508635325,(((t31:0.3490703034,t90:0.3124828316):0.1234136608,(t80:0.4310728993,(t
72:0.1032632415,t13:0.4382505357):0.0566574995):0.3835091353):0.1749119179,t60:0.3315673489):0.3509579058):0.2229843093,((t23:0.1933352762,(t82:0.1598001998,t73:0.4888215623):0.05677521894):0.275414566,t6:0.04418534178):0.3828833614):0.2949947462):0.09065046343):0.4526107045):0.1646788803,((((((t69:0.1113295462,t64:0.3718997196):0.04992173914,(t25:0.2774039782,t70:0.4069814369):0.2439034624):0.4599559886,((t89:0.2275867807,t62:0.1229439434):0.3949740535,t5:0.3000476232):0.1556077706):0.2081983375,(t46:0.1036208793,t43:0.1044535853):0.2270604258):0.07572068577,t10:0.1236715544):0.1134664274,((t8:0.406375252,((t37:0.1843091359,(t61:0.1230903916,t2:0.0299357751):0.2440263048):0.1674659117,(t54:0.1960569305,t83:0.3447615895):0.3122112679):0.4911483215):0.495653342,((t88:0.4619603305,t22:0.3467479648):0.3170240384,t48:0.4507798816):0.06783763524):0.1583678997):0.1812778597); \ No newline at end of file diff --git a/tests/hbltests/libv3/data/protgtr_fitter_alignments/prot4.dat b/tests/hbltests/libv3/data/protgtr_fitter_alignments/prot4.dat deleted file mode 100644 index d8880a0a3..000000000 --- a/tests/hbltests/libv3/data/protgtr_fitter_alignments/prot4.dat +++ /dev/null @@ -1,110 +0,0 @@ ->t49 -FTALVDKYFAGQICFLPAQLVKPKSHIDPDFTPENAPTLKVRQLKKLGKKYFDRTRLSERKVDGLSWHADHK ->t48 -MNEGLKRLFIPIMGITSTLAKALEDDRDKEKETRDYIQVEQNLSVKSREVYLTNDEFDRLQAEYLAEQKEYK ->t43 -VVDGAKRIFMDIFIMASIVSEPMEDGANYEREQRAYMQVAQKLSKKSKELILSYMAADRLDCEHIKTSHKYE ->t42 -QPHDYDNYFKWLWLLIEFIITFIREYNGKVYDANSNPTVTVQASSKLLENFTDDTVYEPLRSDFITQGADYE ->t41 -QLSTYEGVFGATIMIEDYHSLAVKQADEKMYEDAERVDKSSKLSDKKVREYTSDTNFEPVSADFIHYEAELN ->t40 -CTDRFEKTFIPILGLTVSIPTALQEGNNKEKDEAEYLRVSQQLSVRTRQVYLTNVEFARSRVEYMAAQKDYK ->t47 -LKDDMYDYMMSVLTAVKFIAITYATPLVKVHELDKKPRDSLSTGRRAPPDKTTQTKTEYIFNAFVGPPEEWT ->t46 -LEEDYDGYILAVRAIVAFMSITYRTPEVQVYQLKLKPQDLLSAGSRNPPDKTPRTSTESIFQDYIGHDAAYL ->t45 -QLKSYEGVVGTADSAEDYGSLSVNAATETLNDDDERDDRDSRLQERKVEEFSSDCNFEPVKGEFIHYQDSLD ->t44 
-KFNDYDGAFTAQLVIEDLLSVAMRTADENAFDDEQRPSATAQCPSRIVEEYTNDSKYDPLKADFYTHDADYQ ->t1 -FSAALGKDFMELIALNYLNPGEVRENLDPAFTDTECATAKLRQQAKMGRKHAAQRQMGKEKVDVLAYDAKSM ->t14 -QLSSYEGVLAPTLMIEDFLTSAVKQADEKVYEDPHREDGTAECPGRIVEEYTSDTRYDPVRADLFTHDADLT ->t15 -LADDKKTYFATGIVTLAYLNVEVKQVDNEGYTARTLRSRTLSACTHLVEKYTADNMFTWLPEDTLKHNGEYV ->t16 -LSGGLSKDFMVLIVMNYMTPGETRENLEPPFSNTEASTSKLRQQGSIGKRYSASRNIAQRRANVLNYDARMT ->t17 -QLDSYRGMVGTTIMKQDYDSLSVKQASEKLHDDEDRADRDARLQDRKVEEYSKDLNFEPVKGQFIHYQNDLD ->t10 -ATDGFKRLELPILPTLISISTNMEGGINAEKAEAEYLQVAINLSANRAEIYLVDTDMDDLRDRYLPAQQPFH ->t11 -FNKGSDRFFFPIITISSALTKGLKQGKDHQKHTYTQIITARNLNKYQQEVHLEQDQFKRLEEQLLMDSQDSD ->t38 -LSEEYRYYYIGVNVIVEYLCFDFRFSWVKVYEAKADPNKVASAASRKSEEHDEHSAYEPISGDKILHDSEIN ->t13 -KLDSYKGVVRTTVTINDYEGLTVRQATEKLKSDEARADRDTRLQDRQVQEQSANLNLEPVKAGFIGYREDYD ->t36 -FPKEPEGYFITLNVVVDLIETANRGSMEEVFDAEDKPTALICPYSEIISSYADDKAWDPGKAKLISQRSEYD ->t37 -LAEGYQGYYLAVKLMVVFVCLNYVTPNVQAHDLKMNPQRLLKVGRRMSTEVDDNSNYEPISGKVITGDADYA ->t34 -QISTFEGVFVASIVIDDFHTVAVKRADERLYEGDQRADASSICQDRLVKEYDSDCNFDPVRAQLFNYDADYN ->t35 -YLENYRGVVSTVIMIEKYNYLTVKQATERVQDDEDRADRDNNLEERASEEVAKELNWKPVNSQFIHYQDDIN ->t32 -LPRDNEGFYTTLNVLVDLLETATRGSEEEVYDAKQKPTEFCGAASKIMTSYTKDKSWDPGKAKLIARKSAYN ->t33 -FADGKEGYYTEVRVIIAFSSLSYKAPDVSAVELQPNPTELLAVKGRFPSAMDDQDIYEPLAGDIITGEAEGA ->t30 -FTGSVDKYLSALIAFRYGIAVKLRNVIDPEFTPDQAPTAKKKQTRKLGKKYLDQTQLNDRKVDGLLWHADQK ->t31 -FPKDPEGFYTTFNVVVDLLETATRGSMEEVYDAKQKPQTYLAAVSEIITSYTEDKNWDPNKAKLINVRSEYN ->t18 -LADGYEGYYTAVRVMVAFSSLAYRTPDVSAVDLKRSPQDLVAVGRRMPSEMDDEDAYDPLSEDVITGDAEYE ->t12 -LWKYYEGYYMAVRAIVALLALAYRTPEVKPYELKLRPLDLLAASAKGPPDFDDQHSREPLSVDFITHDAQYM ->t50 -FSNTLIDYFLRTLGLSYPIVTSITETNDEEKNGSNTLAVEQQKSVLIHEEYLNNTQFEIGRLLDFAFEADYD ->t51 -FARDNTTWETGLNLTLNLLAARVHAAEEKVYDPKEQPTDACASKPGIASDYTDASSWDTDQVRLISRNTAYQ ->t52 -FSAAVEEYNMALLALRYTTPDTLREDIDPPYTDAQGPTSKTRQVVQLSKRHADQKQLDQTRVDGLALDADQL ->t53 -AFDGFERLELPILGAICSLSSSLYGTIVAQKTQAQYLQVAMNLPFQRAPLHLVDVDMSRFRDQFVPAQQPHR ->t54 -FSAALGKDFMELISLNYLNPGEVRENLDPAFTDTDCATAQLKQQGKMGRKHAAERQMGKEKVDVMAYAAKQK ->t39 
-MNEGSESMFIPIMSITQVISKGLEESRDRQKEAREHILAGRDLSVKLKEVHLTTDQFKATTDEPMAQSLENE ->t8 -YSNAFDEYFSKRLGLSYPQTATLADDTDKEKNGTDLITFDKQACMLIHEEYLDSTQFTFSRADELSMETDYD ->t9 -RLKSYEGVVGTSDLAEDYAELTVMAGSQTMHRDEELDNQDTRLQDRKVEEYSCDPNSEPVKGDFIHYYDDLD ->t6 -FSRDNSGWYSGLNCLVSLLETRVKAAHEKAYNAKERPSNACAAVTSMSSSHTDEEQWEPGQISLISRRTAYN ->t7 -VTDGPKRLLTPILAYASMVSEAVEDGGNKNREEREYMQVAQNLATKSREVMITNISMDRIRCERLQSTSKYE ->t4 -FSGGLNKHFVALIVLNYTTPGETRESLAPPFTDTECPTAKLRQQGRMGRAYAAQKEMGKRRVDVFTYDQKIA ->t5 -QLSTYEGVFGAAIMIEDYHSLAVKQADEKMYEDAERADKSAKLSDKKVKEYTSDTNFEPVKADFIHYEAELN ->t2 -DANSYESVFYATLLLSDFLVVLSKQADDKVYEDEQRADATTQCQDAIVKDYISDTKYDPTRAEFFSKDADAT ->t3 -ETRSYGGVFFATLLLSGFLVVFAKQADTKKYKDPQSAEPTQQFEDQIVNDYINHHNYDPVRADFLAKDANAT ->t19 -MRKGSDRLFIPIISITTLLSEGLEQPRDRQKQTRTQIITAKNMTVKQQEVGVANGQFKRLQREILTEPKEND ->t29 -LKDDYKNYHKALLVIAAFLNVAIRQTDEKVYEAKKMPTRTVSAESRLAEGYDDDTAFQWLSDDFITINANYK ->t28 -YSDAFERYFSKTLGFSYPQSATIQESNDKEKNGTEMLTMDKQSCVLIHEEYLDNSQFDFSRADELSMEADYE ->t21 -QSNEYQNYFGWLWVIIELLSTNIRESNGKVYDAKQKPVATVQESSAIGENFTDDTVYEPLKGDFITQGADYN ->t20 -ATDGFKRLELPILGTLCSISTALEGGINAEQAEAEYLQVAMNLSVERAEVYLVDVDMDDLRDRFLPAQQPYK ->t23 -PSDGYKTYYAALMVTGDYLDVEMKETDENVYTARTMRTRTLTAASRLAEGHNANNAFEWLNADTMEHSGEWS ->t22 -FSAAVDKYFMALFALRYATPDTLKEDIDPPFTDAEAPTSKTRQVAQLGSKYADQKQLNQKRVAGLALDADEM ->t25 -YSGDYRRHFFALLAFSYGVSGVIQETVDPQFDEDESPTSRAKTLVKIGQSFLDNTELNPRKADKLCEKRDQK ->t24 -FSGALDKHYIAMIAIAYLTPGALQCDMDPPYTDMEAPTFTLRQAGKLERKYADEQQRAKRRVDGLAYNARQQ ->t27 -LSNTFKNYFMKVLGLSYPIVTASQEANDKEKVGTHVIAVDQRKTFLLNEEYLGVSLFNFSRVSELNFEADYD ->t26 -VSEEHQYYYIGVSVIFEYLSYDYRFSWVKVYEDVVEPTKVGSSASRETDEHDEESTHSPINDDMILQDSALG - 
-(((t6:0.2531555823,t51:0.3411596169):0.2477639861,(t32:0.05970772538,(t31:0.1606154598,t36:0.4329883453):0.2613293303):0.3206277792):0.2333967178,((((t14:0.3277204457,((t3:0.4335358745,t2:0.06400334064):0.3157650484,(t34:0.2787311969,(((t45:0.1147901381,t9:0.2427537355):0.1740088223,((t17:0.1406880321,t35:0.3898961398):0.02668694016,t13:0.4582850721):0.07129815704):0.3357846918,(t5:0.03869605405,t41:0.05382303932):0.08743629256):0.2365553248):0.1961887583):0.09039827978):0.2541520049,t44:0.1323649817):0.3312331642,(((((t27:0.3651852297,t50:0.4178156554):0.1536992896,(t28:0.05183849934,t8:0.3638458821):0.2466042426):0.3751680832,(t40:0.1833315858,(((t48:0.01235738342,(t39:0.3280724016,(t11:0.3584670685,t19:0.1475125901):0.3216845365):0.3737666139):0.2447962763,(t43:0.4899012212,t7:0.1003897468):0.3958714078):0.164244208,(t10:0.1184884631,(t20:0.07755822053,t53:0.4529059735):0.02255809833):0.4585674997):0.2101014278):0.2651026013):0.2611581148,(((t49:0.4118369637,t30:0.1533423103):0.1432345487,((t22:0.05827677987,t52:0.1836905452):0.07232267282,(t24:0.2998282501,((t16:0.4543635364,t4:0.2545857227):0.2665725231,(t54:0.08579347092,t1:0.0489606238):0.09677886171):0.1793335054):0.2173806967):0.2999001562):0.3494904488,t25:0.3616052435):0.4597340067):0.4770850052,(t42:0.4598775818,t21:0.1245115691):0.1951526437):0.153676096):0.1138952,(((t23:0.2873184349,t15:0.4472107442):0.3866416883,t29:0.3721649973):0.328268029,((t26:0.3466038297,t38:0.290625521):0.4590780461,(((t47:0.4919990611,t46:0.2104852911):0.1622239988,t12:0.2650135105):0.09984755999,(t37:0.4011966978,(t18:0.02103958966,t33:0.3948064769):0.2239951074):0.2573473376):0.3517749564):0.1645918443):0.1431850951):0.1501088557); \ No newline at end of file diff --git a/tests/hbltests/libv3/data/protgtr_fitter_alignments/prot5.dat b/tests/hbltests/libv3/data/protgtr_fitter_alignments/prot5.dat deleted file mode 100644 index 02a7d6c66..000000000 --- a/tests/hbltests/libv3/data/protgtr_fitter_alignments/prot5.dat +++ 
/dev/null @@ -1,200 +0,0 @@ ->t89 -KLQEITRNSHDRRNLVANSFTIDIYILTAVSTIGSEGEATLRVYGTFEGSFFWHQEYEQKGIFHIIHEMKALTKVDNNLVRIDNVLLTIVNLK ->t88 -CIDQLHTCNEEKSKRWDYSRISMFNILIDGLQVHTRGDKPLAAFSLSTYKMFEQLMFITNEEVMLFSEHCDRMAQENPLHSLDDKKNFLYLYA ->t87 -TIEATDSYGHRRSKRWQYSTLSKFFFFIDGLQKNARGNEPYAALTLATHKVFQQLALIKDADILLTAEHGSAVEQDRPLHALGDKQYVLYYKT ->t86 -EINQLYRAGDAGSEQWTYHLVATIDIFEADYRADEEAQSSNKVYKFQTTRMAWEIGYTQQEHFRLLTVQNAGLQWEVKLKEEDTQSIALLALS ->t85 -SIKQMYRSGEHGKEEWAYQIFATLNAMIPVLKRNERGYTSLESYQLEQILQYDDFTFTKSQTSDLIIGAQGNLDWDGHILHMDEEELASFVTN ->t84 -TLNQLHRTANNGQDRWGYDVAMFLDGSLSVMTEDEQGYMELHLKQIEALRMMPEDVYRRTSKTTIAVTKQGKLKQRTPLRYVEEDEIITYKTS ->t83 -AGHHKGMANESDREGWLYGRTPGLAITMVKAKNNSAEQTHLSTIEIWAVKMLFRVTYVQGDEFHTLYHLMGSFSYEDPSTELRTSVISQQGMG ->t82 -QGNGRSLNREDSRSSWIYSKTGVISIRTVFDKNNNFQVSRPCCLKVLALSAIFKLTYVEGSDFDQELHLLGALDKERPNFEFKTDVLSQQGNK ->t81 -KPLEKSSVNQNIKNRWAYENVQCIDILVEKFGHKNVAQLHLATTKISGVEYFYPVTLVKLEELQYVNDSVGATLNNDSPVAQELSVTSKEAYP ->t80 -TIDVTDSCHPRKAKKWHYRRLKKIFMIIDGLQKESRGTSNYPALSLHKYRMWEQLLLLKNKELMLFAENCSKVEMDTKFYSIPDKKNMLYSKS ->t72 -RVNQAVHSEGDSGEQYATGRVTDIEIRADILKNREGSKGEIKVYTFQRATIKWDITFKRSSNTTILAVEDPLSEWEKALNQIENSAIILLGLS ->t73 -SSQAWDMTDEGGRRSWLYGESSGLAIFTTTHKNDNAQVAHLCVIKILAVRSIFRVTYVMGSDFHMEAYLVDVLDYEDPKCELKEDVLNQRSMP ->t70 -ALNPICRYGKTHTRSWLYLAIASLGILLLELPDPAQNDKSFGSHRSRPTKLFSAITVIQNKEIRLWGAEEGNLEAEEPLYYISEESLHLHLFA ->t71 -SVDKSYHTESEEGEKFASALLADLDIRADIYSTRDEARARINVYIFYRAIPYWDIYFQHESNTTLMKVQDPLLEWEKSLKTIEDKAIILFGLT ->t76 -GILKLFNVGDNSSKLWVRRKALQVNLVVIQSFSDEESEETPAAIKMEEIRVKWKTIYFQRTPFKIFTALDRDIFLSTASYADKKEELRQVAKT ->t77 -RINEIYRCGDAGGQQGTYKLVAVIDLLNAMLRAEEAAQGENKRYRCQVTPLAWEIGYTRQERFRLLGVQNSGLNWNVPLNEENKESIALKSLW ->t74 -GSVEFYRSGEDGKKEWGYELFKTINALIPVLRRNERAYTSLQSYAIEEIQQYANYVFTKEEFSDIIVAAQGNAEWDGHILNMEKNAIAGFVFN ->t75 -NRNPLTTCGEQGMYPWRFVVLTELDILLLAFPLVGSGGDIWAIEKQEAFHLVEESIYIQRGEHTFWSAFEGALEKEDNIHGFAEDEIFLTLFP ->t78 -SKHSVGAVNKQGRYSCLHGRTSGLAITTVKYGDTVAAETPLQVEKILDVRHLFHVVYTQGPEFDIIQRILGNITYFEPRLEWKTCSINPQGLP ->t79 
-GLTEEAKTDNNGPRNWSFLFTINVDIILVEIPEPSYLDKVYVAEEISSLLWHHTSTRITFDEVYMWAGQESNLKSNRDLDFLLEDGSAMWLYT ->t98 -GMLRFFRLGDLGTMVVVRFKAVKARRMLNQNFSDERGQESPMCHKLARKTVAFKMLYFLEITFMMAGEVEFSIFTDTGVYSDEEAPPRLTGLT ->t99 -GLSEIARTDNRGVRDWNFIVLVSVDAILQEIKKPTFLDNVHISRCFTSLIIQQETSRFSDDESHIWAGQEARKNTEDALDHLLKKGTAVWVYT ->t94 -SIKQFYKTGDNGRSRWEYPISATLCSIIPVNREDERGYQALQVMNLESIRMFDQFVYSKTDSSPLLVGEQGDLDWAVPIHNMDEDALAAFVYS ->t95 -TLSQFHRVADNGADKWGYDVVVFLSVSLAVLAEAENGYIELHEQQIEATKHMPEDVYRLTTKSTIAVAEQGKLKQRTPLKYVKLETFLTYKAS ->t96 -KAHRRNLADDRCQDCWTYEHVGGLDIITSSFQNKNASQFRLYLMRIIGVEYLFPVVYVKAEQFRYIAESVGVIITNGDTVALEEAVTTKKGMP ->t97 -DLDELCRSENSPERKWQIKVIAKFELIIDEIAEAEKVEQVSVDKDIDNLILFHSETFISSEEVKIWDTQTANLNAENHLFDIFEPGIPLWHFV ->t90 -GINHFYRTGNNGKDKWTYRLVPTLNTFIPALREEERGYVFIRVWQLNSLQLFALMVFTKAETSSLLLGHSGNLEWEKLLHNMEEDALASAVFS ->t91 -AARNRYKSGDNGKEAVIYSEEGGLELLIVRLAESDSSQTTARVLKLKAVKYIWQAVYTRGHTIEIIMRRSGATTVDTPTYELETTVIPTMGLS ->t92 -ALNPIGQSGHNGTRDWNFIVIVNLDIIFLEIPAPSNSEYTLSSHRAEPLMMFKSITLLQEQEHRLWGAQEGNLDTDDPLYFLSEDGLALYIFT ->t93 -GVKSLADADNKEREDWTFLLIQKDCLILMSCARVVFVDNAYPSEKISNLIMHFAQTNPTSDSVRCFDGAELKLKGEDDAYYLLKEGTNLWLYS ->t8 -GLLKFFNVGNNSAKLWVRRKAVRVDLVVLHTFSDEGSEEKPGAIKMEEIQVKYKTIYFLETPFKIFCALDKDIFMASSTYVDKKEELRALAKT ->t9 -NAHREVKSYNNGRESWIKYSVGNVQILTVRFQDNHFAQTSLHTLKIKAVKYLWQAVYTLGQKFHIITTAQTVIKDESPTYELEASVRTQVGYP ->t6 -QGNGRSLTSEDSRVRWIYSKTDLRSIRTMLDKNDDAQASHTCCFKVLALSALFKLTLVEGTDFHRELNVASALDKERPNFEFKTDVLSEQGNK ->t7 -GILRFFKAGNSGSELWVRRKAASGQLALLAGYTDERGQESPAVHKLEKVRVFWQLIYYLETSCKLYSGNDGELFKETPVYDTEKDPLRMFGLA ->t4 -AQELVGMVHEKGRNSWIYGKTGDVALWNVKCNTNYSDQHYLSQLKIHVVKHIYRVTCVQAEEYPLTYNMFAEFQMEDPRAERKSVRIQQQGVG ->t5 -DGVSTLWTRESPRTFWIYGMCGAPLVRRVFPENTTALFSHITPIKLSQLAVILSLKWCSATVFNKELNLIDVIEIEEPPFEHRTGVLVDESKK ->t2 -TLSGFHRTADNGQDPWGYDVALFLEATLSVLSQEERGYLELMLKQIEAMRMAPEAVYRRTNRSTIAVNEQGKLKERTPLTYKQEETLLVYKNS ->t3 -PNEWEGAATRQGGFGCFFGRTGPLKIATVSYGDTRQVEAHLAVEIIVDRRLVLYLERNHAPTLDLIGQIKKAFLYYTPKLFWKETEMSPKGPP ->t1 
-PNSWEGASPRQGRFGCLFGRTGPLIIPTLAYSNNAQVEDNLSVDFLADKRLVTYLDKNEAPSCQMLGKLRSALIMVALKINMKKARASDKGPP ->t14 -GIDQIFKCNENRSKLWSISVIQTFSILIDALRKDTFASPGSAHFKISSVLIIEYITWIENQQHLVFTAHEAHTNQSGPLLTLDDNGAFLYFHS ->t15 -GINQLFKCNENRSKLWSIELIQSFSIILDALRKDRLASPGSASFKIDSYLIIEYIIWIEDEQHLLFTAHEAQTNSIAPWVTLDDSGAFLYLHT ->t16 -AGHYKGTTKERGRKSWLYKELDGIAIWNLKARNNALEQPHQSQLKILALKHLFRVTYVQGDKYHLVFHLAGEFEYEDPRLELKTSAINQQGMG ->t17 -PNHWEGAVTRQGGFGCFFGKTGALMIPTVAYGDTRQVENHLQVEIIVDRRKILYLERNHAAAVDIIGQIQSALMYFELKLNWKESEMSPQGAP ->t10 -NAPPLPTSGNDGEGNWRWSVLCALAIIITEHPFDASSDERLLTHKLSQLNMIRSPVWIQNKDSKLCNAQEDELSRETPLQGLREEPLTLYIFI ->t11 -SEESYQKNQEDDRRKWAPGQSGGVLIETTTYVDHEVHVSSNCVPRILAEALIVRVTYVQGAYILQPVHIMSLIKFEDPNCPLNTDTLQEQSNK ->t12 -SLPPLPTSGNNGSNNWRYSVLATLAIILTERPYDASGDESLLTHKLKQLNMVESIVFIVNKEAKLCSAQEGEIKKEAPLQGLREEPLVLCLFI ->t13 -GLTEIARTDGQGVEDWNFIVLLNVNIIFLEIRKPTILTKVRISRQLSSLIIQKDSSRFSNDESHIWAGQESRLNVEDALDYLLKDTTGRWVFT ->t18 -LLNQLYRTGDNGQEKWTYELVVTLNATIPALSEDERGYIALHVNQLEALRMMAQYVYQRTTGSTIFAAEQGKLKQAAPLRNVEEDEIASYKIS ->t19 -DLNEMCRSEDTPKRSWQFKAMAKLELIIEKITEAAKIETVYVDENISNLIMFSSETFIECDEVKLWGAQEAKLKCEDHLFNMLEDSISLWHFV ->t29 -FIEPQHATDANHQEGYNYSRVKAVEMLLEGLEVQKRGEDGFSLYSFNVFELYQNDILIENEKMMLFAKHIAHYELDKSHHEMDSKKNFLYLHV ->t28 -SEQANDLEQEEGRRSWVYGRSGGLKVNATTHKNHKVQVSHRCVLRILAVTLIVRVTWVRGSAFTQEVHLMGFIDYDDPNCPIKENTLNQQGNK ->t21 -NRTPLTTCGEQGYKNLRYVVLMELDILLFAWPFVASGGDFFAISKFEAFRLVEEIIYIQRGEWKFWSALQGKLEQEDSVDGFPEGVIFFCLFS ->t20 -KLNELMKADGDRSKLIAQTFLVAIYILVGGTRVTSQGEACMRVLGIHAGKFFWNLDYAKSNSIKILLDQRAILEKDNNLSQIDSMYIALVTLN ->t23 -FLDPTHTGNAEQQDRWYYNQVKQLELLLETLELNVRDEAPITVFSNNPTTLWEDVILLKTETMLMFAQHCPHAELDNEHHEIYTRQNFCYLYS ->t22 -FIVYFFRSGDNGSELWAHRTASTLDVTTAASDTDEKGQAALAVYKLEVVRVFWQLVYKKMSKIKLVAANGGELFRRSPVYETDTNPILLIGIS ->t25 -SINELYRTGDNGKEKWSYKLVNIIDRKLAKLRADDKGQAAIRVNQLEALKTLWRLVYQRGQKFKLLLSQQGKLEWATDLHELDDNILVAFEFS ->t24 -SLNPIGQAGHNGTRQYDHIVITRLEILFLELPAPSEEDYTFGEHSGQPLITYKDLTLAQDEELELWGAQETNLETDDPLYFFAEDQLVIYLFS ->t27 
-SKHWSGAVAEQGKISCIFSESTGMAITPEKYGETVTAKTHLALKPIVDNRLEYYVEFTSGPGFDIIQPILGGLTYFEPKLEWRALALNPQGLP ->t26 -RINKLYKCGDSGAEQCTYKLVAVIDLLNATFRKEEAAEGENKLYRCQATHLAWEIGYTRQERFRLLGVENDGLNWNLPLNEENKQSIALMSLW ->t49 -ISDKRDFGDSRCSDMWIYEEVGSLDIISAAFLTKNAEAVRAHVLKIILVKYIYPVAFVIASQVKYIAETDGTMVQNGSALPCEKNVPSRGGKE ->t48 -TVNQSYDSPGDRTLKYVGGILLEIQIRVPPFQQRRSARASIKLFDFQRAKLHWTINYLRNQDTSFLTIQEHLIEWDACVNRQDDTAITLVGLN ->t43 -PSHWEGSSARQGNFGCVFGSTGPLKIPTLAYKDTAQVEDHLSVEIMADKRLVIYLERNNAPQLQMIGGLRSALMMVALKLNWKEAEATPKGPP ->t42 -PNHWPGAINEQGKLVCFMGRTSRLFINVPSYSGTTVVENHLKVELIMDRKLPFYVDHHHGPKFDIIGQIRYGISYFDPKLNWKTSELQPQEVP ->t41 -TLNQFHRTADNGQDRWGYDVAVFLDASLSVLSEDERGYMELHLSQIEALRMMPEDVYSRTTKSTIAVSEQGKLKERTPLKYVEEEEIITYKAS ->t40 -GLLRLFNAGDNSSKLWVRRKALQVALVVLQHFSDEGPEEQPAAIKMEEIRVKYKTIYFQENPFKIFTALDRDIFLSTASFNSKKEELRHVAKT ->t47 -RQTELAKKDNRGPVNWYFIFILDVDIILVEIPEPAFLDKVYASEQITSLALHHSSSQIQSDDVHMWAGQEANFKNERGLDYLLEDGSALWLHT ->t46 -DLNELARGDNKGPRGWRFTLCAMLDLLIERISEAVQLEKVYAQENITNLLMFNNETLIKCDEVKLWAEQDAKLKLEDPLYNMMEDELTMFHFA ->t45 -EIDQFWYCEEKTSKPWTYTVAKTFTILVDSMRSDTASNNGEGRIKLSSLQIIEQITYLVGHQHKVFSANEAATDQHGSLITLDDQGAYLYLAV ->t44 -RLNELAKTDGDKKEAFRCDFMCAIHILVGATRVDEAGMVTLRVYKIEGAGMFWTLEYSKNNSFKIFAVQKNILETENNFSQLDNMSIVLVTMN ->t38 -TVNQIYSAPGDRGVKFLGGVLLELEIKVPPFKQREASRASIKLFTFHRARIDWTISYLRNSHANILATQEHLIEWEKLVNKNDKTAMVLVGLD ->t39 -SINELYRTGDNGKEKWSYKLVNTIDRKLAKLRADDKGQAAIRVNQLEAVKTLWQLVYQRGQKFKLLLSQQGKLEWATDLHDLDDNIIVAFEFS ->t36 -AGQHKGMTHKKSRKSWIYGKTGNIAIWNVKCNNEYSDQHAITQLEILVIKHLYRVTCAQGEEYPMVYNLMSEFAIDHPRSEIKTVKIEMQAVG ->t37 -SGQAWEMTSEGGSRSLLYGESSGVAIFTTTHNNDNAHVLHLCVFKILAVRSIFRVSHAMGEDYHMEAYLVGVLDYEDPKCEIKTEVTQQRSMP ->t34 -TKRRSSALGEHGLISCTFGESDGLSINSEKYGDSVTDETYLAVESIVDSRLIFYLDFTQAPGYDLIQTILGGITYFEPQSQWVPQAVNPEGLP ->t35 -SLNEIHRIDADAGEKFSTGIIGDIKIKAGAMSTRENGHLALKLETLLRERIYWTIEMNRSNHTNILAVQNTLLDWEKKLNKPDSDALVLAGIN ->t32 -KPLERNLCNENFKNFWIYENVGGIDILVTLYGRKNVPQLRLTLMRIIEVEYIPPVVLVKADQFRYVTDAVGATIRNNTSVALEASVNDKEGMP ->t33 
-TSKHRCKTADKGKKVVIYATTAVATIIIARYMGGETGLTDQKVKKIKAAKAAWQDVYSRGETLRIILARTESFATDQPTNNLETCYFKDDGFS ->t30 -TKRRSSSLGEEGLISCAFGESGGISINSEKYGESMTDDTYLAVEAIVDQRLIFYVDFSQAPGFDLIKTILSGITYFEPQSMWVPSAVNPEGLA ->t31 -NAHRHVRSYNNGVGSWIFYSVGNVKILTVRFRDNYFGQKSTAWLKLRAVVYIGAAVYALGKKFNIIATQGTVLQDETPTYDLEATVRTQVGYP ->t58 -KLAEIAASSGDRSNLIANSFKVSIYILTGGTRLGSEGEACLRCYGFFAGKFFWHLEYSKKGAFRIIHDMQSLVETDNNLTNIDQVLIAIVNLN ->t59 -GGGEKGAWNERAKKSWLKGRSPGLAITTVKANNTNAELSFLTALKLLEARLLFRAVFVEGDEFHHIFVLSGEPEYEDPATALRQTVINRQDMG ->t50 -SLDPIGQAGHNGTIQYDHIVLVRLEMLALDLPAPGPEDYTFGEQSGQPIITYKSLSLARDEDHELWGSEETNLETDSTMYFIAEDALVLYLFS ->t51 -DGQARAANRESPRRSWIYGQCGAPLVRTVFPEDNTAFFAHLAPIKMSTLSVIFKLRWCADSVFNKELRWLRVIDLEEPPLEIRNDTLASRGRR ->t52 -GVLKFFNLGDSGSRLWVKRKSAKADLFLIQNFTDDKGKENPAVEELQRLRVLWKVTYFLETPFKQFTAINKDIFTDTGSYTDRKEELRLVGLL ->t53 -ALNPICQVGHTNSRAWSYTVFANINLLLFELPEPTHNDKSFGSHQSRPLKLFNALTLLQKKEIRLWGAEEGFLESEEPLYYLTEDGLALYLFT ->t54 -FIDPTHTSNAEQQNRYSYNRVKALELLMQGYDVDMVHDRPCSVFSLNSYTLWEAVLLIKREKLVLFAEFLAHADLDNPHHELRSKKNFLHLFS ->t55 -ELDQTWLCEEKSSKPWTYPLAKTFTILFDAMRSDTHANKGTAAIKISSLEILEQIAYMMGQTYKLFSASDASEDLYGALFTVDDLGAFLYLKA ->t56 -FIDRTHTPNAKKQKRYAYARLKAFQILLQGIQANLRGNRPYAVISLKTYRMFEEVIFLKREKVMLFAEHCDPAELDNPHHAIQDKLNLSYLFS ->t57 -GINELCKADNKGTRDWNFIVIANLELILLEIPEPVNLDKVYASEQITKLIMFNSQTLITSEEVKMWAGQESDLKSEDPLYYLMEDGLAPWLFT ->t69 -AARHRTQTGDRGKETKIFAEKGTTRAIIVRYKGVQTGLTTTRVIKLKAAKALWQDVYTRNQLIHIILTRTEEFETDEPIRELKTPVIKKMGLS ->t68 -KAHRRDFSDNRCKDMWIYDSVGGLDLITAAFLTKNAERVRIYVMKILAVKFIYPVVFVTAEQVRYIAESTGTIVKNGTTLPIAKAVSSKKGLE ->t65 -KPLEKSLVNQNLKNRWAYENVQCIDILVKKFSHKNVARLHLATTKISAVEYFYPLTLVKLDELQYVNDSVGASINNDSPVALEVSLTSKEGAP ->t64 -GALIFYNNADQCEKLWVKTEAVQVDMMLVQTFTDEKSKDQPCPHKMEKLRVLRKLQYFLETPFKQFTPLHKNIFINLASHVNKKDELRPLGRA ->t67 -GLRDLADADNKEREQWTFLLIQKDCFILMSCARVVFVDHAYPSEKISNLLMVFAQTQPTSDSVRAFEGAEHKLKGSDGAYYLIKEGTNLWLYT ->t66 -SLRPLCNSGANGKKDWKLAVLANLDLIILSLPEDQTGDKAFAGQKLSSFLQFESICLIKNTEFKFSADQEGKIHKETPLQSVFEDGLLLYLQY ->t61 
-NLTPLTTVGNEGLKNWRYVVLADLDILLFAWPFDASGNDFFVTNKIEAFELVEKIIFIQHGEFKFWSAQEGNLEKESSLHSFPEDSIVYCFFN ->t60 -DGYGTALIRESPREDWIYGQCGASMNKMVFPENKTVLHSYLRVAEMLALSAIYKLTWLECSPFNKELNWLAVIDLESPPLEVRTDVLTQKSNK ->t63 -TIRQMYKVGSHGKERWAFVIYSTLNAGIPSLKRNEANYTSLESYRLEQILKYDNFTFTKSQTNDILVGAQGNLNFDGHIIHGDKNARAGFVTN ->t62 -GLTDLAESDDRKKEQWNLLPISRQDMILIEASTVVLLDKVYASKTITNLIMHYAQTHPSTDRVRVWEGAESSLQGENAIYYLLKDGKSLWLFA - -(((((((t64:0.3606024368,((t76:0.07110876096,t40:0.1424853358):0.04973365551,t8:0.2899517035):0.4947136129):0.3807704499,t52:0.2269341115):0.1847564362,t98:0.4985795527):0.3663309289,t7:0.08221627301):0.4930271181,t22:0.1849724769):0.4778221719,((((t44:0.2423516827,(t20:0.3251431013,(t89:0.3092031451,t58:0.1505064272):0.4025580537):0.4286443567):0.2904799986,(((t38:0.1402271807,t48:0.4870806962):0.2571757093,(t71:0.4956153091,t72:0.3339877091):0.2346285542):0.2450319001,t35:0.4367376456):0.4670766103):0.466915441,((t77:0.1139508952,t26:0.1067133951):0.3971192835,t86:0.1983219385):0.4393141493):0.1205377948,(((((t14:0.02029250841,t15:0.281296535):0.2718314606,(t45:0.2919122459,t55:0.3498234356):0.4335514692):0.1938185657,(((t87:0.4979138259,t80:0.3059658932):0.4135256986,(t56:0.2262222278,(t29:0.472914012,(t23:0.4945761132,t54:0.3574005106):0.03184049114):0.2300762665):0.217781904):0.209498541,t88:0.06942030245):0.4670162292):0.4842282808,((t66:0.2814837691,((t12:0.03035500803,t10:0.3447520284):0.3703268009,((t75:0.4201595175,t21:0.1261696893):0.3239667865,t61:0.08817552212):0.4782025606):0.3340104482):0.01493061628,(((t53:0.1895854427,t70:0.3173048322):0.3175420778,((t50:0.2679380032,t24:0.07510074743):0.4590908619,t92:0.09916294264):0.2709395609):0.1051948775,(((((t47:0.1769465422,t79:0.2220749834):0.1201107192,(t13:0.1222603365,t99:0.213869276):0.479531735):0.09309362853,(t62:0.2525335232,(t67:0.1923670083,t93:0.04931126557):0.3662042076):0.3751398511):0.07962548341,((t19:0.05630615896,t97:0.4159038917):0.2896688411,t46:0.2602726201):0.4100352997):0.08420396179,t57:0.0807
3580357):0.3843659966):0.3910870624):0.3481126705):0.4491288221,((t25:0.06793258176,t39:0.08117911582):0.3487825938,((t18:0.0873994784,((t41:0.01168657025,(t2:0.2561471625,t95:0.2985943773):0.02140677267):0.04485916993,t84:0.1603361811):0.4727166472):0.4580918577,((t94:0.4078695604,((t85:0.03326860536,t63:0.3614253201):0.10455673,t74:0.3381562195):0.2733652182):0.02264785287,t90:0.3708612662):0.1524549018):0.2679572008):0.1716852445):0.03433820707):0.01948481683):0.2828592779,(((t91:0.2855187082,(t33:0.3941599377,t69:0.3769563341):0.3654390179):0.282344563,(t9:0.1197436449,t31:0.3429310846):0.3153875459):0.03762102306,(((((t36:0.2556156321,t4:0.3991125243):0.3163064233,t16:0.2450880213):0.127519336,(t83:0.253097747,t59:0.3882891485):0.1479915127):0.08621783708,((((t27:0.3523899782,(t34:0.01997920744,t30:0.2195461933):0.4540820866):0.1362968081,(t42:0.4354496221,(t17:0.09048940359,(t3:0.2808522881,(t1:0.2429713938,t43:0.0789142305):0.2870380949):0.1319656512):0.2942173181):0.3372373368):0.1355608598,t78:0.07558623092):0.4945718877,(((t11:0.4848752463,t28:0.3136861725):0.09002469478,((t60:0.4213304874,(t5:0.4206467746,t51:0.1545405396):0.3223590991):0.3250868191,(t82:0.1022084421,t6:0.339794634):0.0968154598):0.3240341619):0.114426395,(t37:0.2539452433,t73:0.1001648518):0.2247688102):0.2597665595):0.0190305293):0.4555100037,(((t32:0.1174101683,(t65:0.06435161829,t81:0.1552950304):0.4953182313):0.4233182743,t96:0.1716475798):0.1057550281,(t49:0.3558416233,t68:0.116109564):0.3425032881):0.4232502208):0.4171057341):0.4273452594); \ No newline at end of file diff --git a/tests/hbltests/libv3/data/protgtr_fitter_lines_raw.txt b/tests/hbltests/libv3/data/protgtr_fitter_lines_raw.txt index 227f5994c..930ef8c9f 100644 --- a/tests/hbltests/libv3/data/protgtr_fitter_lines_raw.txt +++ b/tests/hbltests/libv3/data/protgtr_fitter_lines_raw.txt @@ -1,5 +1,3 @@ -tests/hbltests/libv3/data/protgtr_fitter_alignments/prot1.dat 
-tests/hbltests/libv3/data/protgtr_fitter_alignments/prot2.dat -tests/hbltests/libv3/data/protgtr_fitter_alignments/prot3.dat -tests/hbltests/libv3/data/protgtr_fitter_alignments/prot4.dat -tests/hbltests/libv3/data/protgtr_fitter_alignments/prot5.dat \ No newline at end of file +tests/hbltests/libv3/data/protgtr_fitter_alignments/aa1.fasta +tests/hbltests/libv3/data/protgtr_fitter_alignments/aa2.fasta +tests/hbltests/libv3/data/protgtr_fitter_alignments/aa3.fasta \ No newline at end of file From ff4eabc9afd08c6b61f278aa8e29eb7f4a365760 Mon Sep 17 00:00:00 2001 From: Steven Weaver Date: Mon, 16 Oct 2017 17:06:10 -0400 Subject: [PATCH 18/19] closes #666: incorrect # branches reported in MEME results. Should be safe for at least 1000 years now. --- .../SelectionAnalyses/MEME.bf | 230 +++++++++--------- tests/hbltests/libv3/MEME.wbf | 9 +- 2 files changed, 124 insertions(+), 115 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf index 29ebe7d6d..41ff3b36f 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/MEME.bf @@ -263,7 +263,7 @@ for (meme.partition_index = 0; meme.partition_index < meme.partition_count; meme meme.table_output_options[utility.getGlobalValue("terms.table_options.header")] = TRUE; meme.model_to_branch_bsrel = { "meme.bsrel" : utility.Filter (meme.selected_branches[meme.partition_index], '_value_', '_value_ == terms.tree_attributes.test'), - "meme.background_fel" : utility.Filter (meme.selected_branches[meme.partition_index], '_value_', '_value_ != terms.tree_attributes.test')}; + "meme.background_fel" : utility.Filter (meme.selected_branches[meme.partition_index], '_value_', '_value_ != terms.tree_attributes.test')}; model.ApplyModelToTree( "meme.site_tree_fel", meme.trees[meme.partition_index], {terms.default : meme.site.background_fel}, None); @@ -275,16 +275,16 @@ for (meme.partition_index = 0; meme.partition_index < 
meme.partition_count; meme '_node_class_ = (meme.selected_branches[meme.partition_index])[_node_]; if (_node_class_ != terms.tree_attributes.test) { _beta_scaler = "meme.site_beta_nuisance"; - meme.apply_proportional_site_constraint.fel ("meme.site_tree_bsrel", _node_, - meme.alpha, meme.beta, "meme.site_alpha", _beta_scaler, (( meme.final_partitioned_mg_results[utility.getGlobalValue("terms.branch_length")])[meme.partition_index])[_node_]); + meme.apply_proportional_site_constraint.fel ("meme.site_tree_bsrel", _node_, + meme.alpha, meme.beta, "meme.site_alpha", _beta_scaler, (( meme.final_partitioned_mg_results[utility.getGlobalValue("terms.branch_length")])[meme.partition_index])[_node_]); } else { _beta_scaler = "meme.site_beta_plus"; - meme.apply_proportional_site_constraint.bsrel ("meme.site_tree_bsrel", _node_, - meme.alpha, meme.beta1, meme.beta2, meme.branch_mixture, "meme.site_alpha", "meme.site_omega_minus", - _beta_scaler, "meme.site_mixture_weight", (( meme.final_partitioned_mg_results[utility.getGlobalValue("terms.branch_length")])[meme.partition_index])[_node_]); + meme.apply_proportional_site_constraint.bsrel ("meme.site_tree_bsrel", _node_, + meme.alpha, meme.beta1, meme.beta2, meme.branch_mixture, "meme.site_alpha", "meme.site_omega_minus", + _beta_scaler, "meme.site_mixture_weight", (( meme.final_partitioned_mg_results[utility.getGlobalValue("terms.branch_length")])[meme.partition_index])[_node_]); } meme.apply_proportional_site_constraint.fel ("meme.site_tree_fel", _node_, - meme.alpha, meme.beta, "meme.site_alpha", _beta_scaler, (( meme.final_partitioned_mg_results[utility.getGlobalValue("terms.branch_length")])[meme.partition_index])[_node_]); + meme.alpha, meme.beta, "meme.site_alpha", _beta_scaler, (( meme.final_partitioned_mg_results[utility.getGlobalValue("terms.branch_length")])[meme.partition_index])[_node_]); '); @@ -327,23 +327,23 @@ for (meme.partition_index = 0; meme.partition_index < meme.partition_count; meme ' if 
(_pattern_info_[utility.getGlobalValue("terms.data.is_constant")]) { meme.store_results (-1,None,{"0" : "meme.site_likelihood", - "1" : "meme.site_likelihood_bsrel", - "2" : None, - "3" : meme.partition_index, - "4" : _pattern_info_, - "5" : meme.site_model_mapping + "1" : "meme.site_likelihood_bsrel", + "2" : None, + "3" : meme.partition_index, + "4" : _pattern_info_, + "5" : meme.site_model_mapping }); } else { mpi.QueueJob (meme.queue, "meme.handle_a_site", {"0" : "meme.site_likelihood", - "1" : "meme.site_likelihood_bsrel", + "1" : "meme.site_likelihood_bsrel", "2" : alignments.serialize_site_filter ((meme.filter_specification[meme.partition_index])[utility.getGlobalValue("terms.data.name")], (_pattern_info_[utility.getGlobalValue("terms.data.sites")])[0]), - "3" : meme.partition_index, - "4" : _pattern_info_, - "5" : meme.site_model_mapping - }, - "meme.store_results"); + "3" : meme.partition_index, + "4" : _pattern_info_, + "5" : meme.site_model_mapping + }, + "meme.store_results"); } ' ); @@ -411,34 +411,34 @@ function meme.apply_proportional_site_constraint.bsrel (tree_name, node_name, al lfunction meme.compute_branch_EBF (lf_id, tree_name, branch_name, baseline) { // TODO: figure out why LFCompute fails if this is run as an `lfunction` - parameter_name = "`tree_name`.`branch_name`." + ^"meme.branch_mixture"; - ^parameter_name = 1; + parameter_name = "`tree_name`.`branch_name`." 
+ ^"meme.branch_mixture"; + ^parameter_name = 1; - LFCompute (^lf_id,LOGL0); + LFCompute (^lf_id,LOGL0); - utility.ExecuteInGlobalNamespace (parameter_name + ":= meme.site_mixture_weight"); + utility.ExecuteInGlobalNamespace (parameter_name + ":= meme.site_mixture_weight"); - if (^"meme.site_mixture_weight" != 1 && ^"meme.site_mixture_weight" != 0) { - _priorOdds = (1-^"meme.site_mixture_weight")/^"meme.site_mixture_weight"; - } else { - _priorOdds = 0; - } + if (^"meme.site_mixture_weight" != 1 && ^"meme.site_mixture_weight" != 0) { + _priorOdds = (1-^"meme.site_mixture_weight")/^"meme.site_mixture_weight"; + } else { + _priorOdds = 0; + } - normalizer = -Max (LOGL0,baseline); + normalizer = -Max (LOGL0,baseline); - p1 = Exp(LOGL0+normalizer) * ^"meme.site_mixture_weight"; - p2 = (Exp(baseline+normalizer) - p1); + p1 = Exp(LOGL0+normalizer) * ^"meme.site_mixture_weight"; + p2 = (Exp(baseline+normalizer) - p1); - _posteriorProb = {{p1,p2}}; + _posteriorProb = {{p1,p2}}; - _posteriorProb = _posteriorProb * (1/(+_posteriorProb)); - if ( _priorOdds != 0) { - eBF = _posteriorProb[1] / (1 - _posteriorProb[1]) / _priorOdds; - } else { - eBF = 1; - } - return {utility.getGlobalValue("terms.empirical_bayes_factor") : eBF__, utility.getGlobalValue("terms.posterior") : _posteriorProb__[1]}; + _posteriorProb = _posteriorProb * (1/(+_posteriorProb)); + if ( _priorOdds != 0) { + eBF = _posteriorProb[1] / (1 - _posteriorProb[1]) / _priorOdds; + } else { + eBF = 1; + } + return {utility.getGlobalValue("terms.empirical_bayes_factor") : eBF__, utility.getGlobalValue("terms.posterior") : _posteriorProb__[1]}; } //---------------------------------------------------------------------------------------- @@ -455,7 +455,7 @@ lfunction meme.handle_a_site (lf_fel, lf_bsrel, filter_data, partition_index, pa GetString (lfInfo, ^lf_bsrel,-1); __make_filter ((lfInfo["Datafilters"])[0]); - bsrel_tree_id = (lfInfo["Trees"])[0]; + bsrel_tree_id = (lfInfo["Trees"])[0]; utility.SetEnvVariable 
("USE_LAST_RESULTS", TRUE); @@ -469,85 +469,85 @@ lfunction meme.handle_a_site (lf_fel, lf_bsrel, filter_data, partition_index, pa fel[utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0]; - ^"meme.site_mixture_weight" = 0.75; - if (^"meme.site_alpha" > 0) { - ^"meme.site_omega_minus" = 1; - } else { - ^"meme.site_omega_minus" = ^"meme.site_beta_plus" / Max (^"meme.site_alpha", 1e-6); - /* avoid 0/0 by making the denominator non-zero*/ - } + ^"meme.site_mixture_weight" = 0.75; + if (^"meme.site_alpha" > 0) { + ^"meme.site_omega_minus" = 1; + } else { + ^"meme.site_omega_minus" = ^"meme.site_beta_plus" / Max (^"meme.site_alpha", 1e-6); + /* avoid 0/0 by making the denominator non-zero*/ + } - Optimize (results, ^lf_bsrel); + Optimize (results, ^lf_bsrel); alternative = estimators.ExtractMLEs (lf_bsrel, model_mapping); alternative [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0]; - ancestral_info = ancestral.build (lf_bsrel,0,FALSE); + ancestral_info = ancestral.build (lf_bsrel,0,FALSE); //TODO - branch_substitution_information = selection.substitution_mapper (ancestral_info ["MATRIX"], - ancestral_info ["TREE_AVL"], - ancestral_info ["AMBIGS"], - ^"meme.pairwise_counts", - ancestral_info ["MAPPING"], - (^"meme.codon_data_info")[utility.getGlobalValue("terms.code")]); - - - DeleteObject (ancestral_info); - - branch_ebf = {}; - branch_posterior = {}; - - if (^"meme.site_beta_plus" > ^"meme.site_alpha" && ^"meme.site_mixture_weight" > 0) { - - LFCompute (^lf_bsrel,LF_START_COMPUTE); - LFCompute (^lf_bsrel,baseline); - - utility.ForEach (^bsrel_tree_id, "_node_name_", - ' - if ((meme.selected_branches [^"`&partition_index`"])[_node_name_] == utility.getGlobalValue("terms.tree_attributes.test")) { - _node_name_res_ = meme.compute_branch_EBF (^"`&lf_bsrel`", ^"`&bsrel_tree_id`", _node_name_, ^"`&baseline`"); - (^"`&branch_ebf`")[_node_name_] = _node_name_res_[utility.getGlobalValue("terms.empirical_bayes_factor")]; - 
(^"`&branch_posterior`")[_node_name_] = _node_name_res_[utility.getGlobalValue("terms.posterior")]; - } else { - (^"`&branch_ebf`")[_node_name_] = None; - (^"`&branch_posterior`")[_node_name_] = None; - } - ' - ); - - LFCompute (^lf_bsrel,LF_DONE_COMPUTE); - - ^"meme.site_beta_plus" := ^"meme.site_alpha"; - Optimize (results, ^lf_bsrel); - - null = estimators.ExtractMLEs (lf_bsrel, model_mapping); - null [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0]; - - - - } else { - null = alternative; - utility.ForEach (^bsrel_tree_id, "_node_name_", - ' - if ((meme.selected_branches [^"`&partition_index`"])[_node_name_] == utility.getGlobalValue("terms.tree_attributes.test")) { - (^"`&branch_ebf`")[_node_name_] = 1.0; - (^"`&branch_posterior`")[_node_name_] = 0.0; - } else { - (^"`&branch_ebf`")[_node_name_] = None; - (^"`&branch_posterior`")[_node_name_] = None; - } - ' - ); - } + branch_substitution_information = selection.substitution_mapper (ancestral_info ["MATRIX"], + ancestral_info ["TREE_AVL"], + ancestral_info ["AMBIGS"], + ^"meme.pairwise_counts", + ancestral_info ["MAPPING"], + (^"meme.codon_data_info")[utility.getGlobalValue("terms.code")]); + + + DeleteObject (ancestral_info); + + branch_ebf = {}; + branch_posterior = {}; + + if (^"meme.site_beta_plus" > ^"meme.site_alpha" && ^"meme.site_mixture_weight" > 0) { + + LFCompute (^lf_bsrel,LF_START_COMPUTE); + LFCompute (^lf_bsrel,baseline); + + utility.ForEach (^bsrel_tree_id, "_node_name_", + ' + if ((meme.selected_branches [^"`&partition_index`"])[_node_name_] == utility.getGlobalValue("terms.tree_attributes.test")) { + _node_name_res_ = meme.compute_branch_EBF (^"`&lf_bsrel`", ^"`&bsrel_tree_id`", _node_name_, ^"`&baseline`"); + (^"`&branch_ebf`")[_node_name_] = _node_name_res_[utility.getGlobalValue("terms.empirical_bayes_factor")]; + (^"`&branch_posterior`")[_node_name_] = _node_name_res_[utility.getGlobalValue("terms.posterior")]; + } else { + (^"`&branch_ebf`")[_node_name_] = None; + 
(^"`&branch_posterior`")[_node_name_] = None; + } + ' + ); + + LFCompute (^lf_bsrel,LF_DONE_COMPUTE); + + ^"meme.site_beta_plus" := ^"meme.site_alpha"; + Optimize (results, ^lf_bsrel); + + null = estimators.ExtractMLEs (lf_bsrel, model_mapping); + null [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0]; + + + + } else { + null = alternative; + utility.ForEach (^bsrel_tree_id, "_node_name_", + ' + if ((meme.selected_branches [^"`&partition_index`"])[_node_name_] == utility.getGlobalValue("terms.tree_attributes.test")) { + (^"`&branch_ebf`")[_node_name_] = 1.0; + (^"`&branch_posterior`")[_node_name_] = 0.0; + } else { + (^"`&branch_ebf`")[_node_name_] = None; + (^"`&branch_posterior`")[_node_name_] = None; + } + ' + ); + } return {"fel" : fel, - utility.getGlobalValue("terms.alternative") : alternative, - utility.getGlobalValue("terms.posterior") : branch_posterior, - utility.getGlobalValue("terms.empirical_bayes_factor") : branch_ebf, - utility.getGlobalValue("terms.branch_selection_attributes") : branch_substitution_information, //TODO: keep this attr? - utility.getGlobalValue("terms.null"): null}; + utility.getGlobalValue("terms.alternative") : alternative, + utility.getGlobalValue("terms.posterior") : branch_posterior, + utility.getGlobalValue("terms.empirical_bayes_factor") : branch_ebf, + utility.getGlobalValue("terms.branch_selection_attributes") : branch_substitution_information, //TODO: keep this attr? 
+ utility.getGlobalValue("terms.null"): null}; } /* echo to screen calls */ @@ -556,9 +556,9 @@ lfunction meme.handle_a_site (lf_fel, lf_bsrel, filter_data, partition_index, pa function meme.report.echo (meme.report.site, meme.report.partition, meme.report.row) { meme.print_row = None; if (meme.report.row [6] <= meme.pvalue) { - meme.print_row = meme.report.positive_site; - meme.report.count[0] += 1; - } + meme.print_row = meme.report.positive_site; + meme.report.count[0] += 1; + } if (None != meme.print_row) { if (!meme.report.header_done) { @@ -593,7 +593,7 @@ lfunction meme.store_results (node, result, arguments) { 0 // total branch length of tested branches } }; - //console.log ( estimators.GetGlobalMLE (result["alternative"], ^"meme.parameter_site_mixture_weight")); + //console.log ( estimators.GetGlobalMLE (result["alternative"], ^"meme.parameter_site_mixture_weight")); if (None != result) { // not a constant site @@ -609,7 +609,9 @@ lfunction meme.store_results (node, result, arguments) { result_row [5] = lrt [utility.getGlobalValue("terms.LRT")]; result_row [6] = lrt [utility.getGlobalValue("terms.p_value")]; - filtered_ebf = utility.Filter (result[utility.getGlobalValue("terms.empirical_bayes_factor")], "_value_", "_value_>=100"); + filtered_ebf = result[utility.getGlobalValue("terms.empirical_bayes_factor")]; + filtered_ebf = utility.Filter (filtered_ebf, "_value_", "_value_"); + filtered_ebf = utility.Filter (filtered_ebf, "_value_", "_value_>=100"); if(None != filtered_ebf) { result_row [7] = utility.Array1D(filtered_ebf); @@ -628,7 +630,7 @@ lfunction meme.store_results (node, result, arguments) { '); result_row [8] = sum; - } + } utility.EnsureKey (^"meme.site_results", partition_index); diff --git a/tests/hbltests/libv3/MEME.wbf b/tests/hbltests/libv3/MEME.wbf index 826416b27..7a9f1bead 100644 --- a/tests/hbltests/libv3/MEME.wbf +++ b/tests/hbltests/libv3/MEME.wbf @@ -1,2 +1,9 @@ -ExecuteAFile (HYPHY_LIB_DIRECTORY + "TemplateBatchFiles" + 
DIRECTORY_SEPARATOR + "SelectionAnalyses" + DIRECTORY_SEPARATOR + "MEME.bf", +LoadFunctionLibrary("libv3/UtilityFunctions.bf"); +LoadFunctionLibrary("libv3/convenience/math.bf"); +LoadFunctionLibrary("libv3/all-terms.bf"); + +utility.ToggleEnvVariable ("OPTIMIZATION_TIME_HARD_LIMIT", 1); + +LoadFunctionLibrary(HYPHY_LIB_DIRECTORY + "TemplateBatchFiles" + DIRECTORY_SEPARATOR + "SelectionAnalyses" + DIRECTORY_SEPARATOR + "MEME.bf", {"0" : "Universal", "1" : PATH_TO_CURRENT_BF + "data/CD2.nex", "2" : "All", "3": "0.1"}); + From ff15e4a8c4e3a1b5c4aa16719bfbe1b80b99aeaa Mon Sep 17 00:00:00 2001 From: Steven Weaver Date: Mon, 23 Oct 2017 13:36:49 -0400 Subject: [PATCH 19/19] LEISR rc (#676) * gard2 for rohan * Revert "gard2 for rohan" This reverts commit 7ac2fa4e9eeb95b292e4101d5d7b2842993b945f. * Removing proteinfitter from beta branch * removing stray protein fitter datasets; updating travis.yml --- .travis.yml | 1 - res/TemplateBatchFiles/ProteinGTRFit.bf | 333 ------------ .../ProteinGTRFit_helper.ibf | 509 ------------------ tests/hbltests/libv3/ProteinGTRFit.wbf | 40 -- .../data/protgtr_fitter_alignments/aa1.fasta | 37 -- .../data/protgtr_fitter_alignments/aa2.fasta | 37 -- .../data/protgtr_fitter_alignments/aa3.fasta | 37 -- .../libv3/data/protgtr_fitter_lines_raw.txt | 3 - 8 files changed, 997 deletions(-) delete mode 100644 res/TemplateBatchFiles/ProteinGTRFit.bf delete mode 100644 res/TemplateBatchFiles/ProteinGTRFit_helper.ibf delete mode 100644 tests/hbltests/libv3/ProteinGTRFit.wbf delete mode 100644 tests/hbltests/libv3/data/protgtr_fitter_alignments/aa1.fasta delete mode 100644 tests/hbltests/libv3/data/protgtr_fitter_alignments/aa2.fasta delete mode 100644 tests/hbltests/libv3/data/protgtr_fitter_alignments/aa3.fasta delete mode 100644 tests/hbltests/libv3/data/protgtr_fitter_lines_raw.txt diff --git a/.travis.yml b/.travis.yml index b9113cc2b..0d139781a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,7 +26,6 @@ env: - 
METHOD_TEST=tests/hbltests/libv3/RELAX.wbf - METHOD_TEST=tests/hbltests/libv3/aBSREL.wbf - METHOD_TEST=tests/hbltests/libv3/BUSTED.wbf - - METHOD_TEST=tests/hbltests/libv3/ProteinGTRFit.wbf - METHOD_TEST=tests/hbltests/libv3/LEISR.wbf language: c++ diff --git a/res/TemplateBatchFiles/ProteinGTRFit.bf b/res/TemplateBatchFiles/ProteinGTRFit.bf deleted file mode 100644 index 7c5db505e..000000000 --- a/res/TemplateBatchFiles/ProteinGTRFit.bf +++ /dev/null @@ -1,333 +0,0 @@ -RequireVersion("2.3.4"); -LoadFunctionLibrary("libv3/UtilityFunctions.bf"); -LoadFunctionLibrary("libv3/IOFunctions.bf"); -LoadFunctionLibrary("libv3/stats.bf"); -LoadFunctionLibrary("libv3/all-terms.bf"); - -LoadFunctionLibrary("libv3/tasks/ancestral.bf"); -LoadFunctionLibrary("libv3/tasks/alignments.bf"); -LoadFunctionLibrary("libv3/tasks/estimators.bf"); -LoadFunctionLibrary("libv3/tasks/trees.bf"); -LoadFunctionLibrary("libv3/tasks/mpi.bf"); -LoadFunctionLibrary("libv3/convenience/math.bf"); - -LoadFunctionLibrary("libv3/models/rate_variation.bf"); - -LoadFunctionLibrary("libv3/models/protein/empirical.bf"); -LoadFunctionLibrary("libv3/models/protein/REV.bf"); -LoadFunctionLibrary("libv3/models/protein.bf"); -LoadFunctionLibrary("ProteinGTRFit_helper.ibf"); - - -/*------------------------------------------------------------------------------*/ - -utility.ToggleEnvVariable ("NORMALIZE_SEQUENCE_NAMES", 1); -utility.ToggleEnvVariable ("PRODUCE_OPTIMIZATION_LOG", 1); - -//utility.ToggleEnvVariable ("OPTIMIZATION_PRECISION", 1); // Uncomment for testing to make it all run faster. - - - -protein_gtr.analysis_banner = { - terms.io.info: "Fit a general time reversible model to a collection - of training protein sequence alignments. 
Generate substitution - and scoring matrices following the procedures described in Nickle et al 2007", - terms.io.version: "0.01", - terms.io.reference: "Nickle DC, Heath L, Jensen MA, Gilbert PB, Mullins JI, Kosakovsky Pond SL (2007) HIV-Specific Probabilistic Models of Protein Evolution. PLoS ONE 2(6): e503. doi:10.1371/journal.pone.0000503", - terms.io.authors: "Sergei L Kosakovsky Pond and Stephanie J Spielman", - terms.io.contact: "{spond,stephanie.spielman}@temple.edu" -}; -io.DisplayAnalysisBanner(protein_gtr.analysis_banner); - - -protein_gtr.filename_to_index = terms.data.filename_to_index; -protein_gtr.logl = terms.fit.log_likelihood; -protein_gtr.phase = terms.fit.phase; -protein_gtr.json.information = "information"; -protein_gtr.baseline_phase = "Baseline Phase"; -protein_gtr.final_phase = "REV-Final"; -protein_gtr.rev_phase_prefix = "REV-Phase-"; -protein_gtr.bl_phase_prefix = "REV-local-Phase-"; -protein_gtr.options.convergence_type = "convergence type"; -protein_gtr.options.tolerance = "tolerance"; -protein_gtr.options.baseline_model = "baseline model"; -protein_gtr.options.rate_variation = "use rate variation"; - -protein_gtr.analysis_results = {terms.json.analysis: protein_gtr.analysis_banner, - terms.json.input: {}, - terms.json.timers: {}}; - -protein_gtr.timers = {}; - - -/********************************************** MENU PROMPTS ********************************************************/ -/********************************************************************************************************************/ - -// Load file containing paths to alignments for fitting and assess whether to start from scratch or resume a cached analysis - -SetDialogPrompt ("Supply a list of files to include in the analysis (one per line)"); -fscanf (PROMPT_FOR_FILE, "Lines", protein_gtr.file_list); -protein_gtr.listfile = utility.getGlobalValue("LAST_FILE_PATH"); -protein_gtr.json_file = protein_gtr.listfile + ".json"; -protein_gtr.final_likelihood_function = 
protein_gtr.listfile + "_Final-Phase-LF.nex"; -protein_gtr.file_list = io.validate_a_list_of_files (protein_gtr.file_list); -protein_gtr.file_list_count = Abs (protein_gtr.file_list); -protein_gtr.index_to_filename = utility.SwapKeysAndValues(protein_gtr.file_list); -/********* PROMPTS **********/ - - -// Prompt for convergence assessment type -protein_gtr.convergence_type = io.SelectAnOption( protein_gtr.convergence_options, "Select a convergence criterion."); - -// Prompt for threshold -protein_gtr.tolerance = io.PromptUser ("\n>Provide a tolerance level for convergence assessment (Default 0.01)",0.01,0,1,FALSE); // default, lower, upper, is_integer - -// Prompt for baseline AA model -protein_gtr.baseline_model = io.SelectAnOption (models.protein.empirical_models, - "Select an empirical protein model to use for optimizing the provided branch lengths (we recommend LG):"); -// Prompt for rate variation -protein_gtr.use_rate_variation = io.SelectAnOption( protein_gtr.rate_variation_options, "Would you like to optimize branch lengths with rate variation?"); - -protein_gtr.save_options(); - - -if (protein_gtr.use_rate_variation == "Gamma"){ - protein_gtr.baseline_model_name = protein_gtr.baseline_model + "+F, with 4 category Gamma rates"; - protein_gtr.baseline_model_desc = "protein_gtr.Baseline.ModelDescription.withGamma"; - protein_gtr.rev_model_branch_lengths = "protein_gtr.REV.ModelDescription.withGamma"; -} -else { - if (protein_gtr.use_rate_variation == "GDD"){ - protein_gtr.baseline_model_name = protein_gtr.baseline_model + "+F, with 4 category GDD rates"; - protein_gtr.baseline_model_desc = "protein_gtr.Baseline.ModelDescription.withGDD4"; - protein_gtr.rev_model_branch_lengths = "protein_gtr.REV.ModelDescription.withGDD4"; - } - else { - protein_gtr.baseline_model_name = protein_gtr.baseline_model + "+F"; - protein_gtr.baseline_model_desc = "protein_gtr.Baseline.ModelDescription"; - protein_gtr.rev_model_branch_lengths = "protein_gtr.REV.ModelDescription"; - 
} -} -/********************************************************************************************************************/ - - - -protein_gtr.startTimer (protein_gtr.timers, "Total time"); - - -protein_gtr.queue = mpi.CreateQueue ({ utility.getGlobalValue("terms.mpi.Headers") : utility.GetListOfLoadedModules ("libv3/") , - utility.getGlobalValue("terms.mpi.Functions") : - { - {"models.protein.REV.ModelDescription.withGamma", - "models.protein.REV.ModelDescription.withGDD4", - "protein_gtr.REV.ModelDescription", - "protein_gtr.REV.ModelDescription.withGamma", - "protein_gtr.REV.ModelDescription.withGDD4", - "protein_gtr.REV.ModelDescription.freqs", - "protein_gtr.Baseline.ModelDescription.withGamma", - "protein_gtr.Baseline.ModelDescription.withGDD4", - "protein_gtr.Baseline.ModelDescription", - "protein_gtr.fitBaselineToFile" - } - }, - utility.getGlobalValue("terms.mpi.Variables") : {{ - "protein_gtr.shared_EFV", - "protein_gtr.baseline_model_desc", - "protein_gtr.rev_model_branch_lengths", - "protein_gtr.baseline_model", - "protein_gtr.index_to_filename", - "protein_gtr.analysis_results", - "protein_gtr.baseline_phase" - }} - }); - - - -io.ReportProgressMessageMD ("Protein GTR Fitter", "Initial branch length fit", "Initial branch length fit"); - -protein_gtr.fit_phase = 0; -protein_gtr.scores = {}; - -/*************************** STEP ONE *************************** -Perform an initial fit of Baseline model+F(+/-4G) to the data (or load cached fit.) 
-*****************************************************************/ -console.log("\n\n[PHASE 1] Performing initial branch length optimization using " + protein_gtr.baseline_model); - -protein_gtr.startTimer (protein_gtr.timers, protein_gtr.baseline_phase); -protein_gtr.timer_count +=1; - -for (file_index = 0; file_index < protein_gtr.file_list_count; file_index += 1) { - - io.ReportProgressMessageMD ("Protein GTR Fitter", " * Initial branch length fit", - "Dispatching file '" + protein_gtr.file_list[file_index]); - - - mpi.QueueJob (protein_gtr.queue, "protein_gtr.fitBaselineToFile", {"0" : protein_gtr.file_list[file_index]}, - "protein_gtr.handle_baseline_callback"); -} -mpi.QueueComplete (protein_gtr.queue); - -protein_gtr.stopTimer (protein_gtr.timers, protein_gtr.baseline_phase); - -// Sum of the logL from fitted baseline model across each data set -protein_gtr.baseline_fit_logL = math.Sum (utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/protein_gtr.baseline_phase"), "_value_", "(_value_[protein_gtr.baseline_phase])[terms.fit.log_likelihood]")); -io.ReportProgressMessageMD ("Protein GTR Fitter", " * Initial branch length fit", - "Overall Log(L) = " + protein_gtr.baseline_fit_logL); - - - -/*************************** STEP TWO *************************** - Perform an initial GTR fit on the data -*****************************************************************/ -console.log("\n\n[PHASE 2] Performing initial REV fit to the data"); - - - - -result_key = protein_gtr.rev_phase_prefix + protein_gtr.fit_phase; -protein_gtr.startTimer (protein_gtr.timers, result_key); -protein_gtr.timer_count +=1; - -current = utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/'" + protein_gtr.baseline_phase + "'"), "_value_", "_value_['" + protein_gtr.baseline_phase + "']"); -// console.log(utility.Keys(protein_gtr.analysis_results)); -// { -// {"options", "input", "0", "1"} -// } -// -// 
console.log(utility.Keys(protein_gtr.analysis_results["0"])); -// { -// {"input", "Baseline Phase"} -// } -//console.log(current); -// 0: -///EFV -/// BL -/// Trees -/// logl -///parameters -// 1: .... - -protein_gtr.current_gtr_fit = protein_gtr.fitGTRtoFileList (current, None, result_key, FALSE); // last is bool argument, finalphase - - - -// Record logL for some reason I think used to be useful but can't tell if still is -protein_gtr.scores + (protein_gtr.analysis_results[result_key])[terms.fit.log_likelihood]; - - -/* Extract the EFV for use in all model fits (until final tuneup) */ -protein_gtr.shared_EFV = (utility.Values (protein_gtr.current_gtr_fit [terms.efv_estimate]))[0]; -if (Type (protein_gtr.shared_EFV) == "String") { - protein_gtr.shared_EFV = Eval (protein_gtr.shared_EFV); -} - -//console.log(protein_gtr.current_gtr_fit); -// global: sub rates -// EFV -// branch length -//// 0 -////// [all the branches] -//// 1 -////// branch lengthssssss -// Trees (dict now!) -// logl -// parameters - - -protein_gtr.stopTimer (protein_gtr.timers, result_key); - -/********************************** STEP THREE ****************************************** - Iteratively optimize branch lengths with previous REV fit, and re-optimize REV -*****************************************************************************************/ - -console.log("\n\n[PHASE 3] Iteratively optimizing branch lengths and fitting REV model until convergence."); -for (;;) { - - protein_gtr.fit_phase += 1; - - // Optimize branch lengths - protein_gtr.phase_key = protein_gtr.bl_phase_prefix + protein_gtr.fit_phase; - protein_gtr.startTimer (protein_gtr.timers, protein_gtr.phase_key); - protein_gtr.timer_count +=1; - protein_gtr.phase_results = protein_gtr.run_gtr_iteration_branch_lengths(); - protein_gtr.stopTimer (protein_gtr.timers, protein_gtr.phase_key); - - - - // Commented out below because this is never actually used in the analysis, and it is always cached anyways - // protein_gtr.scores + 
protein_gtr.phase_results[terms.fit.log_likelihood]; - - - result_key = protein_gtr.rev_phase_prefix + protein_gtr.fit_phase; - - protein_gtr.startTimer (protein_gtr.timers, result_key); - protein_gtr.timer_count +=1; - protein_gtr.current_gtr_fit = protein_gtr.fitGTRtoFileList (utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/protein_gtr.phase_results['phase']"), "_value_", "_value_[protein_gtr.phase_results['phase']]"), protein_gtr.current_gtr_fit, result_key, FALSE); - protein_gtr.stopTimer (protein_gtr.timers, result_key); - - protein_gtr.scores + (protein_gtr.analysis_results[result_key])[terms.fit.log_likelihood]; - - // LogL - if (protein_gtr.convergence_type == "LogL"){ - console.log("\n\n[PHASE 3] Delta log-L = " + (protein_gtr.scores[Abs(protein_gtr.scores)-1] - protein_gtr.scores[Abs(protein_gtr.scores)-2])); - - // should probably be a better statement checking against 0, but for this type of analysis that might actually get us stuck hovering around optimum. 
- if (protein_gtr.scores[Abs(protein_gtr.scores)-1] - protein_gtr.scores[Abs(protein_gtr.scores)-2] <= protein_gtr.tolerance){ - break; - } - } - // RMSE - else { - previous_Q = (protein_gtr.analysis_results[protein_gtr.rev_phase_prefix + (protein_gtr.fit_phase-1)])[terms.global]; // isolate Q from previous phase - current_Q = (protein_gtr.analysis_results[result_key])[terms.global]; // isolate Q from current phase - - // Calculate RMSE between previous, current fitted Q's - rmse = 0; - N = 0; - for (l1 = 0; l1 < 20; l1 += 1) { - for (l2 = l1 + 1; l2 < 20; l2 += 1) { - - previous = (previous_Q[ terms.aminoacidRate (models.protein.alphabet[l1],models.protein.alphabet[l2]) ])[terms.fit.MLE]; - current = (current_Q[ terms.aminoacidRate (models.protein.alphabet[l1],models.protein.alphabet[l2]) ])[terms.fit.MLE]; - - rmse += (previous - current)^2; - N += 1; - } - } - rmse = Sqrt( rmse/N ); - console.log("\n\n[PHASE 3] RMSE = " + rmse); - if (rmse <= protein_gtr.tolerance) { - break; - } - } - -} - - - - -/********************************** STEP FOUR ****************************************** - Perform a final optimization on the REV matrix while also optimizing the frequencies -***************************************************************************************/ - - -console.log("\n\n[PHASE 4] Convergence achieved. 
Optimizing final model."); - -protein_gtr.startTimer (protein_gtr.timers, protein_gtr.final_phase); -protein_gtr.current_gtr_fit = protein_gtr.fitGTRtoFileList (utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/protein_gtr.phase_results['phase']"), "_value_", "_value_[protein_gtr.phase_results['phase']]"), - protein_gtr.current_gtr_fit, - protein_gtr.final_phase, - TRUE); - - -protein_gtr.stopTimer (protein_gtr.timers, protein_gtr.final_phase); - - - - - - -/* Save the JSON */ -protein_gtr.stopTimer (protein_gtr.timers, "Total time"); -protein_gtr.analysis_results[terms.json.timers] = protein_gtr.timers; -io.SpoolJSON(protein_gtr.analysis_results, protein_gtr.json_file); diff --git a/res/TemplateBatchFiles/ProteinGTRFit_helper.ibf b/res/TemplateBatchFiles/ProteinGTRFit_helper.ibf deleted file mode 100644 index f415473dc..000000000 --- a/res/TemplateBatchFiles/ProteinGTRFit_helper.ibf +++ /dev/null @@ -1,509 +0,0 @@ -/*************** Functions used in ProteinGTRFit.bf ******************/ -protein_gtr.convergence_options = {{"LogL", "Assess REV fit convergence by comparing log likelihood scores"}, {"RMSE", "[Recommended] Assess REV fit convergence by comparing RMSE between fitted matrices."}}; -protein_gtr.rate_variation_options = {{"Gamma", "Use a four-category discrete gamma distribution when optimizing branch lengths."}, - {"GDD", "Use a four-category general discrete distribution when optimizing branch lengths."}, - {"No", "Do not consider rate variation when optimizing branch lengths."} - }; - -function protein_gtr.save_options() { - protein_gtr.analysis_results[utility.getGlobalValue("terms.json.options")] = {utility.getGlobalValue("protein_gtr.options.convergence_type"): protein_gtr.convergence_type, - utility.getGlobalValue("protein_gtr.options.tolerance"): protein_gtr.tolerance, - utility.getGlobalValue("protein_gtr.options.baseline_model"): protein_gtr.baseline_model, - 
utility.getGlobalValue("protein_gtr.options.rate_variation") : protein_gtr.use_rate_variation}; - - protein_gtr.analysis_results[utility.getGlobalValue("terms.json.input")] = {utility.getGlobalValue("terms.json.file"): protein_gtr.listfile, - "number of datasets": protein_gtr.file_list_count}; - - /* Temporarily, we save input file information here in a highly redundant fashion, but doesn't seem possible to do in MPI...? */ - for (file_index = 0; file_index < protein_gtr.file_list_count; file_index += 1) { - - filename = protein_gtr.file_list[file_index]; - utility.EnsureKey(protein_gtr.analysis_results, file_index); - - protein_gtr.file_info = alignments.ReadNucleotideDataSet ("protein_gtr.msa", - filename); - protein_gtr.name_mapping = protein_gtr.file_info[utility.getGlobalValue("terms.data.name_mapping")]; - if (None == protein_gtr.name_mapping) { /** create a 1-1 mapping if nothing was done */ - protein_gtr.name_mapping = {}; - utility.ForEach (alignments.GetSequenceNames ("protein_gtr.msa"), "_value_", "`&protein_gtr.name_mapping`[_value_] = _value_"); - } - utility.ToggleEnvVariable ("GLOBAL_FPRINTF_REDIRECT", "/dev/null"); - ExecuteCommands ('protein_gtr.partitions_and_trees = trees.LoadAnnotatedTreeTopology.match_partitions (protein_gtr.file_info[terms.data.partitions], protein_gtr.name_mapping)', - {"0" : "Y"}); - utility.ToggleEnvVariable ("GLOBAL_FPRINTF_REDIRECT", None); - - - protein_gtr.filter_specification = alignments.DefineFiltersForPartitions (protein_gtr.partitions_and_trees, - "protein_gtr.msa" , - "protein_gtr.filter.", - protein_gtr.file_info); - protein_gtr.tree = utility.Map (protein_gtr.partitions_and_trees, "_value_", '_value_[terms.data.tree]'); - - - protein_gtr.output_data_info = { utility.getGlobalValue("terms.json.sequences"): protein_gtr.file_info[utility.getGlobalValue("terms.data.sequences")], - utility.getGlobalValue("terms.json.sites"): protein_gtr.file_info[utility.getGlobalValue("terms.data.sites")], - 
utility.getGlobalValue("terms.json.trees"): (protein_gtr.tree["0"])[utility.getGlobalValue("terms.trees.newick_with_lengths")], - utility.getGlobalValue("terms.json.file"): filename, - utility.getGlobalValue("terms.original_name"): {} - }; - - - //In case there were no branch lengths - if ( Abs( (protein_gtr.tree["0"])[utility.getGlobalValue("terms.branch_length")] ) == 0 ){ - protein_gtr.output_data_info[ utility.getGlobalValue("terms.json.trees") ] = (protein_gtr.full_trees["0"])[utility.getGlobalValue("terms.trees.newick")]; - } - utility.ForEach (utility.Keys (protein_gtr.name_mapping), "branch_name", - "utility.EnsureKey (protein_gtr.output_data_info[terms.original_name], branch_name)"); - - utility.ForEach (utility.Keys (protein_gtr.name_mapping), "branch_name", - "(protein_gtr.output_data_info[terms.original_name])[branch_name] = protein_gtr.name_mapping[branch_name]"); - - - (protein_gtr.analysis_results[file_index])[utility.getGlobalValue("terms.json.input")] = protein_gtr.output_data_info; - } -} - - - - - -lfunction protein_gtr.startTimer(timers, key) { - timers[key] = { - utility.getGlobalValue("terms.timers.timer"): Time(1), - }; - -} -lfunction protein_gtr.stopTimer(timers, key) { - (timers[key])[utility.getGlobalValue("terms.timers.timer")] = Time(1) - (timers[key])[utility.getGlobalValue("terms.timers.timer")]; -} - - - - - - -/* Model definitions, in particular for models with rate variation */ -//------------------------------------------------------------------------------------------------------------------------ - -/** - * @name models.protein.Baseline.ModelDescription.withGamma - * @description Define baseline (standard matrix) model w/ +F and *no* four-category gamma rate variation - */ -function protein_gtr.Baseline.ModelDescription(type){ - def = Call( models.protein.empirical.plusF_generators[protein_gtr.baseline_model], type); - return def; -} -/** - * @name models.protein.Baseline.ModelDescription.withGamma - * @description Define 
baseline (standard matrix) model w/ +F and *yes* four-category gamma rate variation - */ -function protein_gtr.Baseline.ModelDescription.withGamma(type){ - def = protein_gtr.Baseline.ModelDescription(type); - def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.Gamma.factory ({utility.getGlobalValue("terms.rate_variation.bins") : 4}); - return def; -} - -/** - * @name models.protein.Baseline.ModelDescription.withGD4 - * @description Define baseline (standard matrix) model and 4bin General discrete rate variation - */ -function protein_gtr.Baseline.ModelDescription.withGDD4(type){ - def = protein_gtr.Baseline.ModelDescription(type); - def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.GDD.factory ({utility.getGlobalValue("terms.rate_variation.bins"): 4}); - return def; -} - -/** - * @name protein_gtr.REV.ModelDescription.freqs - * @description Define REV model frequencies as empirical - */ -function protein_gtr.REV.ModelDescription.freqs (model, namespace, datafilter) { - model[terms.efv_estimate] = protein_gtr.shared_EFV; - model[terms.model.efv_estimate_name] = terms.frequencies.predefined; - (model[terms.parameters])[terms.model.empirical] = 0; - return model; -} - -/** - * @name protein_gtr.REV.ModelDescription - * @description Define a REV model with constant site rates - */ -function protein_gtr.REV.ModelDescription (type) { - def = models.protein.REV.ModelDescription(type); - if (Type (protein_gtr.shared_EFV) == "Matrix") { - def [terms.model.frequency_estimator] = "protein_gtr.REV.ModelDescription.freqs"; - } - return def; -} - -/** - * @name protein_gtr.REV.ModelDescription.withGamma - * @description Define a REV model with Gamma rate variation - */ -function protein_gtr.REV.ModelDescription.withGamma (type) { - def = models.protein.REV.ModelDescription(type); - def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.Gamma.factory 
({utility.getGlobalValue("terms.rate_variation.bins"): 4}); - return def; -} - -/** - * @name models.protein.REV.ModelDescription.withGD4 - * @description Define a REV model with 4bin General discrete rate variation - */ -function protein_gtr.REV.ModelDescription.withGDD4(type){ - def = models.protein.REV.ModelDescription.withGDD4(type); - def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.GDD.factory ({utility.getGlobalValue("terms.rate_variation.bins"): 4}); - return def; -} -//------------------------------------------------------------------------------------------------------------------------ - - - - - -/** - * @name protein_gtr.fitBaselineToFile - * @description Fits an empirical amino acid model to dataset for branch length optimization - * @param {String} filename - The name of the file containing the dataset to which the amino acid model will be fitted - * @return the fitted MLE - */ -function protein_gtr.fitBaselineToFile (filename) { - - - utility.EnsureKey(protein_gtr.analysis_results, protein_gtr.index_to_filename[filename]); - - protein_gtr.file_info = alignments.ReadNucleotideDataSet ("protein_gtr.msa", - filename); - protein_gtr.name_mapping = protein_gtr.file_info[utility.getGlobalValue("terms.data.name_mapping")]; - if (None == protein_gtr.name_mapping) { /** create a 1-1 mapping if nothing was done */ - protein_gtr.name_mapping = {}; - utility.ForEach (alignments.GetSequenceNames ("protein_gtr.msa"), "_value_", "`&protein_gtr.name_mapping`[_value_] = _value_"); - } - utility.ToggleEnvVariable ("GLOBAL_FPRINTF_REDIRECT", "/dev/null"); - ExecuteCommands ('protein_gtr.partitions_and_trees = trees.LoadAnnotatedTreeTopology.match_partitions (protein_gtr.file_info[terms.data.partitions], protein_gtr.name_mapping)', - {"0" : "Y"}); - utility.ToggleEnvVariable ("GLOBAL_FPRINTF_REDIRECT", None); - - - - protein_gtr.partition_count = Abs (protein_gtr.partitions_and_trees); - io.CheckAssertion 
("protein_gtr.partition_count==1", "This analysis can only handle a single partition"); - - - - protein_gtr.filter_specification = alignments.DefineFiltersForPartitions (protein_gtr.partitions_and_trees, - "protein_gtr.msa" , - "protein_gtr.filter.", - protein_gtr.file_info); - - - protein_gtr.full_trees = utility.Map (protein_gtr.partitions_and_trees, "_value_", '_value_[terms.data.tree]'); - protein_gtr.full_data_filter = utility.Map (protein_gtr.filter_specification, "_value_", "_value_[terms.data.name]"); - - - /********** Store dataset information *************/ - /* CURRENTLY DOESN'T WORK IN MPI FOR REASONS TBD */ -// protein_gtr.output_data_info = { utility.getGlobalValue("terms.json.sequences"): protein_gtr.file_info[utility.getGlobalValue("terms.data.sequences")], -// utility.getGlobalValue("terms.json.sites"): protein_gtr.file_info[utility.getGlobalValue("terms.data.sites")], -// utility.getGlobalValue("terms.json.trees"): (protein_gtr.full_trees["0"])[utility.getGlobalValue("terms.trees.newick_with_lengths")], -// utility.getGlobalValue("terms.json.file"): filename -// }; -// -// - // In case there were no branch lengths -// if ( Abs( (protein_gtr.full_trees["0"])[utility.getGlobalValue("terms.branch_length")] ) == 0 ){ -// protein_gtr.output_data_info[ utility.getGlobalValue("terms.json.trees") ] = (protein_gtr.full_trees["0"])[utility.getGlobalValue("terms.trees.newick")]; -// } -// utility.ForEach (utility.Keys (protein_gtr.name_mapping), "branch_name", -// "utility.EnsureKey (protein_gtr.output_data_info[terms.original_name], branch_name)"); -// -// utility.ForEach (utility.Keys (protein_gtr.name_mapping), "branch_name", -// "(protein_gtr.output_data_info[terms.original_name])[branch_name] = protein_gtr.name_mapping[branch_name]"); -// -// -// (protein_gtr.analysis_results[protein_gtr.index_to_filename[filename]])[utility.getGlobalValue("terms.json.input")] = protein_gtr.output_data_info; -// -// - 
/****************************************************/ - - - protein_gtr.baseline_mle = estimators.FitSingleModel_Ext(protein_gtr.full_data_filter, - protein_gtr.full_trees, - protein_gtr.baseline_model_desc, - None, - None); - // - - protein_gtr.baseline_mle - terms.global; // delete empty key - return protein_gtr.baseline_mle; -} - - - -/** - * @name protein_gtr.fitGTRtoFileList - * @description Fits an average GTR amino acid model to a set of datasets - * @param {Dict} current_results - the current analysis results, used for accurate file indexing - * @param {Dict} previous_values - the fitted REV model from previous fit iteration, to use as initial values for this fit if they exist. If this is the first fit, then this is simply 0. - * @param {String} phase - the key for this fit iteration - * @param {Bool} final - True if this is the final tuning after convergence achieved, False if another fit iteration. - * @return the fitted MLE - */ -function protein_gtr.fitGTRtoFileList (current_results, previous_values, phase, finalphase) { - - io.ReportProgressMessageMD ("Protein GTR Fitter", phase, - "Fitting the REV model using constrained branch lengths proportions (" + phase + ")"); - - //file_list = utility.Keys (current_results); ---> protein_gtr.file_list - //file_count = utility.Array1D (file_list); ---> protein_gtr.file_list_count - // NOTE: protein_gtr.index_to_filename is {filename:0, filename:1} - - partition_info = {}; - filter_info = {}; - trees = {}; - initial_values = {terms.global : {}, terms.branch_length : {}}; - proportional_scalers = {}; - index_to_file_name = {}; - - for (file_index = 0; file_index < protein_gtr.file_list_count; file_index += 1) { - file_path = protein_gtr.file_list [file_index]; - dataset_name = "protein_gtr.msa." 
+ file_index; - partition_info [file_index] = alignments.ReadNucleotideDataSet (dataset_name, file_path); - partition_specification = { "0" : {terms.data.name : "all", terms.data.filter_string : "", term.data.tree : ((current_results[file_index])[terms.fit.trees])[0]}}; - - - filter_info [file_index] = (alignments.DefineFiltersForPartitions (partition_specification, - dataset_name , - dataset_name, - partition_info [file_index]))[0]; - trees [file_index] = {terms.trees.newick : ((current_results[file_index])[terms.fit.trees])[0]}; - (initial_values[terms.branch_length])[file_index] = ((current_results[file_index])[terms.branch_length])[0]; - if (!finalphase) { - scaler = "protein_gtr.gtr_scaler_" + file_index; - parameters.DeclareGlobalWithRanges (scaler, 1, 0, 1000); - proportional_scalers[file_index] = scaler; - } - } - - utility.SetEnvVariable ("VERBOSITY_LEVEL", 1); - utility.ToggleEnvVariable ("AUTO_PARALLELIZE_OPTIMIZE", 1); - utility.ToggleEnvVariable ("OPTIMIZATION_METHOD", 0); - - - if (! 
finalphase) { - // Set initial values to the previous fit - if (utility.Has (previous_values, terms.global, "AssociativeList")) { - initial_values[terms.global] = previous_values[terms.global]; - } - // Set initial values - else { - for (l1 = 0; l1 < 20; l1 += 1) { - for (l2 = l1 + 1; l2 < 20; l2 += 1) { - (initial_values[terms.global]) [terms.aminoacidRate (models.protein.alphabet[l1],models.protein.alphabet[l2])] = {terms.fit.MLE : 0.1}; // set all to 1 - } - } - } - // fit the model - protein_gtr.rev.mle = estimators.FitSingleModel_Ext ( - utility.Map (filter_info, "_value_", "_value_[terms.data.name]"), - trees, - protein_gtr.rev_model_branch_lengths, - initial_values, - {terms.run_options.proportional_branch_length_scaler : proportional_scalers} - ); - - } - // FINAL TUNING - else { - protein_gtr.rev.mle = estimators.FitSingleModel_Ext ( - utility.Map (filter_info, "_value_", "_value_[terms.data.name]"), - trees, - protein_gtr.rev_model_branch_lengths, - previous_values, - {terms.run_options.retain_lf_object : TRUE} - ); - - - lf_id = protein_gtr.rev.mle[terms.likelihood_function]; - Export(protein_gtr.finalphase_LF, ^lf_id); - protein_gtr.rev.mle - terms.likelihood_function; - fprintf(protein_gtr.final_likelihood_function, protein_gtr.finalphase_LF); - - } - - - - // Save the rev.mle into the analysis_results, and cache it. - (^"protein_gtr.analysis_results")[phase] = protein_gtr.rev.mle; - - console.log (""); // clear past the optimization progress line - utility.SetEnvVariable ("VERBOSITY_LEVEL", 0); - utility.ToggleEnvVariable ("AUTO_PARALLELIZE_OPTIMIZE", None); - utility.ToggleEnvVariable ("OPTIMIZATION_METHOD", None); - - - - // I prefer to keep this as dictionary for compatibility with rest of the output. 
- protein_gtr.rev.mle[terms.fit.trees] = utility.SwapKeysAndValues(utility.MatrixToDict(protein_gtr.rev.mle[terms.fit.trees])); - return protein_gtr.rev.mle; - -} - - -/** - * @name protein_gtr.UpdateBLWithREV - * @description Use a previously-fitted average REV amino acid model to a file, specifically for branch length optimization under this model - * @param {String} filename - the filename of the dataset to be fitted - * @param {Dict} rates - the rates for the GTR model used in fitting - * @param {Dict} branch_lengths - the current branch length values for this dataset - * @return the fitted MLE - */ -function protein_gtr.UpdateBLWithREV (filename, rates, branch_lengths) { - - protein_gtr.file_info = alignments.ReadNucleotideDataSet ("protein_gtr.msa", - filename); - - utility.ToggleEnvVariable ("GLOBAL_FPRINTF_REDIRECT", "/dev/null"); - - ExecuteCommands ('protein_gtr.partitions_and_trees = trees.LoadAnnotatedTreeTopology.match_partitions (protein_gtr.file_info [utility.getGlobalValue("terms.data.partitions")], protein_gtr.name_mapping)', - {"0" : "Y"}); - - utility.ToggleEnvVariable ("GLOBAL_FPRINTF_REDIRECT", None); - protein_gtr.filter_specification = alignments.DefineFiltersForPartitions (protein_gtr.partitions_and_trees, - "protein_gtr.msa" , - "protein_gtr.filter.", - protein_gtr.file_info); - - protein_gtr.rev_file_mle = {terms.global : {}}; - - for (l1 = 0; l1 < 20; l1 += 1) { - for (l2 = l1 + 1; l2 < 20; l2 += 1) { - rate_term = terms.aminoacidRate (models.protein.alphabet[l1],models.protein.alphabet[l2]); - (protein_gtr.rev_file_mle[terms.global]) [rate_term] = - {terms.fit.MLE : (rates[rate_term])[terms.fit.MLE] , terms.fix : TRUE}; - } - } - - protein_gtr.rev_file_mle [terms.branch_length] = { "0" : branch_lengths }; - - - utility.SetEnvVariable ("VERBOSITY_LEVEL", 1); - protein_gtr.rev_file_mle = estimators.FitSingleModel_Ext ( - utility.Map (protein_gtr.filter_specification, "_value_", "_value_[terms.data.name]"), // value => value['name'] - 
utility.Map (protein_gtr.partitions_and_trees, "_value_", "_value_[terms.data.tree]"), // value => value['tree'] - protein_gtr.rev_model_branch_lengths, // pre-defined with or without gamma. - protein_gtr.rev_file_mle, - None - ); - utility.SetEnvVariable ("VERBOSITY_LEVEL", 0); - console.log (""); // clear past the optimization progress line - - protein_gtr.rev_file_mle - terms.global; // delete redundant keys - - return protein_gtr.rev_file_mle; - -} - - - - - -/** - * @name protein_gtr.run_gtr_iteration_branch_lengths - * @description Optimizes branch lengths for all datasets using the REV model fitted in the current iteration - * @return Dictionary containing summed LogL values from branch length optimizations and the phase index for this iteration - */ -function protein_gtr.run_gtr_iteration_branch_lengths () { - - protein_gtr.queue = mpi.CreateQueue ({ "Headers" : utility.GetListOfLoadedModules ("libv3/") , - "Functions" : - { - {"protein_gtr.REV.ModelDescription", - "protein_gtr.REV.ModelDescription.withGamma", - "protein_gtr.REV.ModelDescription.freqs", - "models.protein.REV.ModelDescription.withGamma" - } - }, - "Variables" : {{ - "protein_gtr.shared_EFV", - "protein_gtr.rev_model_branch_lengths", - "protein_gtr.filename_to_index", - "protein_gtr.phase_key" - }} - }); - - -//console.log(protein_gtr.current_gtr_fit); -// global: sub rates -// EFV -// branch length -//// 0 -////// [all the branches] -//// 1 -////// branch lengthssssss -// Trees (dict now!) 
-// logl -// parameters - - - - io.ReportProgressMessageMD ("Protein GTR Fitter", protein_gtr.phase_key, "Retuning branch lengths (" + protein_gtr.phase_key + ")"); - - for (file_index = 0; file_index < protein_gtr.file_list_count; file_index += 1) { - - io.ReportProgressMessageMD ("Protein GTR Fitter", protein_gtr.phase_key, - "Dispatching file '" + protein_gtr.file_list[file_index] + "' " + (file_index+1) + "/" + protein_gtr.file_list_count); - - mpi.QueueJob (protein_gtr.queue, "protein_gtr.UpdateBLWithREV", {"0" : protein_gtr.file_list[file_index], - "1" : protein_gtr.current_gtr_fit[terms.global], - "2" : (protein_gtr.current_gtr_fit[terms.branch_length])[file_index]}, - "protein_gtr.handle_branch_length_callback"); - } - mpi.QueueComplete (protein_gtr.queue); - - protein_gtr.run_gtr_iteration_branch_lengths.logL = math.Sum (utility.Map (utility.Filter (protein_gtr.analysis_results, "_value_", "_value_/protein_gtr.phase_key"), "_value_", "(_value_[protein_gtr.phase_key])[terms.fit.log_likelihood]")); - - io.ReportProgressMessageMD ("Protein GTR Fitter", protein_gtr.phase_key, - "Overall Log(L) = " + protein_gtr.run_gtr_iteration_branch_lengths.logL); - - - - return { protein_gtr.logl : protein_gtr.run_gtr_iteration_branch_lengths.logL , protein_gtr.phase : protein_gtr.phase_key}; - -} - -/** - * @name protein_gtr.handle_gtr_callback - * @description Handle MPI callback after fitting a REV model - */ -function protein_gtr.handle_branch_length_callback (node, result, arguments) { - - cachekey = protein_gtr.index_to_filename[arguments[0]]; - - (protein_gtr.analysis_results[cachekey])[utility.getGlobalValue("protein_gtr.phase_key")] = result; - - - io.ReportProgressMessageMD ("Protein GTR Fitter", "* " + protein_gtr.bl_phase_prefix + ^"protein_gtr.fit_phase", - "Received file '" + arguments[0] + "' from node " + node + ". 
LogL = " + result[terms.fit.log_likelihood]); - - -} - -/** - * @name protein_gtr.handle_baseline_callback - * @param node - node name which processed the given data - * @param {Dict} result - Dictionary of fitted information for given data - * @param {Dict} arguments - Dictionary with single key:value :: 0:datafile name - * @description Handle MPI callback after fitting a baseline amino acid model (for initial branch length optimization) - */ -function protein_gtr.handle_baseline_callback (node, result, arguments) { - - savekey = protein_gtr.index_to_filename[arguments[0]]; - - utility.EnsureKey(protein_gtr.analysis_results, savekey); - utility.EnsureKey(protein_gtr.analysis_results[savekey], protein_gtr.baseline_phase); - (protein_gtr.analysis_results[savekey])[protein_gtr.baseline_phase] = result; - - io.ReportProgressMessageMD ("Protein GTR Fitter", "Initial branch length fit", - "Received file '" + arguments[0] + "' from node " + node + ". LogL = " + result[terms.fit.log_likelihood]); -} diff --git a/tests/hbltests/libv3/ProteinGTRFit.wbf b/tests/hbltests/libv3/ProteinGTRFit.wbf deleted file mode 100644 index 91b5c2855..000000000 --- a/tests/hbltests/libv3/ProteinGTRFit.wbf +++ /dev/null @@ -1,40 +0,0 @@ -LoadFunctionLibrary("libv3/UtilityFunctions.bf"); -LoadFunctionLibrary("libv3/IOFunctions.bf"); - -utility.ToggleEnvVariable ("OPTIMIZATION_PRECISION", 1); -utility.ToggleEnvVariable ("OPTIMIZATION_TIME_HARD_LIMIT", 1); - -function ensureFullPath (path) { - if ((path $ "/")[0]!= 0){ - return HYPHY_LIB_DIRECTORY + ".." 
+ DIRECTORY_SEPARATOR + path; - } - return path; -} - -function writeNewPath (new_path) { - fprintf(list_path_final, new_path + "\n"); -} - - - - -list_path_raw = PATH_TO_CURRENT_BF + "data/protgtr_fitter_lines_raw.txt"; // file list with relative paths -list_path_final = PATH_TO_CURRENT_BF + "data/protgtr_fitter_lines.txt"; // file list with relative paths - - -fscanf (list_path_raw, "Lines", file_list1); -file_list2 = utility.Map(file_list1, "_value_", "ensureFullPath(_value_)"); // ensure full path for each file - -fprintf(list_path_final, CLEAR_FILE, ""); -utility.ForEach(file_list2, "_value_", "writeNewPath(_value_)"); - - -LoadFunctionLibrary("ProteinGTRFit.bf", { - "0": PATH_TO_CURRENT_BF + "data/protgtr_fitter_lines.txt", - "1": "RMSE", // use rmse convergence - "2": "1", // stopping - "3": "WAG", // use WAG for baseline - "4": "No", // use no rate variation -}); - - diff --git a/tests/hbltests/libv3/data/protgtr_fitter_alignments/aa1.fasta b/tests/hbltests/libv3/data/protgtr_fitter_alignments/aa1.fasta deleted file mode 100644 index a36e45339..000000000 --- a/tests/hbltests/libv3/data/protgtr_fitter_alignments/aa1.fasta +++ /dev/null @@ -1,37 +0,0 @@ ->t8 -GDTAADSYLDLVSMPFDVLGLRLRLGALIMAPYFVNGSARKLITWALAYSRVGIPGHGKK -EHPLYGDYTWCGMEQTFQNQGYGHMTAIRVGWPSAADRTA ->t9 -ADTATSAYLVAVSTGGETLGSRRANHNEVTTQYFDGSEGRKNFIWALMYLKVGFCEHGKK -ERPIFGQHNSCAIGTVIEKGNNAAYNDTECGTAKHRGSDA ->t6 -GSTGADTYLDLVSMPFNVLGLRLRLNCLVMAPYFVNGAAKKLFVWSLAYARVGIPGHGKK -DHPLYGNYSWCGMGQIFQNQGYGHLTAIRVGWPSAADRTA ->t7 -ADTATTIYQTAVSTNWETLGLRRANHNEVTTQYFDGPEGEKLFIWALLYLKVAFCEHGKK -RRPIFGQHDAVAIGHVVERENNAASNDPECGLALHVGCDA ->t4 -AETATSTYLVAVSTNWETLGLRRANHNEVTTRYFDGSEGPKNFIWALMTLKVGFCAHGKK -ERPIIGLHNSCGIGPVIEKSNNLAINRTACGAASHVGSDA ->t5 -GSTAADSYLDLVSMPFDVLGLQLMLGALVMAPYFVHGSTRKLFKWALAYSRVGVPGHGKK -EHPLYGNYTWCGMGQIFQNQGYGHMTAIRVGWPSAADRQA ->t2 -GSTGAGMYPDCISTPEDVLGLSLALDARVRGKFLSDKRARRLLVWSLAYAMVGTIGHPKI -EHPLLGDFSWCNLGHVYESSDYQHGTAVRCGWPSDPERNA ->t3 
-PRTGRDSYIGVISMDINVLSLALADHIETMDQILNKKEGQSLFIWGLIKARVGLTGHGKK -ERSIFGDFDACTLATAYSKEDFAVTDANRCNYESAPAGDA ->t1 -ENTSTNKYFEWVSTDFDTLGLKIANIVEIISYYFDRNQGNKLIIWGLVYTRVGLCGHGKK -NQPFFGDYTECGIGTIFRKEEFAANTQSRCGFRHAPGSAA ->t10 -IDPSTNAYLTGVSENVENLGLKKATANEVALQYFEAPAGEKLFIMTLLYLKVESMTNAKR -FDPIFGVHDSVAMAHLVEKENNAAGLDSECGLKSIVGCDP ->t11 -GSTGAGTWPDCVSTPDEVLGVSLALDARVRGLFLSDARARRYLIWYLAYARVGLVGHPKV -EHPLFGDFSWCNVGHVFESSDYQHATTVRCGWPSEPPRNA ->t12 -ARTPTDGYFEVVSTDTDTLGLALANQVEVMTQYFDRQQGTKLFIWGLIYARVGVTGHGKK -NRPIFGDYTHCGMATVYNKEDFAASTASRHGFPSTPGSDA -(t1:0.1408208979,((((t4:0.09180754325,t9:0.07128012371):0.1313551269,(t10:0.4165821702,t7:0.04764077778):0.08831922431):0.6377018364,(t3:0.6987412621,t12:0.08350678342):0.04692053027):0.07418762848,(((t5:0.08652378838,t8:0.05359766493):0.103280468,t6:0.08857364883):0.250248048,(t2:0.074322189,t11:0.1835713):0.644012):0.4382723628):0.12478); \ No newline at end of file diff --git a/tests/hbltests/libv3/data/protgtr_fitter_alignments/aa2.fasta b/tests/hbltests/libv3/data/protgtr_fitter_alignments/aa2.fasta deleted file mode 100644 index fc55ae0d1..000000000 --- a/tests/hbltests/libv3/data/protgtr_fitter_alignments/aa2.fasta +++ /dev/null @@ -1,37 +0,0 @@ ->t8 -AGAKSGEGGTASFYYSQTNLDVAKGVLFLMVPQGEWAKESIFSSLRCWQSVKESTILITH -RLIQLNRPDANYELAAGSIWAHYDQFLGGLETNAFYLTAI ->t9 -SGATSGEGSMWSLFYTKVEGELTLGQVHEKKPGAGTALRAFGDLERCLELLVPVIGAAID -RNSELTENAINVKGAAGLAHAAYASFTDIRERGGAWIALL ->t6 -AGAKSGEGGTAALYNSQTDLDVAKGVVFLMVPEGEWAKESVLSSLRCWQSVEESTVVITY -RLVELHRSDVNYELAAGSIWAHYNEFLASLEAAAFYLTAL ->t7 -SGATSAEGAMASLYYTKVEGELSLGEMHKKKPKAGTELRPLGDLERCLELLVPSIGAAID -RNSELSKNAINVALAAGLVHAAYASYTDIREHGGAWIALC ->t4 -SGATSGEGSMRSLFYCKVEGELSLGQVHDKKPGAGTALRAVGSLYRCLELLVPAIGAAID -RNSELTENAINVKLATGLAQAAYASFTNIRERGGAWIALL ->t5 -AAAKSGEGGTASFYYSQTELDVAKGVLFLMVPEGEWAKESVFSSLRCWQSVEESTILITH -RFVQLQRPDANYELAAGSIWAHYDQFLGGLETNAFYLTAV ->t2 -KGAVTYQGNTASLHYTKDDDQGSQGVVALGTPELDWAKKAHEIRLRGQQNVSASLTVQLD -NQEEFQKFEINYRRVSGFVVAYCSAYPSQVETSVYYMTLT ->t3 
-TGETEGVGDTKDLYYGHTEGQLVAGVTHDQAEKVGTSGRACATLDSCVGSVVPSLSAWID -RTVALGKSEMRCELVDGYIYSDHQIFIKVIVVGALYISQL ->t1 -SGANAGEGDMVSLYFAWSDGEVAKGVVHEMVPKSGMEPRAPQTLVRCLQSVLPPPTAVVD -RIVELNRSELNYDLAAGLSYASYAEFLNVLETGAFYISVV ->t10 -SGKTSGDGAMASLYYTKVPGEDSLGEVHEKRGKSGTDMGAEGDHKKCLTLFLPSIGAAVD -RDTEFSKKDLNLVLAASLVHSAYAAIADIHEHGGYWTAVC ->t11 -KGAMTDEGNTAKLQYTTDNSQVSHGVVVLGTPELDWAKSAHEVRLAGEQNISPALTVQLD -NQEEFQKFEINYRSVSGFVRAQCSASPSQVQTSVYHMLCT ->t12 -SGATSGEGDMASLYYSKAEGEVAKAVVHELVPKSGTAPRAIETLDGCLQSVVPALSAVID -RTVELSPSELNCDMAAGLIYADYAAFINILETGALYIALL -(t1:0.1408208979,((((t4:0.09180754325,t9:0.07128012371):0.1313551269,(t10:0.4165821702,t7:0.04764077778):0.08831922431):0.6377018364,(t3:0.6987412621,t12:0.08350678342):0.04692053027):0.07418762848,(((t5:0.08652378838,t8:0.05359766493):0.103280468,t6:0.08857364883):0.250248048,(t2:0.074322189,t11:0.1835713):0.644012):0.4382723628):0.12478); \ No newline at end of file diff --git a/tests/hbltests/libv3/data/protgtr_fitter_alignments/aa3.fasta b/tests/hbltests/libv3/data/protgtr_fitter_alignments/aa3.fasta deleted file mode 100644 index 62bd5455e..000000000 --- a/tests/hbltests/libv3/data/protgtr_fitter_alignments/aa3.fasta +++ /dev/null @@ -1,37 +0,0 @@ ->t8 -QPSNEIVLEVGNGIETCVYKADNSDATKFHVDEPHLEAKTPVTTGLDEIRQPSTTVSLLS -IEYSPHAAQRHSETVVISVNGKLQGGEDLYIIPRISCNMK ->t9 -QPKADLAPEVGTKLVNCAFKCSDQSGHRFHIDFSAEKLAPAQSRTLKSYLNPPFSVEVLP -LSFNPQKVQKAPDCIMVSAEGGFEGGAGAFTPFRFSCKLT ->t6 -QPSDELVLEVGNGTETCVFKADNSDATKFHIDEPLMEAKTPRTTGLDEIRKPSTSVDLLS -IEYTPHADQKHADTVVINVNGDLQGGEDLYIIPRISAHVK ->t7 -DPRTDLTPDVGTKLVNCAFKCSDQSGHRFRIDYPADKLAPEQTRSLKKFLNPEISIAELH -LSFSPQKIQKSPDCILISAEGGFEGGAGAFTALRWSCKFT ->t4 -QPKKDLAPPVGTKMVNCAFKCSDQSGHRFHIDYSAEKLAPSQTRITKKYLNPPFSVAVLP -LNFNPDKVQKSPDCIIISAEGGFEGGAGAFTSFRFSCKLA ->t5 -QPSNELVLEVGNGIQTCVFKADNSDATKFHVDEPHLEVKTPLTTGLDEIRQPSTRVSLLS -IEYSPHPAQQHSETVVTSVNGKLQGGEDLYIIPRISCNMK ->t2 -EPTFSLTLTVSIVMPICVLKEETGVAIVFHVDESALETNAQRTAGFKTLKSPATSAVLVP -VDYKPMTAQKHEDPALFKTQKDLDTADKFFVRTYISCNIE ->t3 
-AAVVDLKVSVGHVPITCRLKCNDRFTKNFTVQASGLETADRKTISFAPLLGPDFTIAILP -LEWKPQESQEMPDAISVTAEGSFASGVKAFYSSSFSCNLV ->t1 -DPAVDLKLDIGSAVITCALSNDYRSATRFHLDKPALATPTAHTTAWNPLLAPDLSRAILS -LEYTPTKAQKAADTIAITIEGEYPGEKRAFAPTRYNQQFT ->t10 -NPRLDLTPDFGTPLVSCAFPASNQSGHRFEINYPPDRLAPEEQRSLKKFMNPEISITNLH -QSYNPEAIKKNPDCVLISAEGGFNGGAGAFTAFHLSCTFT ->t11 -EPTFDLTLTVSIVMPICVLKANSGVAIVYHVDESALETNAQRTAGFKTLKSPVTNVVLNP -VDYNPQTAQKHEDTALFRTQKDLDTADKFFVRTYIACNIA ->t12 -DPAVDLKLDVGRALVTCALKCDDRYAKRLHIDEGAVETATPQETALQPLLGPDFTVAILP -LEYTPQKAQEAPDTITITVEGSFSGGAKAFAASRFSCNLI -(t1:0.1408208979,((((t4:0.09180754325,t9:0.07128012371):0.1313551269,(t10:0.4165821702,t7:0.04764077778):0.08831922431):0.6377018364,(t3:0.6987412621,t12:0.08350678342):0.04692053027):0.07418762848,(((t5:0.08652378838,t8:0.05359766493):0.103280468,t6:0.08857364883):0.250248048,(t2:0.074322189,t11:0.1835713):0.644012):0.4382723628):0.12478); \ No newline at end of file diff --git a/tests/hbltests/libv3/data/protgtr_fitter_lines_raw.txt b/tests/hbltests/libv3/data/protgtr_fitter_lines_raw.txt deleted file mode 100644 index 930ef8c9f..000000000 --- a/tests/hbltests/libv3/data/protgtr_fitter_lines_raw.txt +++ /dev/null @@ -1,3 +0,0 @@ -tests/hbltests/libv3/data/protgtr_fitter_alignments/aa1.fasta -tests/hbltests/libv3/data/protgtr_fitter_alignments/aa2.fasta -tests/hbltests/libv3/data/protgtr_fitter_alignments/aa3.fasta \ No newline at end of file