
Merge pull request #1412 from veg/develop
Next minor release
spond authored Oct 14, 2021
2 parents f44a6dd + 9ee9471 commit 10ad033
Showing 5 changed files with 136 additions and 52 deletions.
125 changes: 75 additions & 50 deletions res/TemplateBatchFiles/SelectionAnalyses/FEL.bf
@@ -134,7 +134,7 @@ if (^"fel.ci") {
{"alpha", "Synonymous substitution rate at a site"}
{"beta", "Non-synonymous substitution rate at a site"}
{"alpha=beta", "The rate estimate under the neutral model"}
{"LRT", "Likelihood ration test statistic for beta = alpha, versus beta &neq; alpha"}
{"LRT", "Likelihood ratio test statistic for beta = alpha, versus beta &neq; alpha"}
{"p-value", "Asymptotic p-value for evidence of selection, i.e. beta &neq; alpha"}
{"Total branch length", "The total length of branches contributing to inference at this site, and used to scale dN-dS"}
{"dN/dS LB", "95% profile likelihood CI lower bound for dN/dS (if available)"}
@@ -148,7 +148,7 @@ if (^"fel.ci") {
fel.table_headers = {{"alpha", "Synonymous substitution rate at a site"}
{"beta", "Non-synonymous substitution rate at a site"}
{"alpha=beta", "The rate estimate under the neutral model"}
{"LRT", "Likelihood ration test statistic for beta = alpha, versus beta &neq; alpha"}
{"LRT", "Likelihood ratio test statistic for beta = alpha, versus beta &neq; alpha"}
{"p-value", "Asymptotic p-value for evidence of selection, i.e. beta &neq; alpha"}
{"Total branch length", "The total length of branches contributing to inference at this site, and used to scale dN-dS"}};
fel.table_screen_output = {{"Codon", "Partition", "alpha", "beta", "LRT", "Selection detected?"}};
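/* For context: the LRT and p-value columns above are the standard likelihood-ratio
   construction, matching the math.DoLRT call (one degree of freedom) further down in
   this file:

       LRT      = 2 * (logL [beta unconstrained] - logL [beta := alpha])
       p-value ~= P (Chi-squared with 1 d.f. >= LRT)   // asymptotic, reported per site
*/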
@@ -406,7 +406,6 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod
^"fel.beta_scaler_test" = 1;
^"fel.beta_scaler_nuisance" = 1;



Optimize (results, ^lf
, {
@@ -417,24 +416,27 @@
);


ci = None;
if (^"fel.ci") {
if (^"fel.srv") {
lf_stash = estimators.TakeLFStateSnapshot (lf);
global omega_ratio_for_ci;
if (^"fel.alpha_scaler" == 0.0) { // has no syn_variation
^"fel.alpha_scaler" = 1e-8;
}
omega_ratio_for_ci :< 10000;
omega_ratio_for_ci :>0;
omega_ratio_for_ci = ^"fel.beta_scaler_test" / ^"fel.alpha_scaler";
if (!sim_mode) {
if (^"fel.srv") {
lf_stash = estimators.TakeLFStateSnapshot (lf);
global omega_ratio_for_ci;
if (^"fel.alpha_scaler" == 0.0) { // has no syn_variation
^"fel.alpha_scaler" = 1e-8;
}
omega_ratio_for_ci :< 10000;
omega_ratio_for_ci :>0;
omega_ratio_for_ci = ^"fel.beta_scaler_test" / ^"fel.alpha_scaler";

^"fel.beta_scaler_test" := omega_ratio_for_ci * ^"fel.alpha_scaler";
ci = parameters.GetProfileCI (&omega_ratio_for_ci, lf, 0.95);
estimators.RestoreLFStateFromSnapshot (lf, lf_stash);
} else {
ci = parameters.GetProfileCI ("fel.beta_scaler_test", lf, 0.95);
^"fel.beta_scaler_test" := omega_ratio_for_ci * ^"fel.alpha_scaler";
ci = parameters.GetProfileCI (&omega_ratio_for_ci, lf, 0.95);
estimators.RestoreLFStateFromSnapshot (lf, lf_stash);
} else {
ci = parameters.GetProfileCI ("fel.beta_scaler_test", lf, 0.95);
}
}
} else {
ci = None;
}
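/* A sketch of what parameters.GetProfileCI reports above, assuming standard
   profile-likelihood asymptotics: the 95% bounds on omega_ratio_for_ci (or on
   fel.beta_scaler_test when synonymous rate variation is disabled) are the values
   omega for which

       2 * (logL [omega_hat] - logL [omega]) <= 3.84   // 0.95 quantile of Chi-squared with 1 d.f.
*/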

if (sim_mode) {
@@ -456,8 +458,10 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod
Null [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0];
}


if (!sim_mode) {
tree_name = (lfInfo["Trees"])[0];
sum = (BranchLength (^tree_name, -1)*^"fel.selected_branches_index")[0];
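// i.e. the summed length of the branches flagged as test branches (the 0/1 entries of
// fel.selected_branches_index); this is the "Total branch length" reported per site and
// used to scale dN-dS.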
if (^"fel.resample") {
N = ^"fel.resample";
sims = {};
@@ -474,12 +478,14 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod
is = fel.handle_a_site (lf, sims[i], partition_index, pattern_info, model_mapping, TRUE);
null_LRT[i] = is;
}
return {utility.getGlobalValue("terms.alternative") : alternative, utility.getGlobalValue("terms.Null"): Null, utility.getGlobalValue("terms.simulated"): null_LRT};
return {utility.getGlobalValue("terms.alternative") : alternative, utility.getGlobalValue("terms.Null"): Null, utility.getGlobalValue("terms.simulated"): null_LRT, utility.getGlobalValue("terms.confidence_interval"): ci};
}
}



return {utility.getGlobalValue("terms.alternative") : alternative,
utility.getGlobalValue("terms.Null"): Null,
utility.getGlobalValue("terms.math.sum") : sum,
utility.getGlobalValue("terms.confidence_interval"): ci};
}

@@ -581,36 +587,41 @@ lfunction fel.store_results (node, result, arguments) {
} };
}


has_lrt = FALSE;

if (None != result) { // not a constant site

lrt = math.DoLRT ((result[utility.getGlobalValue("terms.Null")])[utility.getGlobalValue("terms.fit.log_likelihood")],
(result[utility.getGlobalValue("terms.alternative")])[utility.getGlobalValue("terms.fit.log_likelihood")],
1);

if (result / ^"terms.simulated") {

has_lrt = result / ^"terms.simulated";

if (has_lrt) {
pv = +((result[^"terms.simulated"])["_MATRIX_ELEMENT_VALUE_>=" + lrt [utility.getGlobalValue("terms.LRT")]]);
lrt [utility.getGlobalValue("terms.p_value")] = (pv+1)/(1+^"fel.resample");
}
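// i.e. the resampled p-value is (1 + #{simulated LRT >= observed LRT}) / (1 + N),
// the usual add-one correction that keeps bootstrap p-value estimates strictly positive.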

result_row [0] = estimators.GetGlobalMLE (result[utility.getGlobalValue("terms.alternative")], ^"fel.site_alpha");
result_row [1] = estimators.GetGlobalMLE (result[utility.getGlobalValue("terms.alternative")], ^"fel.site_beta");
result_row [2] = estimators.GetGlobalMLE (result[utility.getGlobalValue("terms.Null")], ^"fel.site_beta");
result_row [3] = lrt [utility.getGlobalValue("terms.LRT")];
result_row [4] = lrt [utility.getGlobalValue("terms.p_value")];


sum = 0;

/*alternative_lengths = ((result[utility.getGlobalValue("terms.alternative")])[utility.getGlobalValue("terms.branch_length")])[0];

tname = utility.getGlobalValue("terms.tree_attributes.test");
for (_node_; in; ^"fel.case_respecting_node_names") {
_node_class_ = ((^"fel.selected_branches")[partition_index])[_node_];
if (_node_class_ == tname) {
sum += ((alternative_lengths)[_node_])[^"terms.fit.MLE"];
}
}*/

utility.ForEach (^"fel.case_respecting_node_names", "_node_",
'_node_class_ = ((^"fel.selected_branches")[`&partition_index`])[_node_];
if (_node_class_ == utility.getGlobalValue("terms.tree_attributes.test")) {
`&sum` += ((`&alternative_lengths`)[_node_])[utility.getGlobalValue("terms.fit.MLE")];
}
');*/

result_row [5] = sum;

result_row [5] = result[^"terms.math.sum"];

if (None != result [^"terms.confidence_interval"]) {
result_row [6] = (result [^"terms.confidence_interval"])[^"terms.lower_bound"];
@@ -621,20 +632,26 @@


utility.EnsureKey (^"fel.site_results", partition_index);
if (has_lrt) {
utility.EnsureKey (^"fel.site_LRT", partition_index);
}

for (_fel_result_; in; pattern_info[utility.getGlobalValue("terms.data.sites")]) {
((^"fel.site_results")[partition_index])[_fel_result_] = result_row;
fel.report.echo (_fel_result_, partition_index, result_row);
if (has_lrt) {
((^"fel.site_LRT")[partition_index])[_fel_result_] = result[^"terms.simulated"];
}
}

utility.ForEach (pattern_info[utility.getGlobalValue("terms.data.sites")], "_fel_result_",
'
(fel.site_results[`&partition_index`])[_fel_result_] = `&result_row`;
fel.report.echo (_fel_result_, `&partition_index`, `&result_row`);
'
);


//assert (0);
}
//----------------------------------------------------------------------------------------

fel.site_results = {};
fel.site_LRT = {};

for (fel.partition_index = 0; fel.partition_index < fel.partition_count; fel.partition_index += 1) {
fel.report.header_done = FALSE;
@@ -650,17 +667,22 @@ for (fel.partition_index = 0; fel.partition_index < fel.partition_count; fel.par
// beta = beta_scaler_test * branch_length or beta_nuisance_test * branch_length

SetParameter (DEFER_CONSTRAINT_APPLICATION, 1, 0);
fel.selected_branches_index = {1,utility.Array1D (fel.case_respecting_node_names)+1};
fel.i = 0;

for (_node_; in; fel.case_respecting_node_names) {
_node_class_ = (fel.selected_branches[fel.partition_index])[_node_];
if (_node_class_ == terms.tree_attributes.test) {
fel.selected_branches_index [fel.i] = 1;
_beta_scaler = fel.scalers[1];
} else {
_beta_scaler = fel.scalers[2];
}
fel.apply_proportional_site_constraint ("fel.site_tree", _node_, fel.alpha, fel.beta, fel.scalers[0], _beta_scaler, (( fel.final_partitioned_mg_results[terms.branch_length])[fel.partition_index])[_node_]);
fel.i += 1;
}



/*
utility.ForEach (fel.case_respecting_node_names, "_node_",
'_node_class_ = (fel.selected_branches[fel.partition_index])[_node_];
@@ -695,7 +717,7 @@ for (fel.partition_index = 0; fel.partition_index < fel.partition_count; fel.par
fel.queue = mpi.CreateQueue ({"LikelihoodFunctions": {{"fel.site_likelihood"}},
"Models" : {{"fel.site.mg_rev"}},
"Headers" : {{"libv3/all-terms.bf","libv3/tasks/alignments.bf"}},
"Variables" : {{"fel.srv","fel.resample","fel.ci"}}
"Variables" : {{"fel.srv","fel.resample","fel.ci","fel.selected_branches_index"}}
});


@@ -735,32 +757,35 @@ for (fel.partition_index = 0; fel.partition_index < fel.partition_count; fel.par
io.ClearProgressBar();

mpi.QueueComplete (fel.queue);

fel.partition_matrix = {Abs (fel.site_results[fel.partition_index]), Rows (fel.table_headers)};

utility.ForEachPair (fel.site_results[fel.partition_index], "_key_", "_value_",
'
for (_key_, _value_; in; fel.site_results[fel.partition_index]) {
for (fel.index = 0; fel.index < Rows (fel.table_headers); fel.index += 1) {
fel.partition_matrix [0+_key_][fel.index] = _value_[fel.index];
}
'
);

}
fel.site_results[fel.partition_index] = fel.partition_matrix;


}

fel.json [terms.json.MLE ] = {terms.json.headers : fel.table_headers,
terms.json.content : fel.site_results };

if (fel.resample) {
(fel.json [terms.json.MLE ])[terms.LRT] = fel.site_LRT;
}

io.ReportProgressMessageMD ("fel", "results", "** Found _" + fel.report.counts[0] + "_ sites under pervasive positive diversifying and _" + fel.report.counts[1] + "_ sites under negative selection at p <= " + fel.pvalue + "**");

selection.io.stopTimer (fel.json [terms.json.timers], "Total time");
selection.io.stopTimer (fel.json [terms.json.timers], "FEL analysis");

if (fel.resamples) {
fel.json [terms.simulated] = fel.resamples;
if (fel.resample) {
fel.json [terms.simulated] = fel.resample;
}

if (fel.ci) {
fel.json [terms.confidence_interval] = TRUE;
}

GetString (_hpv,HYPHY_VERSION,0);
4 changes: 3 additions & 1 deletion res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf
@@ -139,7 +139,6 @@ selection.io.startTimer (fubar.json [terms.json.timers], "Overall", 0);
fubar.path.base = (fubar.json [terms.json.input])[terms.json.file];

KeywordArgument ("cache", "Save FUBAR cache to [default is alignment+.FUBAR.cache]", fubar.path.base + ".FUBAR.cache");
KeywordArgument ("output", "Save FUBAR results (JSON) to [default is alignment+.FUBAR.json]", fubar.codon_data_info[terms.data.file] + ".FUBAR.json");


/*------------------------------------------------------------------------------
@@ -149,6 +148,9 @@ KeywordArgument ("output", "Save FUBAR results (JSON) to [default is alignment
fubar.path.cache = io.PromptUserForString ("Save FUBAR cache to");
fubar.cache = io.LoadCacheFromFile (fubar.path.cache);

KeywordArgument ("output", "Save FUBAR results (JSON) to [default is alignment+.FUBAR.json]", fubar.codon_data_info[terms.data.file] + ".FUBAR.json");
fubar.codon_data_info [terms.json.json] = io.PromptUserForFilePath ("Save the resulting JSON file to");

fubar.table_output_options = {terms.table_options.header : TRUE, terms.table_options.minimum_column_width: 16, terms.table_options.align : "center"};

console.log ( "> FUBAR will write cache and result files to _`fubar.path.base`.FUBAR.cache_ and _`fubar.path.base`.FUBAR.json_, respectively \n\n");
2 changes: 1 addition & 1 deletion res/TemplateBatchFiles/libv3/IOFunctions.bf
@@ -148,7 +148,7 @@ lfunction io.SpoolJSON(json, file) {
* @param file
*/
lfunction io.ParseJSON(file_path) {
fscanf(file_path, "Raw", test);
fscanf(file_path, REWIND, "Raw", test);
parsed_test = Eval(test);
return parsed_test;
}
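/* A minimal usage sketch (the file name is illustrative); with REWIND the scan presumably
   restarts at the beginning of the file, so repeated calls on the same path stay consistent:

       parsed = io.ParseJSON ("results.json");
*/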
1 change: 1 addition & 0 deletions res/TemplateBatchFiles/libv3/all-terms.bf
@@ -30,6 +30,7 @@ namespace terms{
data_type = "datatype";
devnull = "/dev/null";
_namespace = "namespace";
warning = "warning";

category = "category";
mixture = "mixture";
56 changes: 56 additions & 0 deletions res/TemplateBatchFiles/libv3/tasks/alignments.bf
@@ -335,6 +335,17 @@ function alignments.PromptForGeneticCodeAndAlignment(dataset_name, datafilter_na
return alignments.LoadCodonDataFile(dataset_name, datafilter_name, alignments.ReadCodonDataSet(dataset_name));
}

/**
* Prompts user for genetic code and alignment; do not create error condition on stop codons
* @name alignments.PromptForGeneticCodeAndAlignmentWarnStops
* @param {String} dataset_name - the name of the dataset you wish to use
* @param {String} datafilter_name - the name of the dataset filter you wish to use
* @returns {Dictionary} r - metadata pertaining to the dataset
*/
function alignments.PromptForGeneticCodeAndAlignmentWarnStops (dataset_name, datafilter_name) {
return alignments.LoadCodonDataFileWarnStops(dataset_name, datafilter_name, alignments.ReadCodonDataSet(dataset_name));
}
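/* A hypothetical usage sketch (the names "example.data" and "example.filter" are illustrative):

       example.info = alignments.PromptForGeneticCodeAndAlignmentWarnStops ("example.data", "example.filter");

   Per the docstring above, sequences containing premature stop codons are reported as
   warnings (see LoadCodonDataFileWarnStops below) rather than raising an error.
*/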

/**
* Loads genetic code and alignment from file path
* @name alignments.LoadGeneticCodeAndAlignment
@@ -393,6 +404,51 @@ function alignments.LoadCodonDataFile(dataset_name, datafilter_name, data_info)
return data_info;
}

/**
* Creates codon dataset filter from data set; do not treat stop codons as an error condition
* @name alignments.LoadCodonDataFileWarnStops
* @param {String} datafilter_name - the name of the dataset filter you wish to use
* @param {String} dataset_name - the name of the existing dataset
* @param {Dictionary} data_info - DataSet metadata information
* @returns {Dictionary} updated data_info that includes the number of sites, dataset, and datafilter name
*/
function alignments.LoadCodonDataFileWarnStops(dataset_name, datafilter_name, data_info) {


DataSetFilter ^ datafilter_name = CreateFilter( ^ dataset_name, 3, , , data_info[terms.stop_codons]);

if (^"`datafilter_name`.sites"*3 != ^"`dataset_name`.sites") {
// generate a more diagnostic error here
data_info[^"terms.warning"] = {};

for (alignments.LoadCodonDataFile.i = 0; alignments.LoadCodonDataFile.i < ^"`dataset_name`.species"; alignments.LoadCodonDataFile.i += 1) {
DataSetFilter ^datafilter_name = CreateFilter( ^ dataset_name, 3, , "" + alignments.LoadCodonDataFile.i , data_info[terms.stop_codons]);
if (^"`datafilter_name`.sites"*3 != ^"`dataset_name`.sites") {
alignments.LoadCodonDataFile.name = alignments.GetIthSequenceOriginalName (dataset_name, alignments.LoadCodonDataFile.i);

alignments.LoadCodonDataFile.site_map = ^"`datafilter_name`.site_map";

alignments.LoadCodonDataFile.annotation_string = utility.PopulateDict (0, ^"`dataset_name`.sites",
'(alignments.LoadCodonDataFile.name[terms.data.sequence])[_idx]',
'_idx');


utility.ForEach (alignments.LoadCodonDataFile.site_map, "_value_",
'
`&alignments.LoadCodonDataFile.annotation_string`[_value_] = `&alignments.LoadCodonDataFile.annotation_string`[_value_] && 0;
');

(data_info[^"terms.warning"])[alignments.LoadCodonDataFile.name[terms.id]] = Join ("",alignments.LoadCodonDataFile.annotation_string);
}
}
DataSetFilter ^ datafilter_name = CreateFilter( ^ dataset_name, 3, , , data_info[terms.stop_codons]);
}
data_info[terms.data.sites] = ^ "`datafilter_name`.sites";
data_info[terms.data.dataset] = dataset_name;
data_info[terms.data.datafilter] = datafilter_name;
return data_info;
}
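/* A hedged inspection sketch: per the assignments above, data_info[terms.warning] maps the
   original id of each offending sequence to an annotated copy of that sequence, so callers
   can report which sequences tripped the stop-codon check:

       for (seq_id, annotated; in; data_info[terms.warning]) {
           console.log ("Premature stop codon(s) detected in " + seq_id);
       }
*/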

/**
* Creates nucleotide dataset filter from file
* @name alignments.ReadNucleotideAlignment

1 comment on commit 10ad033

@kjlevitz
Contributor


⚠️ Performance Alert ⚠️

A possible performance regression was detected for benchmark 'Benchmark.js Benchmark'.
The benchmark result of this commit is worse than the previous result, with the current/previous ratio exceeding the threshold of 2.

| Benchmark suite | Current: 10ad033 | Previous: 9ee9471 | Ratio |
| --------------- | ---------------- | ----------------- | ----- |
| BUSTED-SRV.wbf | 80.4052424218059 secs/op (±81.641940%) | 33.034917908229 secs/op (±41.814183%) | 2.43 |
| BGM.wbf | Infinity secs/op (±0.000000%) | 3.0396799824914433 secs/op (±1.813698%) | Infinity |
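
For example, BUSTED-SRV.wbf went from roughly 33.0 to 80.4 secs/op, a current/previous ratio of 80.4 / 33.0 ≈ 2.43, which exceeds the threshold of 2; BGM.wbf reports Infinity secs/op for this commit, so its ratio is Infinity as well.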

This comment was automatically generated by workflow using github-action-benchmark.

CC: @klevitz
