-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0c4d8d3
commit 7fa0c1f
Showing
4 changed files
with
892 additions
and
287 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,10 +6,10 @@ | |
# | # | ||
# # | ||
# File: MLEResultsProc.sh # | ||
VERSION="v1.3" # | ||
VERSION="v1.4.0" # | ||
# Author: Justin C. Bagley # | ||
# Date: Created by Justin Bagley on Fri, 29 Jul 2016 11:21:37 -0300. # | ||
# Last update: March 6, 2019 # | ||
# Last update: March 15, 2019 # | ||
# Copyright (c) 2016-2019 Justin C. Bagley. All rights reserved. # | ||
# Please report bugs to <[email protected]>. # | ||
# # | ||
|
@@ -19,43 +19,164 @@ | |
# # | ||
########################################################################################## | ||
|
||
if [[ "$1" == "-V" ]] || [[ "$1" == "--version" ]]; then | ||
echo "$(basename $0) $VERSION"; | ||
exit | ||
# Provide a variable with the location of this script. | ||
SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" | ||
|
||
# Source Scripting Utilities | ||
# ----------------------------------- | ||
# These shared utilities provide many functions which are needed to provide | ||
# the functionality in this boilerplate. This script will fail if they can | ||
# not be found. | ||
# ----------------------------------- | ||
|
||
UTILS_LOCATION="${SCRIPT_PATH}/../lib/utils.sh" # Update this path to find the utilities. | ||
|
||
if [[ -f "${UTILS_LOCATION}" ]]; then | ||
source "${UTILS_LOCATION}" | ||
else | ||
echo "Please find the file util.sh and add a reference to it in this script. Exiting..." | ||
exit 1 | ||
fi | ||
|
||
echo " | ||
########################################################################################## | ||
# MLEResultsProc v1.3, March 2019 # | ||
|
||
# Source shared functions and variables | ||
# ----------------------------------- | ||
|
||
FUNCS_LOCATION="${SCRIPT_PATH}/../lib/sharedFunctions.sh" # Update this path to find the shared functions. | ||
VARS_LOCATION="${SCRIPT_PATH}/../lib/sharedVariables.sh" # Update this path to find the shared variables. | ||
|
||
if [[ -f "${FUNCS_LOCATION}" ]] && [[ -f "${VARS_LOCATION}" ]]; then | ||
source "${FUNCS_LOCATION}" ; | ||
source "${VARS_LOCATION}" ; | ||
else | ||
echo "Please find the files sharedFunctions.sh and sharedVariables.sh and add references to them in this script. Exiting... " | ||
exit 1 | ||
fi | ||
|
||
# NOTE(review): shell variable names cannot begin with a digit, so bash does not treat the next line as an assignment (it is looked up as a command), and "${2LOGEB10_RSCRIPT_LOCATION}" is a bad substitution wherever it is expanded. Rename the variable (e.g. TWOLOGEB10_RSCRIPT_LOCATION) at every use -- TODO fix. | ||
2LOGEB10_RSCRIPT_LOCATION="${SCRIPT_PATH}/2logeB10.R" # Update this path to find the 2logeB10.R Rscript. | ||
|
||
# trapCleanup Function | ||
# ----------------------------------- | ||
# Any actions that should be taken if the script is prematurely | ||
# exited. Always call this function at the top of your script. | ||
# ----------------------------------- | ||
function trapCleanup() { | ||
# Installed as the handler for EXIT, INT and TERM by the 'trap' call near the end of this script. | ||
echo "" | ||
# Delete temp files, if any | ||
# is_dir is not defined in this file; presumably provided by the sourced utilities -- TODO confirm. | ||
if is_dir "${tmpDir}"; then | ||
rm -r "${tmpDir}" | ||
fi | ||
# Report the current function call stack; die is not defined in this file (presumably from the sourced utilities -- TODO confirm). | ||
die "Exit trapped. In function: '${FUNCNAME[*]}'" | ||
} | ||
|
||
# safeExit | ||
# ----------------------------------- | ||
# Non-destructive exit for when script exits naturally. | ||
# Usage: Add this function at the end of every script. | ||
# ----------------------------------- | ||
function safeExit() { | ||
# Delete temp files, if any | ||
# is_dir is not defined in this file; presumably provided by the sourced utilities -- TODO confirm. | ||
if is_dir "${tmpDir}"; then | ||
rm -r "${tmpDir}" | ||
fi | ||
# Reset the INT/TERM/EXIT traps to their defaults so the exit below does not invoke trapCleanup. | ||
trap - INT TERM EXIT | ||
# No status argument: the script exits with the status of the last command executed. | ||
exit | ||
} | ||
|
||
# Set Flags | ||
# ----------------------------------- | ||
# Flags which can be overridden by user input. | ||
# Default values are below | ||
# ----------------------------------- | ||
quiet=false | ||
printLog=false | ||
verbose=false | ||
force=false | ||
strict=false | ||
debug=false | ||
args=() | ||
|
||
# Set Temp Directory | ||
# ----------------------------------- | ||
# Create temp directory with three random numbers and the process ID | ||
# in the name. This directory is removed automatically at exit. | ||
# ----------------------------------- | ||
tmpDir="/tmp/${SCRIPT_NAME}.$RANDOM.$RANDOM.$RANDOM.$$" | ||
(umask 077 && mkdir "${tmpDir}") || { | ||
die "Could not create temporary directory! Exiting." | ||
} | ||
|
||
# Logging | ||
# ----------------------------------- | ||
# Log is only used when the '-l' flag is set. | ||
# | ||
# To never save a logfile change variable to '/dev/null' | ||
# Save to Desktop use: $HOME/Desktop/${SCRIPT_BASENAME}.log | ||
# Save to standard user log location use: $HOME/Library/Logs/${SCRIPT_BASENAME}.log | ||
# ----------------------------------- | ||
logFile="$HOME/Library/Logs/${SCRIPT_BASENAME}.log" | ||
|
||
# Check for Dependencies | ||
# ----------------------------------- | ||
# Arrays containing package dependencies needed to execute this script. | ||
# The script will fail if dependencies are not installed. For Mac users, | ||
# most dependencies can be installed automatically using the package | ||
# manager 'Homebrew'. Mac applications will be installed using | ||
# Homebrew Casks. Ruby and gems via RVM. | ||
# ----------------------------------- | ||
homebrewDependencies=() | ||
caskDependencies=() | ||
gemDependencies=() | ||
|
||
|
||
|
||
|
||
function MLEResultsProc () { | ||
|
||
######################################## START ########################################### | ||
########################################################################################## | ||
" | ||
|
||
echo "INFO | $(date) |----------------------------------------------------------------" | ||
echo "INFO | $(date) | MLEResultsProc, v1.4.0 March 2019 (part of PIrANHA v1.0.0) " | ||
echo "INFO | $(date) | Copyright (c) 2016-2019 Justin C. Bagley. All rights reserved. " | ||
echo "INFO | $(date) |----------------------------------------------------------------" | ||
|
||
######################################## START ########################################### | ||
echo "INFO | $(date) | STEP #1: SETUP. " | ||
MY_BEAST_OUTPUT_FILES=*.out | ||
echo "INFO | $(date) | Starting MLEResultsProc pipeline... " | ||
echo "INFO | $(date) | Step #1: Set up workspace, check machine type, and read beast .out files into environmental variable. " | ||
############ SET WORKING DIRECTORY AND CHECK MACHINE TYPE | ||
USER_SPEC_PATH="$(printf '%q\n' "$(pwd)")"; | ||
echoCDWorkingDir | ||
#echo "INFO | $(date) | Checking machine type... " | ||
checkMachineType | ||
#echo "INFO | $(date) | Found machine type ${machine}. " | ||
|
||
|
||
echo "INFO | $(date) | STEP #2: CHECK BEAST VERSION (DETECT AND ACCOMODATE RESULTS FILES FROM BEAST1 OR BEAST2). " | ||
MY_BEAST_OUTPUT_FILES=*.out ; | ||
|
||
echo "INFO | $(date) | Step #2: Check BEAST version, in order to detect and accommodate results files from BEAST v1 versus BEAST2. " | ||
## CHECK BEAST VERSION. | ||
##--Conditional on the following check, we will run one of two different versions of | ||
##--MLEResultsProc on the current working dir--one specific to the format of output (.out) | ||
##--files from BEAST v1, and one specific to the format of .out files from BEAST v2. | ||
## Conditional on the following check, we will run one of two different versions of | ||
## MLEResultsProc on the current working dir--one specific to the format of output (.out) | ||
## files from BEAST v1, and one specific to the format of .out files from BEAST v2. | ||
( | ||
for i in $MY_BEAST_OUTPUT_FILES; do | ||
echo "$i" > file.tmp; | ||
break; | ||
done | ||
) | ||
y="$(cat file.tmp)" | ||
y="$(cat file.tmp)"; | ||
|
||
MY_BEAST1_VER_CHECK="$(grep -h 'BEAST\ v1' $y | wc -l)"; | ||
MY_BEAST1_VER_CHECK2="$(grep -h 'log\ marginal\ likelihood' $y | wc -l)"; | ||
|
||
MY_BEAST1_VER_CHECK="$(grep -h 'BEAST\ v1' $y | wc -l)" | ||
MY_BEAST1_VER_CHECK2="$(grep -h 'log\ marginal\ likelihood' $y | wc -l)" | ||
MY_BEAST2_VER_CHECK="$(grep -h 'BEAST\ v2' $y | wc -l)"; | ||
MY_BEAST2_VER_CHECK2="$(grep -h 'marginal\ L\ estimate' $y | wc -l)"; | ||
|
||
MY_BEAST2_VER_CHECK="$(grep -h 'BEAST\ v2' $y | wc -l)" | ||
MY_BEAST2_VER_CHECK2="$(grep -h 'marginal\ L\ estimate' $y | wc -l)" | ||
rm ./file.tmp ; | ||
|
||
rm ./file.tmp | ||
|
||
echo "INFO | $(date) | STEP #3: EXTRACT MLE RESULTS FROM OUTPUT FILES. " | ||
echo "INFO | $(date) | Step #3: Extract MLE results from BEAST output files in current directory. " | ||
|
||
################################## extractB1Results.sh ################################### | ||
|
||
|
@@ -68,24 +189,24 @@ echo "INFO | $(date) | STEP #3: EXTRACT MLE RESULTS FROM OUTPUT FILES. " | |
( | ||
for i in $MY_BEAST_OUTPUT_FILES; do | ||
echo "$i" | ||
echo "$(basename $i)" > "${i}"_filename.tmp | ||
echo "$(basename $i)" > "${i}"_filename.tmp ; | ||
# | ||
grep -n "log marginal likelihood (using path sampling) from pathLikelihood.delta =" ${i} | \ | ||
awk -F"= " '{print $NF}' > "${i}"_PSMLEs.tmp | ||
awk -F"= " '{print $NF}' > "${i}"_PSMLEs.tmp ; | ||
|
||
grep -n "log marginal likelihood (using stepping stone sampling) from pathLikelihood.delta =" ${i} | \ | ||
awk -F"= " '{print $NF}' > "${i}"_SSMLEs.tmp | ||
awk -F"= " '{print $NF}' > "${i}"_SSMLEs.tmp ; | ||
# | ||
MY_PS_RESULT="$(head -n1 ${i}_PSMLEs.tmp)" | ||
MY_SS_RESULT="$(head -n1 ${i}_SSMLEs.tmp)" | ||
MY_PS_RESULT="$(head -n1 ${i}_PSMLEs.tmp)"; | ||
MY_SS_RESULT="$(head -n1 ${i}_SSMLEs.tmp)"; | ||
# | ||
echo "${i}"_filename.tmp "$MY_PS_RESULT" "$MY_SS_RESULT" >> data.tmp | ||
echo "${i}"_filename.tmp "$MY_PS_RESULT" "$MY_SS_RESULT" >> data.tmp ; | ||
done | ||
) | ||
|
||
rm ./*_filename.tmp | ||
rm ./*_PSMLEs.tmp | ||
rm ./*_SSMLEs.tmp | ||
rm ./*_filename.tmp ; | ||
rm ./*_PSMLEs.tmp ; | ||
rm ./*_SSMLEs.tmp ; | ||
|
||
} | ||
|
||
|
@@ -100,7 +221,7 @@ echo "INFO | $(date) | STEP #3: EXTRACT MLE RESULTS FROM OUTPUT FILES. " | |
( | ||
for i in $MY_BEAST_OUTPUT_FILES; do | ||
echo "$i" | ||
echo "$(basename $i)" > "${i}"_filename.tmp | ||
echo "$(basename $i)" > "${i}"_filename.tmp ; | ||
# | ||
grep -n "marginal L estimate =" ${i} | \ | ||
awk -F"= " '{print $NF}' > "${i}"_PSMLEs.tmp ; | ||
|
@@ -129,9 +250,9 @@ echo "INFO | $(date) | STEP #3: EXTRACT MLE RESULTS FROM OUTPUT FILES. " | |
|
||
|
||
|
||
##--Don't forget to run the (single) appropriate function! If output files from BEAST1 *and* | ||
##--BEAST2 runs are present in current working directory (=NOT ALLOWED!), then the BEAST1 | ||
##--results will simply be overwritten. | ||
## Don't forget to run the (single) appropriate function! If output files from BEAST1 *and* | ||
## BEAST2 runs are present in current working directory (=NOT ALLOWED!), then the BEAST1 | ||
## results will simply be overwritten. | ||
if [[ "$MY_BEAST1_VER_CHECK" -gt "0" ]] || [[ "$MY_BEAST1_VER_CHECK2" -gt "0" ]]; then | ||
echo "INFO | $(date) | BEAST v1+ output files detected; conducting post-processing accordingly... " | ||
echo "INFO | $(date) | Extracting MLE results from the following output files: " | ||
|
@@ -145,7 +266,7 @@ fi | |
|
||
|
||
|
||
echo "INFO | $(date) | STEP #4: ARRANGE MLE RESULTS IN TAB-DELIMITED FILE WITH HEADER. " | ||
echo "INFO | $(date) | Step #4: Arrange MLE results in tab-delimited file with header. " | ||
echo "INFO | $(date) | Placing results into 'MLE.output.txt' in current working directory. " | ||
echo "File PS_MLE SS_MLE" > header.txt ; ## Make header row. Change these codes as needed. | ||
cat header.txt data.tmp | sed 's/\_filename.tmp//g; s/\ / /g' > MLE.output.txt ; | ||
|
@@ -155,23 +276,117 @@ echo "INFO | $(date) | STEP #4: ARRANGE MLE RESULTS IN TAB-DELIMITED FILE W | |
rm data.tmp ; | ||
|
||
|
||
echo "INFO | $(date) | STEP #5: LOAD MLE RESULTS INTO R AND COMPUTE BAYES FACTOR TABLES. " | ||
##--We do this in an R script that I wrote named "2logeB10.R" that we simply call here. | ||
##--Note this script needs to be either in the working directory or the user's path. | ||
echo "INFO | $(date) | Step #5: Load MLE results into R and compute Bayes factor tables. " | ||
## We do this in an R script that I wrote named "2logeB10.R" that we simply call here. | ||
## This script is distributed with PIrANHA and, like all other functions including MLEResultsProc, | ||
## it is in the bin/ folder of the distro. Here we account for different distro locations on | ||
## different (users') machines by calling from ${2LOGEB10_RSCRIPT_LOCATION} defined at the | ||
## top of this script (line 56). | ||
|
||
echo "INFO | $(date) | Calculating Bayes factors in R using '2logeB10.R' script... " | ||
R CMD BATCH 2logeB10.R | ||
R CMD BATCH "${2LOGEB10_RSCRIPT_LOCATION}" | ||
|
||
if [[ -s ./2logeB10.Rout ]] && [[ "$(wc -c 2logeB10.Rout | perl -pe 's/\ +([0-9]{4}).*$/$1/g')" -gt "3960" ]]; then | ||
echo "INFO | $(date) | R calculations complete. " | ||
fi | ||
|
||
echo "INFO | $(date) | Done summarizing marginal-likelihood estimation results in BEAST using MLEResultsProc. " | ||
echo "INFO | $(date) | Bye. | ||
" | ||
# | ||
# | ||
# | ||
#echo "INFO | $(date) | Done summarizing marginal-likelihood estimation results in BEAST using MLEResultsProc. " | ||
#echo "INFO | $(date) | Bye. | ||
#" | ||
|
||
echo "----------------------------------------------------------------------------------------------------------" | ||
echo "" | ||
|
||
########################################################################################## | ||
######################################### END ############################################ | ||
|
||
exit 0 | ||
} | ||
|
||
|
||
|
||
############ SCRIPT OPTIONS | ||
## OPTION DEFAULTS ## | ||
# None at this time. | ||
|
||
############ CREATE USAGE & HELP TEXTS | ||
USAGE="Usage: $(basename $0) [OPTION]... | ||
${bold}Options:${reset} | ||
-h help text (also: -help) echo this help text and exit | ||
-V version (also: --version) echo version of this script and exit | ||
${bold}OVERVIEW${reset} | ||
THIS SCRIPT automates post-processing of marginal likelihood estimation results from | ||
running path sampling (ps) or stepping-stone (ss) sampling analyses on different models in | ||
BEAST. The program runs in current working directory containing BEAST output files, i.e. | ||
within a single run folder containing results from a single MLE BEAST run. After attempting | ||
to identify the BEAST version used for the analysis from user's files, the program extracts | ||
MLE results from BEAST output files in current directory. The MLE results are processed and | ||
output to a file named 'MLE.output.txt', and then they are read into R, where a custom | ||
Rscript computes Bayes factors for the models. | ||
This program runs on UNIX-like and Linux systems using commonly distributed utility | ||
software, with usage obtained by running the script with the -h flag. It has been tested | ||
on macOS High Sierra (v10.13+) and Mojave but should work on many earlier versions or | ||
Linux (tested on CentOS 6/7). The main dependencies are R (v3.3+) and Perl (v5+), the later | ||
of which is usually already installed on most mac and Linux distributions. | ||
${bold}Usage examples:${reset} | ||
Call the program using PIrANHA, as follows: | ||
piranha -f MLEResultsProc Run with program defaults in current directory | ||
piranha -f MLEResultsProc --args='-h' Print this help text | ||
${bold}CITATION${reset} | ||
Bagley, J.C. 2019. PIrANHA v1.0.0. GitHub repository, Available at: | ||
<https://github.com/justincbagley/PIrANHA>. | ||
Created by Justin Bagley on Fri, 29 Jul 2016 11:21:37 -0300. | ||
Copyright (c) 2016-2019 Justin C. Bagley. All rights reserved. | ||
" | ||
|
||
if [[ "$1" == "-h" ]] || [[ "$1" == "-help" ]]; then | ||
echo "$USAGE" | ||
exit | ||
fi | ||
|
||
if [[ "$1" == "-V" ]] || [[ "$1" == "--version" ]]; then | ||
echo "$(basename $0) $VERSION"; | ||
exit | ||
fi | ||
|
||
|
||
# ############# ############# ############# | ||
# ## TIME TO RUN THE SCRIPT ## | ||
# ## ## | ||
# ## You shouldn't need to edit anything ## | ||
# ## beneath this line ## | ||
# ## ## | ||
# ############# ############# ############# | ||
|
||
# Trap bad exits with your cleanup function | ||
trap trapCleanup EXIT INT TERM | ||
|
||
# Set IFS to preferred implementation | ||
IFS=$'\n\t' | ||
|
||
# Exit on error. Append '||true' when you run the script if you expect an error. | ||
set -o errexit | ||
|
||
# Run in debug mode, if set | ||
if ${debug}; then set -x ; fi | ||
|
||
# Exit on empty variable | ||
if ${strict}; then set -o nounset ; fi | ||
|
||
# Bash will remember & return the highest exitcode in a chain of pipes. | ||
# This way you can catch the error in case mysqldump fails in `mysqldump |gzip`, for example. | ||
set -o pipefail | ||
|
||
# Invoke the checkDependencies function to test for Bash packages. Uncomment if needed. | ||
# checkDependencies | ||
|
||
# Run the script | ||
# NOTE(review): MLEResultsProc ends with its own 'exit 0', so control does not return here and safeExit below is never reached; the EXIT trap installed above (trapCleanup) runs instead -- confirm this is intended, or replace that 'exit 0' with 'return 0'. | ||
MLEResultsProc | ||
|
||
# Exit cleanly | ||
safeExit |
Oops, something went wrong.