From fbf3b15de275c8a3bd8416901c2545e22a9a44ac Mon Sep 17 00:00:00 2001 From: sreichl Date: Thu, 16 May 2024 16:23:37 +0200 Subject: [PATCH] document that unique features and samples are required #9 --- README.md | 2 +- config/README.md | 2 +- config/config.yaml | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 5191ff4..7c0930c 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,7 @@ The workflow performs the following steps to produce the outlined results: - This is particularly useful when combining data from different experiments or sequencing runs. - Highly Variable Feature Selection (`*_HVF.csv`) - The top percentage of the most variable features is selected based on the binned normalized dispersion of each feature adapted from [Zheng (2017) Nature Communications](https://doi.org/10.1038/ncomms14049). - - These HVFs are often the most informative for downstream analyses such as clustering or differential expression, but smaller effects of interest could be removed. + - These HVFs are often the most informative for downstream analyses such as clustering or differential expression, but smaller effects of interest could be lost. - The selection is visualized by histograms before and after normalization, mean to normalized dispersion scatterplots, and a scatterplot of the ranked normalized dispersion always highlighting the selected features (`*_HVF_selection.png`). - Results (`{split}/*.csv`) - All transformed datasets are saved as CSV files and named by the applied methods, respectively. diff --git a/config/README.md b/config/README.md index ba2ca7d..516b334 100644 --- a/config/README.md +++ b/config/README.md @@ -1,3 +1,3 @@ You only need one configuration file to run the complete workflow. You can use the provided example as starting point. If in doubt read the comments in the config, the documentation of the respective methods and/or try the default values. 
-**configuration (config/config.yaml):** different for every project/dataset and configures the analyses to be performed, specifically the desired methods and their parameters. The fields are described within the file. +**configuration (config/config.yaml):** Different for every project/dataset and configures the analyses to be performed, specifically the desired methods and their parameters. The fields are described within the file. diff --git a/config/config.yaml b/config/config.yaml index 747688c..01827ad 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -6,14 +6,14 @@ threads: 1 ##### GENERAL ##### -data: /nobackup/lab_bock/projects/JakStruct/results/RNAhom/counts/all_counts.csv #/path/to/data.csv # path to the input count-matrix as CSV file, formatted as feature x sample (rows x columns) -annotation: /research/lab_bock/projects/JakStruct/metadata/RNAhom_sample_metadata.csv #/path/to/metadata.csv # path to the input sample annotation as CSV file, formatted as sample x metadata (rows x columns) -result_path: /nobackup/lab_bock/projects/JakStruct/results/RNAhom/ #/path/to/results/ # # path to the output folder -project_name: RNAhom #MyProject # name of the project/dataset +data: /path/to/data.csv # path to the input count-matrix as CSV file, formatted as feature x sample (rows x columns); unique features and samples are required. 
+annotation: /path/to/metadata.csv # path to the input sample annotation as CSV file, formatted as sample x metadata (rows x columns) +result_path: /path/to/results/ # path to the output folder +project_name: MySeqData # name of the project/dataset # path to the feature annotation as CSV file, required for RPKM quantification (feature length) and CQN normalization (feature length, covariate), otherwise empty "" # formatted as feature x metadata (rows x columns) -feature_annotation: /nobackup/lab_bock/projects/JakStruct/results/RNAhom/counts/gene_annotation.csv #/path/to/feature_annotation.csv +feature_annotation: /path/to/feature_annotation.csv ##### SPLIT ##### @@ -32,7 +32,7 @@ filter_parameters: min.prop: 0.7 # 0.7 ##### NORMALIZE ##### -# method specific parameters, default are in comments +# method-specific parameters; defaults are given in the comments ### edgeR # edgeR::CalcNormFactors -> https://rdrr.io/bioc/edgeR/man/calcNormFactors.html