-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathgwas_statgengwas.R
155 lines (119 loc) · 4.54 KB
/
gwas_statgengwas.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
## R script to carry out a GWAS analysis with the package statgenGWAS
## kinship matrix used to account for population structure in the data
## input: Plink .raw and .map files + phenotype file
# run as Rscript --vanilla gwas_statgengwas.R genotype_file=path_to_genotypes snp_map=path_to_map phenotype_file=path_to_phenotypes
# trait=trait_name_in_phenotype_file trait_label=label_to_use_for_trait systematic_effects=population
memory.limit(size = 8000)
library("R.utils")
library("dplyr")
library("data.table")
library("statgenGWAS")
print("GWAS using the statgenGWAS package")
# output_path="../../gwas"
###################################
## read arguments from command line
###################################
allowed_parameters = c(
'genotype_file',
'snp_map',
'phenotype_file',
'trait',
'trait_label',
'systematic_effects'
)
args <- commandArgs(trailingOnly = TRUE)
print(args)
for (p in args){
pieces = strsplit(p, '=')[[1]]
#sanity check for something=somethingElse
if (length(pieces) != 2){
stop(paste('badly formatted parameter:', p))
}
if (pieces[1] %in% allowed_parameters) {
assign(pieces[1], pieces[2])
next
}
#if we get here, is an unknown parameter
stop(paste('bad parameter:', pieces[1]))
}
# genotype_file = "introduction_to_gwas/3.imputation/rice_imputed.raw"
# snp_map = "introduction_to_gwas/3.imputation/rice_imputed.map"
# phenotype_file = "introduction_to_gwas/data/rice_phenotypes.txt"
# trait = "PH"
# trait_label = "PH"
# systematic_effects = "population"
print(paste("genotype file name:",genotype_file))
print(paste("SNP map:",snp_map))
print(paste("phenotype file name:",phenotype_file))
print(paste("trait:",trait))
print(paste("trait label:",trait_label))
systematic_effects = if(exists(x = "systematic_effects")) systematic_effects else NULL
print(paste("systematic effects:",systematic_effects))
if(!is.null(systematic_effects) ) {
systematic_effects_vec = strsplit(systematic_effects, split = ",")[[1]]
} else systematic_effects_vec = NULL
dataset = basename(genotype_file)
## READING DATA
print("now reading in the data ...")
mapfile <- fread(snp_map)
names(mapfile) <- c("Chromosome", "SNP.names","cM","Position")
snpfile <- fread(genotype_file)
snpfile <- select(snpfile, -c(FID,PAT,MAT,SEX,PHENOTYPE))
phenos <- fread(phenotype_file)
gc()
## prepare for gData
print("prepare object of class gData ... ")
snpfile <- as.data.frame(snpfile)
rownames(snpfile) <- snpfile[["IID"]]
vec <- colnames(snpfile) != "IID"
snpfile <-snpfile[,vec]
colnames(snpfile) <- mapfile$SNP.names
mapfile <- as.data.frame(mapfile)
rownames(mapfile) <- mapfile[["SNP.names"]]
colnames(mapfile)[match(c("Chromosome", "Position"), colnames(mapfile))] <- c("chr", "pos")
## create gData object
gData_gwas <- createGData(geno = snpfile, map = mapfile)
gc()
## add phenotypes and covariates
print("read phenotype file ...")
phenotypes <- phenos[,c("id",trait), with = FALSE]
names(phenotypes)[1] <- "genotype"
phenotypes <- as.data.frame(phenotypes)
phenotypes[,trait] <- as.numeric(phenotypes[,trait])
if(!is.null(systematic_effects)) {
covariates <- select(phenos, all_of(systematic_effects_vec))
covariates <- as.data.frame(covariates)
rownames(covariates) <- phenos$id ## !! careful with the name of the sample ID column !!
} else covariates = NULL
## create gData with both genotype and phenotype
gData_gwas <- createGData(gData = gData_gwas, pheno = phenotypes, covar = covariates)
gc()
##### recoding and cleaning
gData_gwas_clean <- codeMarkers(gData_gwas, impute = FALSE, verbose = TRUE, MAF = 0.05)
gc()
#############
## GWAS
#############
GWAS_res <- runSingleTraitGwas(gData = gData_gwas_clean,
kinshipMethod = "astle",
traits = trait,
covar = systematic_effects_vec,
thrType = "fdr", ## fixed, fdr, bonf, small
pThr = 0.05,
LODThr = 3)
print(head(GWAS_res$GWAResult), row.names = FALSE)
print(GWAS_res$GWASInfo)
fname <- paste(dataset,trait_label,"GWAS_statgengwas.results", sep="_")
res <- GWAS_res$GWAResult$phenotypes
fwrite(x = res, file = fname)
gzip(fname, destname=sprintf("%s.gz", fname), overwrite = TRUE, remove = TRUE, BFR.SIZE = 1e+06)
gc()
## summary and plots
summary(GWAS_res)
png(paste(dataset,trait_label,"qq_statgen.png",sep="_"))
plot(GWAS_res, plotType = "qq", trait = trait)
dev.off()
png(paste(dataset,trait_label,"manhattan_statgen.png",sep="_"))
plot(GWAS_res, plotType = "manhattan", trait = trait, yThr = 3)
dev.off()
gc()