Skip to content

Commit

Permalink
validation in place
Browse files Browse the repository at this point in the history
still lots of NA vals
  • Loading branch information
sgosline committed Jul 19, 2024
1 parent 2ac87a6 commit 3cb8f7d
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 11 deletions.
2 changes: 1 addition & 1 deletion exposome/exposome_summary_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ sg.stats <- sig.genes%>%
dplyr::select(-Project)|>
dplyr::rename(Project=friendlyName)|>
dplyr::select(Project,cas_number,Conc,Link,nGenes,Chemical_ID)|>
mutate(concentration=as.numeric(stringr::str_replace(conc,'uM','')))|>
mutate(concentration=as.numeric(stringr::str_replace(Conc,'uM','')))|>
dplyr::select(-Conc)


Expand Down
25 changes: 15 additions & 10 deletions sampleChemMapping/mapSamplesToChems.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,12 @@ required_sample_columns<-c("ClientName","SampleNumber","date_sampled","sample_ma
"LocationLon","LocationName","LocationAlternateDescription",
"AlternateName","cas_number","date_sample_start",
"measurement_value","measurement_value_qualifier","measurement_value_unit",
"measurement_value_molar","measurement_value_molar_unit",
"water_concentration","water_concentration_qualifier","water_concentration_unit",
"water_concentration_molar","water_concentration_molar_unit")
"measurement_value_molar","measurement_value_molar_unit")

# Rename map for the water columns: each name is the new
# environment_* column name, each value is the existing water_* column name.
new_sample_columns <- c(
  environment_concentration = "water_concentration",
  environment_concentration_qualifier = "water_concentration_qualifier",
  environment_concentration_unit = "water_concentration_unit",
  environment_concentration_molar = "water_concentration_molar",
  environment_concentration_molar_unit = "water_concentration_molar_unit"
)

##required for comptox-derived mapping files
required_comptox_columns <- c("INPUT","DTXSID","PREFERRED_NAME","INCHIKEY","SMILES","MOLECULAR_FORMULA",
Expand All @@ -38,8 +41,8 @@ required_comptox_columns <- c("INPUT","DTXSID","PREFERRED_NAME","INCHIKEY","SMIL
##output tables
sample_chem_columns <-c('Sample_ID','Chemical_ID',"measurement_value","measurement_value_qualifier","measurement_value_unit",
"measurement_value_molar","measurement_value_molar_unit",
"water_concentration","water_concentration_qualifier","water_concentration_unit",
"water_concentration_molar","water_concentration_molar_unit")
"environment_concentration","environment_concentration_qualifier","environment_concentration_unit",
"environment_concentration_molar","environment_concentration_molar_unit")

samp_columns <-c("Sample_ID","ClientName","SampleNumber","date_sampled","sample_matrix","technology",
"projectName","SampleName","LocationLat","projectLink",
Expand Down Expand Up @@ -309,19 +312,21 @@ buildSampleData<-function(fses_files, #files from barton that contain sample inf
sampIds, #new ids for samples
sampMapping){ ##mapping for sample names to clean up
##New data provided by michael
# print(fses_files)
# print(fses_files)

sampChem<-do.call(rbind,lapply(fses_files,function(fs){
# print(fs)
# fses1<-subset(sampTab,name=='fses1')[['location']]
sc <- rio::import(fs)|>#paste0(data.dir,'/fses/fses_data_for_pnnl_4-27-2021.csv'))%>%
# sampChem<-read.csv(paste0(data.dir,'/pnnl_bioassay_sample_query_1-14-2021.csv'))%>%
dplyr::select(all_of(required_sample_columns))%>%
dplyr::select(all_of(c(required_sample_columns,unlist(new_sample_columns))))%>% #TODO: change original file to use new names
# dplyr::rename(new_sample_columns)|> ##REMOVE this once we have new names
subset(SampleNumber!='None')%>%
subset(cas_number!='NULL')%>%
mutate(water_concentration_molar=stringr::str_replace_all(water_concentration_molar,'BLOD|NULL|nc:BDL',"0"))%>%
mutate(environment_concentration_molar=stringr::str_replace_all(environment_concentration_molar,'BLOD|NULL|nc:BDL',"0"))%>%
mutate(measurement_value_molar=stringr::str_replace_all(measurement_value_molar,'BLOD|NULL|BDL',"0"))%>%
mutate(water_concentration=stringr::str_replace_all(water_concentration,'BLOD|NULL|BDL',"0"))%>%
# subset(water_concentration_molar!='0.0')%>%
mutate(environment_concentration=stringr::str_replace_all(environment_concentration,'BLOD|NULL|BDL',"0"))%>%
# subset(environment_concentration_molar!='0.0')%>%
subset(!measurement_value_molar%in%c('0'))%>%
subset(!measurement_value%in%c("0","NULL",""))#%>%
# select(-c(Sample_ID))#,Chemical_ID)) ##These two are added in the 4/27 version of the file
Expand Down

0 comments on commit 3cb8f7d

Please sign in to comment.