@@ -61,7 +61,8 @@ generic_file_opener <- function(file_name, cas_df, n_max, sheet, site_sheet,
61
61
sheet <- " pharms"
62
62
}
63
63
}
64
- data_long <- na.omit(data_long )
64
+ data_long <- dplyr :: filter(data_long , ! is.na(Value ))
65
+ data_long <- dplyr :: filter(data_long , ! is.na(chnm ))
65
66
data_long $ comment <- " "
66
67
data_long $ comment [grep(" <" ,data_long $ Value )] <- " <"
67
68
data_long $ comment [grep(" DNQ" ,data_long $ Value )] <- " DNQ"
@@ -74,18 +75,22 @@ generic_file_opener <- function(file_name, cas_df, n_max, sheet, site_sheet,
74
75
data_long <- data_long [data_long $ Value != " lostinfield" ,]
75
76
data_long <- data_long [data_long $ Value != " -----" ,]
76
77
data_long <- data_long [data_long $ Value != " '-----" ,]
78
+ data_long <- data_long [data_long $ Value != " nosmple" ,]
77
79
data_long $ comment [which(data_long $ Value == " ND" )] <- " <"
78
80
data_long $ Value [which(data_long $ Value == " ND" )] <- data_long $ MDL [which(data_long $ Value == " ND" )]
79
- data_long <- data_long [ data_long $ Value != " NA" ,]
81
+ data_long <- filter( data_long , Value != " NA" )
80
82
81
83
data_long $ Value <- as.numeric(data_long $ Value )
82
84
data_long $ Value <- data_long $ Value / convert
83
85
data_long $ generic_class <- sheet
84
86
data_long $ `Sample Date` <- year
85
87
data_long $ SiteID <- gsub(" site " ," " ,data_long $ SiteID , ignore.case = TRUE )
86
88
87
- data_long <- filter(data_long ,
89
+ # Premature taking out censored values?
90
+ data_long <- filter(data_long ,
88
91
! (is.na(Value ) & comment == " " ))
92
+ # data_long <- filter(data_long,
93
+ # !(is.na(Value)))
89
94
90
95
data_long <- data_long %> %
91
96
mutate(chnm = tolower(chnm )) %> %
@@ -110,10 +115,18 @@ generic_file_opener <- function(file_name, cas_df, n_max, sheet, site_sheet,
110
115
data_long $ CAS [data_long $ chnm == " Nadolol" ] <- " 42200-33-9"
111
116
data_long $ chnm [data_long $ chnm == " Tris(1,3-Dichloro-2-Propyl)Phosphate (t" ] <- " Tris(1,3-dichloro-2-propyl)phosphate (TDCPP)"
112
117
data_long $ CAS [data_long $ chnm == " Tris(1,3-dichloro-2-propyl)phosphate (TDCPP)" ] <- " 13674-87-8"
118
+ data_long $ CAS [data_long $ CAS == " 26248-87-3" ] <- " 13674-84-5" # 2 versions of TDCPP
119
+ data_long $ CAS [data_long $ chnm == " TCEP" ] <- " 115-96-8"
120
+ data_long $ CAS [data_long $ chnm == " Tri(2-chloroethyl) phosphate (TCEP)" ] <- " 115-96-8"
121
+ # data_long$CAS[data_long$CAS == "51805-45-9"] <- "115-96-8"
122
+ data_long $ chnm [data_long $ CAS == " 101-20-2" ] <- " 3,4,4'-Trichlorocarbanilide"
123
+ data_long $ chnm [data_long $ CAS == " 115-96-8" ] <- " Tri(2-chloroethyl) phosphate (TCEP)"
113
124
114
125
data_long <- data_long [! (data_long $ chnm %in% c(" Tcpp_isomer" ," Tcpp Isomer" )),]
115
126
116
127
data_long $ CAS [data_long $ chnm == " Omeprazole + Esomprazole" ] <- " 73590-58-6"
128
+ data_long $ chnm [data_long $ CAS == " 73590-58-6" ] <- " Omeprazole + Esomprazole"
129
+
117
130
118
131
if (any(is.na(data_long $ CAS ))){
119
132
message(" Some CAS didn't match up" )
@@ -157,28 +170,44 @@ clean_cas <- function(cas_df){
157
170
filter(! duplicated(CAS )) %> %
158
171
mutate(chnm = tools :: toTitleCase(chnm ))
159
172
160
- cas_final $ chnm [cas_final $ chnm == " Deet" ] <- " DEET"
161
- cas_final $ chnm [cas_final $ chnm == " Tcep" ] <- " TCEP"
162
- cas_final $ chnm [cas_final $ chnm == " Tcpp" ] <- " TCPP"
163
- cas_final $ chnm [cas_final $ chnm == " Tbep" ] <- " TBEP"
164
- cas_final $ chnm [cas_final $ chnm == " Tdcpp" ] <- " TDCPP"
165
- cas_final $ chnm [cas_final $ chnm == " Total Pcbs" ] <- " Total PCBS"
173
+ cas_final $ chnm [cas_final $ chnm == " Deet" ] <- " N,N-diethyltoluamide (DEET)"
174
+ cas_final $ chnm [cas_final $ chnm == " Tcep" ] <- " Tri(2-chloroethyl) phosphate (TCEP)"
175
+ # cas_final$CAS[cas_final$chnm == "Tri(2-chloroethyl) phosphate (TCEP)"] <- "115-96-8"
176
+ cas_final $ chnm [cas_final $ chnm == " Tcpp" ] <- " Tris(1-chloro-2-propyl)phosphate (TCPP)"
177
+ cas_final $ chnm [cas_final $ chnm == " Tbep" ] <- " Tri(2-chloroethyl) phosphate (TCEP)"
178
+ cas_final $ chnm [cas_final $ chnm == " Tdcpp" ] <- " Tris(1,3-dichloro-2-propyl) phosphate (TDCPP)"
179
+ cas_final $ chnm [cas_final $ chnm == " Total Pcbs" ] <- " Total PCBs"
166
180
cas_final $ chnm [cas_final $ chnm == " O,p'-Ddd" ] <- " o,p'-DDD"
167
181
cas_final $ chnm [cas_final $ chnm == " P,p'-Ddd" ] <- " p,p'-DDD"
168
- cas_final $ chnm [cas_final $ chnm == " Pentachloroanisole (Pca)" ] <- " PCA"
169
- cas_final $ chnm [cas_final $ chnm == " Tributyl Phosphate (Tbp)" ] <- " TBP"
170
- cas_final $ chnm [cas_final $ chnm == " Hydrochlorothiazide (Hctz)" ] <- " HCTZ"
171
- cas_final $ chnm [cas_final $ chnm == " Tris(2−Chloroethyl)Phosphate (Tcep)" ] <- " TCEP"
182
+ cas_final $ chnm [cas_final $ chnm == " Pentachloroanisole (Pca)" ] <- " Pentachloroanisole"
183
+ cas_final $ chnm [cas_final $ chnm == " Tributyl Phosphate (Tbp)" ] <- " Tributyl phosphate (TBP)"
184
+ cas_final $ chnm [cas_final $ chnm == " Hydrochlorothiazide (Hctz)" ] <- " Hydrochlorothiazide"
172
185
cas_final $ chnm [cas_final $ chnm == " O,p'-Ddt" ] <- " o,p'-DDT"
173
186
cas_final $ chnm [cas_final $ chnm == " O,p'-Ddt" ] <- " o,p'-DDT"
174
187
cas_final $ chnm [cas_final $ chnm == " P,p'-Dde" ] <- " p,p'-DDE"
175
188
cas_final $ chnm [cas_final $ chnm == " P,p'-Ddt" ] <- " p,p'-DDT"
176
189
cas_final $ chnm [cas_final $ chnm == " O,p'-Dde" ] <- " o,p'-DDE"
190
+ cas_final $ chnm [cas_final $ chnm == " Indeno[1,2,3-Cd]pyrene" ] <- " Indeno[1,2,3-cd]pyrene"
191
+ cas_final $ chnm [cas_final $ chnm == " Benzo(a)Pyrene" ] <- " Benzo(a)pyrene"
192
+ cas_final $ chnm [cas_final $ chnm == " beta-Bhc" ] <- " beta-Hexachlorocyclohexane"
193
+ cas_final $ chnm [cas_final $ chnm == " P,p'-Methoxychlor" ] <- " p,p'-Methoxychlor"
194
+ cas_final $ chnm [cas_final $ chnm == " alpha-Bhc" ] <- " alpha-Hexachlorocyclohexane"
195
+ cas_final $ chnm [cas_final $ chnm == " Benzo[b]naphtho[2,1-D]thiophene" ] <- " Benzo[b]naphtho[2,1-d]thiophene"
196
+ cas_final $ chnm [cas_final $ chnm == " Dibenzo[a,h]anthracene" ] <- " Dibenz[a,h]anthracene"
197
+ cas_final $ chnm [cas_final $ chnm == " p-Tert-Octylphenol" ] <- " p-tert-octylphenol"
177
198
cas_final $ chnm [cas_final $ CAS == " 26248-87-3" ] <- " Tri(chloropropyl) phosphate"
178
- # cas_final$chnm[cas_final$chnm == "Tris(1-Chloro-2-Propyl)Phosphate (Tcpp)"] <- "TCPP"
179
- cas_final $ chnm [cas_final $ chnm == " Hexachlorobenzene (Hcb)" ] <- " HCB"
199
+ cas_final $ chnm [cas_final $ chnm == " Hexachlorobenzene (Hcb)" ] <- " Hexachlorobenzene"
180
200
cas_final $ chnm [cas_final $ CAS == " 77-93-0" ] <- " Triethyl Citrate "
181
201
cas_final $ chnm [cas_final $ CAS == " 30306-93-5" ] <- " Ethyl Citrate"
202
+ cas_final $ chnm [cas_final $ CAS == " 101-20-2" ] <- " 3,4,4'-Trichlorocarbanilide"
203
+ cas_final $ chnm [cas_final $ CAS == " 73590-58-6" ] <- " Omeprazole + Esomprazole"
204
+ cas_final $ chnm [grep(" Cis-" , cas_final $ chnm )] <- gsub(pattern = " Cis-" ,
205
+ replacement = " cis-" ,
206
+ cas_final $ chnm [grep(" Cis-" , cas_final $ chnm )])
207
+ cas_final $ chnm [grep(" Trans-" , cas_final $ chnm )] <- gsub(pattern = " Trans-" ,
208
+ replacement = " trans-" ,
209
+ cas_final $ chnm [grep(" Trans-" , cas_final $ chnm )])
210
+
182
211
cas_final $ chnm [grep(" Pbde-" , cas_final $ chnm )] <- gsub(pattern = " Pbde-" ,
183
212
replacement = " PBDE-" ,
184
213
cas_final $ chnm [grep(" Pbde-" , cas_final $ chnm )])
@@ -187,9 +216,9 @@ clean_cas <- function(cas_df){
187
216
stringsAsFactors = FALSE ))
188
217
cas_final $ chnm [cas_final $ CAS == " 34911-55-2" ] <- " Bupropion hydrochloride"
189
218
190
- cas_final $ chnm [grep(pattern = " Delta-Benzenehexachloride" ,cas_final $ chnm )] <- " delta-Bhc "
191
- cas_final $ chnm [grep(pattern = " Beta-Benzenehexachloride" ,cas_final $ chnm )] <- " beta-Bhc "
192
- cas_final $ chnm [grep(pattern = " Alpha-Benzenehexachloride" , cas_final $ chnm )] <- " alpha-Bhc "
219
+ cas_final $ chnm [grep(pattern = " Delta-Benzenehexachloride" ,cas_final $ chnm )] <- " Delta-Benzenehexachloride "
220
+ cas_final $ chnm [grep(pattern = " Beta-Benzenehexachloride" ,cas_final $ chnm )] <- " Beta-Benzenehexachloride "
221
+ cas_final $ chnm [grep(pattern = " Alpha-Benzenehexachloride" , cas_final $ chnm )] <- " Alpha-Benzenehexachloride "
193
222
194
223
return (cas_final )
195
224
}
0 commit comments