-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathOWRDdatapull.R
112 lines (91 loc) · 5.42 KB
/
OWRDdatapull.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Pull data from OWRD so that it can be incorporated into the Shiny app.
# Major kudos to Ryan Michie for sharing the code.
#' Retrieve surface water monitoring results from Oregon Water Resources Department
#'
#' Data is retrieved from Oregon Water Resources Department's near real time monitoring results.
#' https://apps.wrd.state.or.us/apps/sw/hydro_near_real_time/
#'
#' One request is issued for every unique combination of `station`,
#' `startdate`, `enddate` and `char`. Each tab-delimited response is reshaped
#' to long format and all responses are row-bound into a single data frame.
#' Temperature characteristics are requested with `units=C`.
#'
#' @param station Required string vector of stations to be fetched.
#' @param startdate Required string setting the start date of the data being fetched. Format 'yyyy-mm-dd'.
#' @param enddate Required string setting the end date of the data being fetched. Format 'yyyy-mm-dd'.
#' @param char Required string vector identifying the characteristics to be fetched.
#'   Every element must be one of the options below, otherwise an error is raised.
#'   Options include:
#' * 'MDF' - Mean Daily Flow
#' * 'Instantaneous_Stage' - Instantaneous Stage
#' * 'Instantaneous_Flow' - Instantaneous Flow
#' * 'Measurements' - Discharge grab measurements
#' * 'WTEMP_MEASURE' - Water temperature grab measurements
#' * 'WTEMP15' - Instantaneous Water temperature
#' * 'WTEMP_MEAN' - Daily mean Water temperature
#' * 'WTEMP_MAX' - Daily maximum Water temperature
#' * 'WTEMP_MIN' - Daily minimum Water temperature
#' @examples
#' \dontrun{
#' owrd_data(station = c("10366000", "13330000"),
#'           startdate = "2020-12-01",
#'           enddate = "2021-02-15",
#'           char = c("MDF", "WTEMP_MAX"))
#' }
#' @export
#' @return data frame of data in long format with the columns station_nbr,
#'   record_date, time_observed, Characteristic.Name, Result.Value, estimated,
#'   revised, published_status and download_date.
owrd_data <- function(station, startdate, enddate, char) {

  WRDurl <- "https://apps.wrd.state.or.us/apps/sw/hydro_near_real_time/hydro_download.aspx?"

  # Flow/stage datasets are requested as-is; every other (temperature) dataset
  # additionally gets "&units=C" appended to its query.
  flow_chars <- c("MDF", "Instantaneous_Stage", "Instantaneous_Flow", "Measurements")
  temp_chars <- c("WTEMP15", "WTEMP_MEASURE", "WTEMP_MEAN", "WTEMP_MAX", "WTEMP_MIN")

  # Reject the request if ANY requested characteristic is unknown (or if none
  # was supplied at all); all() alone would let an empty vector through.
  if (length(char) == 0 || !all(char %in% c(flow_chars, temp_chars))) {
    stop("non valid value in 'char'")
  }

  # stringsAsFactors = FALSE keeps the inputs as plain values rather than
  # factors, so nothing downstream can pick up integer factor codes.
  df.query <- expand.grid(station = station, startdate = startdate,
                          enddate = enddate, char = char,
                          stringsAsFactors = FALSE)

  if (nrow(df.query) == 0) {
    stop("'station', 'startdate' and 'enddate' must each contain at least one value")
  }

  queries <- paste0(WRDurl,
                    "station_nbr=", df.query$station,
                    "&start_date=", df.query$startdate,
                    "%2012:00:00%20AM&",
                    "end_date=", df.query$enddate,
                    "%2012:00:00%20AM&",
                    "dataset=", df.query$char,
                    "&format=tab",
                    ifelse(df.query$char %in% flow_chars, "", "&units=C"))

  # One download per distinct URL; only the URL string is handed to getURL.
  queries <- sort(unique(queries))
  responses <- lapply(queries, function(u) RCurl::getURL(u))

  # read.table(text = ) opens and closes its own connection, so no
  # textConnection is left dangling.
  tables <- lapply(responses, function(txt) {
    utils::read.table(text = txt, header = TRUE, sep = "\t", skip = 0,
                      stringsAsFactors = FALSE)
  })

  # remove . from column names
  tables <- lapply(tables, function(x) {
    colnames(x) <- gsub(pattern = "\\.", replacement = "", x = colnames(x))
    x
  })

  # pivot the data into long format
  # NOTE(review): the "instananteous_*" spellings are kept exactly as they
  # were listed before; presumably they mirror the service's column headers -
  # confirm. The correctly spelled variants are accepted too; any_of() ignores
  # names that are absent.
  value_cols <- c("mean_daily_flow_cfs",
                  "instananteous_stage_ft", "instananteous_flow_cfs",
                  "instantaneous_stage_ft", "instantaneous_flow_cfs",
                  "measured_stage_ft",
                  "temperature_measurement_C", "instantaneous_water_temp_C",
                  "daily_mean_water_temp_C", "daily_max_water_temp_C",
                  "daily_min_water_temp_C",
                  "temperature_measurement_F", "instantaneous_water_temp_F",
                  "daily_mean_water_temp_F", "daily_max_water_temp_F",
                  "daily_min_water_temp_F")

  long <- lapply(tables, function(x) {
    tidyr::pivot_longer(x, cols = dplyr::any_of(value_cols),
                        values_to = "Result.Value",
                        names_to = "Characteristic.Name")
  })

  # fix various data types and add cols when they don't exist
  # This is done per data frame: a column is only created when it is really
  # missing, so values delivered by the service are never replaced by NA.
  normalise_cols <- function(x) {
    for (col in c("time_observed", "download_date")) {
      if (!col %in% colnames(x)) {
        x[[col]] <- rep(NA_character_, nrow(x))
      }
    }
    # Coerce to character so that bind_rows() does not fail on type clashes
    # between responses.
    for (col in c("record_date", "time_observed", "published_status", "download_date")) {
      x[[col]] <- as.character(x[[col]])
    }
    x
  }

  combined <- dplyr::bind_rows(lapply(long, normalise_cols))

  dplyr::select(combined,
                dplyr::all_of(c("station_nbr", "record_date", "time_observed",
                                "Characteristic.Name", "Result.Value",
                                "estimated", "revised", "published_status",
                                "download_date")))
}
# Testing
# station=c("10366000", "13330000")
# startdate="2020-12-01"
# enddate="2021-02-15"
# char=c("MDF", "WTEMP_MAX")
#let's give this a shot
#data<-owrd_data(station="10388000", startdate="1990-01-01",enddate="2022-01-01",char="MDF")
#get it into a format that will work with dflow function
#p <- data[,c(2,5)]
#colnames(p) <-c("date", "flow")
#p$date <- as.POSIXct(p$date, format="%m-%d-%Y")
#p<-as.data.frame(p)
#dflow(x=p, m=30, r=5, yearstart=NA, yearend=NA, wystart="10-01", wyend="09-30")