# Biodiversity_Trawl_processing.R
# Reads the raw biodiversity trawl csv exports (SET, CATCH, and SAMPLE files),
# row-binds each type across files, tidies the columns, and writes cleaned
# csvs to data/output/.
library(tidyverse) # readr, dplyr, purrr, etc.
library(lubridate) # datetime parsing
library(geosphere) # geodesic distances
# list every file in the biodiversity trawl data directory
files <- list.files("data/BiodiversityTrawl/",
                    full.names = TRUE)
# split the file list by csv type (set, catch, sample)
set_files <- grep("set", files, value = TRUE, ignore.case = TRUE)
catch_files <- grep("catch", files, value = TRUE, ignore.case = TRUE)
sample_files <- grep("sample", files, value = TRUE, ignore.case = TRUE)
# read in all SET files and row-bind them together
# ================================================
# skip_to finds the last row beginning with "," (i.e. a blank leading cell in
# the csv) and feeds into read_csv() to skip the header section of the file.
skip_to <- max(grep("^,", readLines(set_files[1])))
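# For illustration only (hypothetical header layout, not taken from the real
# files), a file beginning
#   Mission:,PB2021-01
#   Gear:,Trawl
#   ,
#   MISSION,SETNO,STATION,...
# gives skip_to = 3, so the next row read is the column-name row.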
# Read only the header row into merge_set_data: all column names, no data
merge_set_data <-
read_csv(set_files[1],
skip = skip_to,
n_max = 0,
col_types = cols())
set_col_names <- colnames(merge_set_data)
# Loop through all files in the set_files list and read the data rows
for (i in seq_along(set_files)) {
  skip_to <- max(grep("^,", readLines(set_files[i])))
  set_data <-
    read_csv(
      set_files[i],
      # skip the header block and the column-name row (names are supplied)
      skip = skip_to + 1,
      col_names = set_col_names,
      col_types = cols()
    )
  merge_set_data <- bind_rows(merge_set_data, set_data)
}
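# An equivalent, more compact version with purrr (a sketch under the same
# assumptions about the file layout as the loop above):
# merge_set_data <- map_dfr(set_files, function(f) {
#   skip_to <- max(grep("^,", readLines(f)))
#   read_csv(f, skip = skip_to + 1, col_names = set_col_names,
#            col_types = cols())
# })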
# Build good_set_data from merge_set_data: create a unique ID (UID), format
# the datetimes, calculate the geodesic distance between the start and end
# coordinates, and derive the set speed in km/h (note: this derived speed can
# differ from the observed value recorded on the sheet!)
good_set_data <- merge_set_data %>%
  mutate(
    UID = paste(MISSION, SETNO, STATION, sep = "."),
    # TIME FINISH is paired with DATE_START, so this assumes a set never
    # crosses midnight (only a start date is recorded)
    start_dt = dmy_hms(paste(DATE_START, `TIME START`, sep = " ")),
    end_dt = dmy_hms(paste(DATE_START, `TIME FINISH`, sep = " ")),
    START_SET = as.character(start_dt),
    END_SET = as.character(end_dt),
    # geosphere expects points as (longitude, latitude)
    SET_DIST = distVincentyEllipsoid(
      cbind(LONGITUDE_START, LATITUDE_START),
      cbind(LONGITUDE_FINISH, LATITUDE_FINISH)
    ),
    SET_SECS = as.numeric(difftime(end_dt, start_dt, units = "secs")),
    # metres per second converted to km/h
    SET_SPEED = SET_DIST / SET_SECS * 3600 / 1000
  ) %>%
  select(
    MISSION,
    SETNO,
    STATION,
    time_start = START_SET,
    time_finish = END_SET,
    LONGITUDE_START,
    LATITUDE_START,
    LONGITUDE_FINISH,
    LATITUDE_FINISH,
    depth_start_m = DEPTH_START,
    depth_finish_m = DEPTH_FINISH,
    speed_k = SET_SPEED, # km/h
    depth_trawl_ave_m = DEPTH_TRWL_AVE,
    temp_trwl_ave_c = TEMP_TRWL_AVE,
    total_time_trawled = SET_SECS # seconds
  ) %>%
  rename_with(tolower) %>%
  write_csv("data/output/passamaquoddy_bay_biodiversity_trawl_set_data.csv")
# read in all CATCH files and row-bind them together
# ==================================================
# skip_to finds the last row beginning with "," (i.e. a blank leading cell in
# the csv) and feeds into read_csv() to skip the header section of the file.
skip_to <- max(grep("^,", readLines(catch_files[1])))
# Read only the header row into merge_catch_data: all column names, no data
merge_catch_data <-
read_csv(catch_files[1],
skip = skip_to,
n_max = 0,
col_types = cols())
catch_col_names <- colnames(merge_catch_data)
# Loop through all files in the catch_files list and read the data rows
for (i in seq_along(catch_files)) {
  skip_to <- max(grep("^,", readLines(catch_files[i])))
  catch_data <-
    read_csv(
      catch_files[i],
      # skip the header block and the column-name row (names are supplied)
      skip = skip_to + 1,
      col_names = catch_col_names,
      col_types = cols()
    )
  merge_catch_data <- bind_rows(merge_catch_data, catch_data)
}
# Build good_catch_data from merge_catch_data: create a unique ID
# (UID = paste(MISSION, SETNO, STATION, sep = ".")) and drop unnecessary
# columns.
good_catch_data <- merge_catch_data %>%
  mutate(UID = paste(MISSION, SETNO, STATION, sep = ".")) %>%
select(
MISSION,
SETNO,
STATION,
SCIENTIFIC_NAME,
COMMON_NAME,
abundance = NUMBER_CAUGHT,
biomass_g = BIOMASS,
COMMENTS
) %>%
rename_with(tolower) %>%
write_csv("data/output/passamaquoddy_bay_biodiversity_trawl_catch_data.csv")
# read in all SAMPLE files and row-bind them together
# ===================================================
# skip_to finds the last row beginning with "," (i.e. a blank leading cell in
# the csv) and feeds into read_csv() to skip the header section of the file.
skip_to <- max(grep("^,", readLines(sample_files[1])))
# Read only the header row into merge_sample_data: all column names, no data
merge_sample_data <-
read_csv(
sample_files[1],
skip = skip_to,
n_max = 0,
col_types = cols()
)
sample_col_names <- colnames(merge_sample_data)
# Loop through all files in the sample_files list and read the data rows
for (i in seq_along(sample_files)) {
  skip_to <- max(grep("^,", readLines(sample_files[i])))
  sample_data <-
    read_csv(
      sample_files[i],
      # skip the header block and the column-name row (names are supplied)
      skip = skip_to + 1,
      col_names = sample_col_names,
      col_types = cols()
    )
  merge_sample_data <- bind_rows(merge_sample_data, sample_data)
}
# Build good_sample_data: put the key columns first, lowercase the names, and
# write the cleaned output.
good_sample_data <- merge_sample_data %>%
  select(MISSION, SETNO, everything()) %>%
  rename_with(tolower) %>%
  write_csv("data/output/passamaquoddy_bay_biodiversity_trawl_sample_data.csv")