FishSizeTemperatureMainDocument_Public.Rmd

---
title: "Changes in fish body sizes relate to temperature, but not all species shrink with warming"
author: "Asta Audzijonyte, Shane Richards, Julia Blanchard et al."
date: "22 March 2018"
output: 
  html_document:
    code_folding: show
    toc: yes
    toc_float: yes
  pdf_document:
    toc: yes
---

This document is an electronic supplement to the publication "Fish body sizes change with temperature, but not all species shrink with warming" by Audzijonyte A, Richards SA, Stuart-Smith RD, Pecl G, Edgar GJ, Barrett NS, Payne N, Blanchard JL, Nature Ecology and Evolution (https://www.nature.com/articles/s41559-020-1171-0). 
**If you want to use the dataset for further research or publications, it would be appropriate to contact Rick Stuart-Smith, Graham Edgar and Neville Barrett, who are behind this massive effort of long-term data collection.**


To run the code make sure all the libraries listed in the first chunk are installed 

### Load libraries

```{r warning=FALSE, message=FALSE, echo=FALSE, eval= T}

rm(list = ls()) # clear memory

library(tidyverse)
library(maps)
library(mapdata)
library(rstan)
library(ggplot2)
library(ggmap)
library(reshape)
library(lmodel2)
library(cowplot)
library(weights)
library(rgdal)

```

### Load datafiles

```{r warning=FALSE, message=FALSE, eval=T, echo=FALSE}

# Load the main fish size data (outliers and rare species already removed).
# The .RData file restores an object called main_data.
load(file = "inputs/fish_data.RData")
# Sanity check: the printed number of distinct species should be 335
main_data$TAXONOMIC_NAME %>%
  unique() %>%
  length()


```

### Extended Data Fig1: map data and sites 

```{r warning=FALSE, message=FALSE, echo = F, eval = T, fig.width=10, fig.height=6} 
# Here you can explore all survey sites in the dataset, but plotting them TAKES A LONG TIME!!!
#ggmap(australia, extent = "panel", legend = "bottomright") +
#  geom_point(aes(x = main_data$SiteLong, y = main_data$SiteLat), data = main_data, size = 1, color = "orange") + 
#  ggtitle ("All RLS/MPA sites (not grouped yet)")

## Now map all the sites coloured by their geogroup and check that they look ok on the grid
#these are the grid lines for 0.5degree - THIS ALSO TAKES A LONG TIME
#hlines <- seq(min(main_data$SiteLat),max(main_data$SiteLat),by=0.5) # create a sequence of 0.5 degree
#vlines <- seq(min(main_data$SiteLong),max(main_data$SiteLong),by=0.5)
#ggmap(australia, extent = "panel", legend = "bottomright") +
#geom_point(aes(x = main_data$SiteLong, y = main_data$SiteLat), data = main_data, size = 1, color = main_data$geogroup) +
#scale_y_continuous(limits = c(-44.00, -37.00), expand = c(0, 0))  +
#scale_x_continuous(limits = c(143.00, 150.00), expand = c(0, 0)) +
#  geom_vline(xintercept = vlines) +
#  geom_hline(yintercept = hlines) +
#  ggtitle("Grouping of RLS/MPA sites based on geographic grid") +
#xlab("Longitutde") + ylab("Latitude")

# Summary statistics for each geographic cell (geogroup). For plotting
# purposes the cell temperature is the mean over all years; the analyses
# themselves use annual temperature.
rls_aus_geo <- main_data %>%
  group_by(geogroup) %>%
  summarise(
    location = first(Location),
    latt = round(mean(SiteLat), 3),
    long = round(mean(SiteLong), 3),
    year = n_distinct(year),
    surveys = n_distinct(SurveyID),
    locatnum = n_distinct(Location),
    anntemp = mean(meansst),
    species = n_distinct(TAXONOMIC_NAME),
    gdd = mean(scaledGDD)
  )

## Colour each geographic cell by its mean temperature
# floor() rounds the mean SST DOWN to a whole degree (it is not "nearest")
rls_aus_geo$tempF <- floor(rls_aus_geo$anntemp)
colfunc <- colorRampPalette(c("yellow", "red")) # yellow-to-red colour ramp
# One colour per whole degree from 12 to 29 (18 bins, matching the plot title).
# Cells whose tempF falls outside this range get an NA colour.
temp_bins <- seq(from = 12, to = 29)
tempcol <- colfunc(length(temp_bins))
colvector <- list(col = tempcol, tempF = temp_bins)
# add a temperature colour identifier to each geosite
rls_aus_geo$color <- colvector$col[match(rls_aus_geo$tempF, colvector$tempF)]

# Load long term location coordinates (restores an object called longLoc,
# read below through its columns long and lat)
load(file = "inputs/longLoc.RData")
## Or the location names and coordinates are also given here
#1 BassSt	-39.44452	147.2600
#2	Bichen	-41.87929	148.3118
#3	Jervis	-35.08387	150.7708
#4	Jurien	-30.38433	115.0224
#5	Maria 	-42.58866	148.0122
#6	Ninepi	-43.28544	147.1565
#7	Port D	-43.32945	145.9253
#8	Port P	-38.24981	144.7030
#9	Tinder	-43.06525	147.3322

# Labels for the long-term monitoring locations, placed by hand next to the
# coastline. Keeping them in their own data frame means each label is drawn
# exactly once; a geom_text() with a constant label and no data of its own
# inherits the shapefile data and redraws the label for every polygon vertex.
site_labels <- data.frame(
  x = c(111.5, 154, 144, 151, 151, 152, 152.7, 142.5),
  y = c(-30, -35, -37, -39, -41.5, -42.8, -44, -43),
  label = c(
    "Jurien Bay", "Jervis Bay", "Port Phillip Bay", "Bass Strait",
    "Bicheno", "Maria Island", "Ninepin & Tinderbox", "Port Davey"
  ),
  stringsAsFactors = FALSE
)

# Plot sites on a shapefile. Cell dots are sized by species count / 30 and
# coloured by mean SST; stars mark the long-term locations.
# NOTE(review): rgdal has been retired from CRAN; this call needs an archived
# install or a port to another shapefile reader.
rgdal::readOGR(dsn = ".", "aust_cd66states") %>%
  fortify() %>%
  ggplot() +
  geom_path(aes(x = long, y = lat, group = group), color = "darkgrey", size = .3) +
  geom_point(
    aes(x = long, y = latt),
    data = rls_aus_geo,
    size = (rls_aus_geo$species / 30),
    color = rls_aus_geo$color
  ) +
  geom_point(
    aes(x = long, y = lat),
    data = longLoc, shape = 8, size = 2, color = "black"
  ) +
  geom_text(
    aes(x = x, y = y, label = label),
    data = site_labels, size = 4, fontface = 3
  ) +
  ggtitle("Mean annual SST of each geographic cell (12 to 29)") +
  xlab("Longitude") + ylab("Latitude")

#ggmap(australia, extent = "panel", legend = "bottomright") +
#  geom_point(aes(x = rls_aus_geo$long, y = rls_aus_geo$latt), data = rls_aus_geo, size = (rls_aus_geo$species/30), color = rls_aus_geo$color) +
#  ggtitle("Mean annual SST of each geographic cell (12 to 29), circle size proportional to species number") + 
#  xlab("Longitutde") + ylab("Latitude")

```

### Daily SST values to get temperature statistics

This chunk does not have to be run because the main datafile already has SST statistics associated with each geographic cell. However, it is included here in case someone wants to do different analyses or calculate different temperature statistics. Thanks to Mike Sumner (Australian Antarctic Division) for help with the SST data!

```{r, warning=FALSE, message=FALSE, eval = F, echo = F}
### DAILY MODEL DATA

## Daily SST values for the geographic cells (each cell has a unique number).
## The file restores an object called dailyTemp.
load(file = "inputs/dailySST.RData")

# Mean annual temperature (annSST) and number of daily records (count) for
# every geogroup (cell) and year combination
sst_by_cell_year <- group_by(dailyTemp, geogroup, year)
meangeoSST <- summarise(sst_by_cell_year, annSST = mean(sst), count = n())

# Store the result next to the other input files
save(meangeoSST, file = "inputs/meangeoSST.RData")

```

### Supplementary Figure 4: are species distributed lognormally? Mostly

```{r, warning=FALSE, message=FALSE, echo = F, eval = T, fig.width=10, fig.height=6}
# Work on a copy of the main data under the shorter name df
df <- main_data

# set factor levels
df$TAXONOMIC_NAME <- factor(df$TAXONOMIC_NAME)
df$geogroup <- factor(df$geogroup)
# There are more size classes in the data set than listed here; for plotting
# purposes the largest size classes are left out, so they become NA in Size.
# This is not the size information used in the main analyses.
size_levels <- c(
  "2.5", "5", "7.5", "10", "12.5", "15", "20", "25", "30", "35", "37.5",
  "40", "50", "62.5", "75", "87.5", "100"
)
df$Size <- factor(as.character(round(df$SizeClass, 1)), levels = size_levels)

spp <- unique(df$TAXONOMIC_NAME)

## Select which species to plot. For manuscript purposes only 20 species are
## plotted; change the index range below to plot other species.
df_summary <- df %>%
  filter(TAXONOMIC_NAME %in% spp[1:20]) %>%
  group_by(TAXONOMIC_NAME, Size) %>%
  summarise(n = n())

# One panel per species: number of records in each size class
ggplot(df_summary, aes(x = Size, y = n, group = TAXONOMIC_NAME)) +
  geom_point() +
  geom_line() +
  # argument spelled out in full (was the partially matched `scale`)
  facet_wrap(~ TAXONOMIC_NAME, scales = "free_y") +
  labs(x = "Size (cm)", y = "Abundance") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5, size = 6),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )


```

### Supplementary Figure 2: Plot size-temperature distributions in space

Two example plots are provided in the manuscript electronic supplement. Plots for all species are available on https://github.com/astaaudzi/RLSfishSize/tree/master/AllSizePlots

```{r warning=FALSE, message=FALSE, echo=FALSE, eval = F}

load(file = "inputs/fish_data.RData")

# Use the name df so that Shane's code can be reused unchanged
df <- main_data
# Log length is the same transformation for every species, so compute it once
# instead of on every pass through the loop
df$y <- log(df$SizeClass)
Species <- sort(unique(df$TAXONOMIC_NAME))

# Make sure the output folder exists before saving any plot into it
plot_dir <- "SizePlotsSpatial"
dir.create(plot_dir, showWarnings = FALSE)

# One plot per species: observed log size classes against mean cell SST
for (aa in seq_along(Species)) {

  SpeciesName <- Species[aa]
  print(SpeciesName)
  print(aa)

  # records of the current species only
  df_fit <- df %>% filter(TAXONOMIC_NAME == SpeciesName)

  # mean log length and number of records at each distinct SST value
  df_means <- df_fit %>%
    group_by(meansst) %>%
    summarise(mu = mean(y), n = n(), ln10 = log10(n)) %>%
    arrange(meansst)

  sizeplot <- ggplot(df_fit, aes(x = meansst, y = y)) +
    geom_point(color = "grey80", size = 0.5) + # observed data
    # means, with dot size proportional to log10 of the number of records
    geom_point(data = df_means, aes(x = meansst, y = mu, size = ln10), color = "dark red") +
    ylim(0.5, 4.5) +
    labs(
      title = SpeciesName, subtitle = "grey dots show all observed size classes",
      size = "no.ind\n(log10)", x = "Mean cell annual SST (C)", y = "Mean log length (cm)"
    ) +
    theme_bw()

  ggsave(
    filename = paste0(plot_dir, "/", SpeciesName, ".jpg"),
    plot = sizeplot, width = 8, height = 7
  )
  rm(sizeplot)

}


```

### First set of Bayesian analyses: split the data

Based on initial analyses, some species need higher or lower priors to ensure that their posteriors are within priors. This chunk does not need to be run now, because final estimates of slopes are provided 

```{r, warning=FALSE, message=FALSE, echo = F, eval = F}
# Sorted list of all species names in the data; the groups below are listed by
# name and are later excluded from this list to find the "main" species
Species <- sort(unique(df$TAXONOMIC_NAME))

## Species to be re-run with increased beta0 priors (2.0 to 5.0) - use code spatial_largerb0.stan
higherB0 <- c("Achoerodus gouldii", "Achoerodus viridis", "Aplodactylus arctidens", "Caranx melampygus", "Caranx sexfasciatus", "Cheilodactylus spectabilis", "Dactylophora nigricans", "Diagramma labiosum", "Heterodontus portusjacksoni", "Hipposcarus longiceps", "Kyphosus bigibbus","Kyphosus cinerascens","Kyphosus sydneyanus", "Kyphosus sectatrix","Latridopsis forsteri","Lethrinus nebulosus","Lutjanus bohar", "Naso tonganus", "Plectropomus leopardus","Seriola lalandi") #20 spp 

## Species to be re-run with decreased beta0 priors (0.5 to 3.0) - use code spatial_lowerb0.stan
lowerB0 <- c("Apogon doederleini", "Brachaluteres jacksonianus", "Chromis atripes", "Chromis margaritifer", "Chromis nitida", "Chromis viridis", "Cirrhilabrus exquisitus", "Cirripectes filamentosus", "Dascyllus aruanus", "Dascyllus reticulatus", "Pomacentrus coelestis","Pomacentrus lepidogenys","Pomacentrus moluccensis", "Valenciennea muralis") #14

# Species that need higher priors for the size error term (0.2 to 0.6) - use code spatial_higherSigma.stan
higherSigma <- c("Acanthaluteres vittiger", "Acanthochromis polyacanthus", "Amphiprion akindynos", "Cheilinus chlorourus", "Cheilinus trilobatus", "Cheilodipterus quinquelineatus", "Chlorurus sordidus", "Choerodon rubescens", "Chromis hypsilepis", "Cirrhilabrus punctatus", "Coris auricularis", "Coris batuensis", "Coris bulbifrons", "Coris picta", "Dischistodus prosopotaenia", "Gomphosus varius", "Halichoeres margaritaceus", "Halichoeres marginatus", "Halichoeres melanochir", "Halichoeres melanurus", "Halichoeres miniatus", "Halichoeres nebulosus", "Halichoeres nigrescens", "Hemigymnus fasciatus", "Hemigymnus melapterus", "Labrichthys unilineatus", "Labropsis australis","Macropharyngodon meleagris", "Notolabrus gymnogenis", "Notolabrus parilus", "Notolabrus tetricus", "Ophthalmolepis lineolatus", "Ostorhinchus aureus", "Parupeneus spilurus", "Pempheris multiradiata","Pomacentrus adelus","Pomacentrus bankanensis","Pomacentrus grammorhynchus","Pomacentrus vaiuli", "Pomacentrus wardi","Pseudolabrus luculentus", "Scarus frenatus", "Scarus ghobban", "Scarus niger", "Scarus prasiognathos", "Scarus rivulatus", "Scarus schlegeli", "Thalassoma lunare", "Thalassoma lutescens", "Upeneichthys vlamingii") #50

# Species that need lower priors for the size error term (0.01 to 0.15) - use code spatial_lowerSigma.stan
lowerSigma <- c("Acanthurus blochii", "Acanthurus grammoptilus", "Acanthurus nigricauda", "Arripis trutta", "Caesio caerulaurea", "Chromis ternatensis", "Hemitaurichthys polylepis","Lutjanus fulviflamma", "Lutjanus gibbus", "Lutjanus quinquelineatus", "Monodactylus argenteus", "Mugil cephalus", "Mulloidichthys flavolineatus", "Mulloidichthys vanicolensis", "Myripristis violacea", "Myripristis vittata", "Naso brevirostris", "Paracaesio xanthura", "Pseudocaranx georgianus", "Pterocaesio digramma", "Pterocaesio tile", "Pterocaesio trilineata", "Sphyraena obtusata", "Trachurus declivis")#24


# Species that need longer runs (1500 iterations and 1000 burn-in) - use spatial_main.stan
longerRuns <- c("Trachurus novaezelandiae", "Schuettea scalaripinnis", "Trachinops taeniatus", "Trachinops caudimaculatus", "Trachinops noarlungae") 

# Remaining species for which the initial priors were suitable - use
# spatial_main.stan (222 species). mainSpecies holds their positions in Species.
special_prior_species <- c(higherB0, longerRuns, lowerB0, higherSigma, lowerSigma)
mainSpecies <- which(!Species %in% special_prior_species)

# Names of the main species, split into three batches by position:
# 1-70, 71-140 and 141 onwards. The last batch is bounded by the number of
# MAIN species; bounding it by length(Species) indexes past the end of
# mainSpecies and yields NA species names.
main_names <- Species[mainSpecies]
main_pos <- seq_along(main_names)

df_main1 <- df %>% filter(TAXONOMIC_NAME %in% main_names[main_pos <= 70])
save(df_main1, file = "data1.RData")

df_main2 <- df %>% filter(TAXONOMIC_NAME %in% main_names[main_pos > 70 & main_pos <= 140])
save(df_main2, file = "data2.RData")

df_main3 <- df %>% filter(TAXONOMIC_NAME %in% main_names[main_pos > 140])
save(df_main3, file = "data3.RData")

# One data file per group of species that needs its own priors or run length
df_higherB0 <- df %>% filter(TAXONOMIC_NAME %in% higherB0)
save(df_higherB0, file = "data4.RData")

df_lowerB0 <- df %>% filter(TAXONOMIC_NAME %in% lowerB0)
save(df_lowerB0, file = "data5.RData")

df_longer <- df %>% filter(TAXONOMIC_NAME %in% longerRuns)
save(df_longer, file = "data6.RData")

df_higherSigma <- df %>% filter(TAXONOMIC_NAME %in% higherSigma)
save(df_higherSigma, file = "data7.RData")

df_lowerSigma <- df %>% filter(TAXONOMIC_NAME %in% lowerSigma)
save(df_lowerSigma, file = "data8.RData")


```

### First set of Bayesian analyses: main analysis code 

The data files above were analysed on clusters and this is the code to be run for the main spatial analysis. Analyses don't have to be run here because outputs are saved and available. The main analysis is done using the spatial_main.stan code. For some species, spatial analyses had to be repeated with updated priors (higher b0 or lower b0) and the related .stan files are named accordingly.

```{r, warning=FALSE, message=FALSE, echo = F, eval = F}
library(rstan)
library(tidyverse)

# Bayesian analyses are run for each species separately.
# This version loops over the species named in higherB0; to run every species
# instead, loop over Species and take SpeciesName <- Species[x].
# NOTE(review): the fit below uses spatial_main.stan even though the higherB0
# group is meant to be run with the larger-beta0 priors - confirm which .stan
# file should be used for this group.

# Quantities that do not depend on the species are computed once, up front
df$y <- log(df$SizeClass)
yVals <- sort(unique(df$SizeClass)) # fish size classes
I     <- length(yVals)              # number of fish size classes
# log of the midpoint between each pair of neighbouring size classes; these
# I-1 values split the size classes
yCuts <- log(0.5 * (yVals[-I] + yVals[-1]))

for (x in seq_along(higherB0)) {

  # higherB0 holds species NAMES, so the name is used directly. Indexing
  # Species with a name returns NA and the model would get no data.
  SpeciesName <- higherB0[x]
  print(SpeciesName)
  print(x)

  # Skip (with a warning) species that have no records rather than failing
  # later on max() of an empty vector
  if (!any(df$TAXONOMIC_NAME == SpeciesName)) {
    warning("No records found for species: ", SpeciesName, call. = FALSE)
    next
  }

  ### prepare data
  df_stan <- df %>%
    filter(TAXONOMIC_NAME == SpeciesName) %>%
    mutate(z1 = meansst - mean(meansst)) %>% # centre temperature on the species mean
    dplyr::select(year, month, day, geogroup, z1, SizeClass) %>%
    mutate(
      survey_date = as.Date(paste(year, month, day, sep = "-")),
      # a survey is one date at one geographic cell
      survey = paste(survey_date, geogroup, sep = "-")
    ) %>%
    arrange(survey) %>%
    mutate(indx_srv = as.integer(factor(survey)))

  # add consecutive integer indexes for year, location and size class
  years    <- sort(unique(df_stan$year))
  df_years <- tibble(year = years, indx_year = seq_along(years))
  grps     <- sort(unique(df_stan$geogroup))
  df_grps  <- tibble(geogroup = grps, indx_grp = seq_along(grps))
  df_yVals <- tibble(SizeClass = yVals, indx_sc = seq_along(yVals))

  df_stan <- left_join(df_stan, df_years, by = "year")
  df_stan <- left_join(df_stan, df_grps,  by = "geogroup")
  df_stan <- left_join(df_stan, df_yVals, by = "SizeClass")

  # number of records per survey and size class
  df_n <- df_stan %>%
    group_by(indx_srv, indx_sc) %>%
    summarise(n = n())

  max_sc  <- length(yVals) # all size classes in the data, not only this species'
  max_srv <- max(df_n$indx_srv)

  # survey x size-class count matrix; combinations never observed stay 0
  m_obs <- matrix(data = 0, nrow = max_srv, ncol = max_sc)
  m_obs[cbind(df_n$indx_srv, df_n$indx_sc)] <- df_n$n

  # one row per survey with its temperature, year index and location index
  df_srv <- df_stan %>%
    group_by(indx_srv) %>%
    summarise(
      z1   = median(z1),
      year = median(indx_year),
      grp  = median(indx_grp)
    )

  stan_dat <- list(
    N = nrow(df_srv),               # surveys
    J = max(df_srv$grp),            # locations
    K = length(yCuts) + 1,          # fish size classes
    L = max(df_srv$year),           # years of data
    cutoff = yCuts,                 # size class cut-offs
    y    = m_obs,                   # observations per size class
    x    = df_srv$z1,               # predictor variable
    gloc = df_srv$grp,              # locations
    yr   = df_srv$year              # year
  )

  fit <- stan(file = 'stan_models/spatial_main.stan', data = stan_dat, iter = 1000, warmup = 500, chains = 3, seed = 66, refresh = 250)

  # NOTE: saved fit results are not provided, but can be generated with this
  # code; replace the placeholder folder below with a real one
  save(fit, file = paste0("... place for saved fit results.../", SpeciesName, ".RData"))

}

```

### STAN code for the spatial analyses 

NOTE! This does not run in R, but has to be supplied as .stan file. All files are available in the stan_models subfolder

``` {r, warning=FALSE, message=FALSE, echo = F, eval = F}

// Shane A Richards 6/11/2018
// Model of binned (log) fish sizes as a function of one predictor x.
// includes a random effect associated with location, year, and survey
// combines identical observations: y holds counts per survey and size class

data {
  int  <lower = 1> N; // number of surveys
  int  <lower = 1> J; // number of locations
  int  <lower = 1> K; // number of size classes
  int  <lower = 1> L; // number of year classes
  real <lower = 0>            cutoff[K-1]; // size class cut-offs
  matrix[N,K]                 y;           // observed counts per survey (row) and size class (column)
  real                        x[N];        // observed predictor variable
  int  <lower = 1, upper = J> gloc[N];     // observed location
  int  <lower = 1, upper = L> yr[N];       // observed year (index only)
}

// The declared bounds below are the only constraints placed on these
// parameters; no other prior statements are given for them in the model block.
parameters {
  real <lower =  1.5,   upper = 3.5> beta0;       // mean (log)size
  real <lower = -0.50,  upper = 0.5> beta1;       // predictor of mean (log)size
//  real <lower =  2.0,   upper = 5.0> beta0;       // mean (log)size for larger b0 analyses   
  real <lower =  0.10,  upper = 0.30> sigma_size;  // variation in (log)sizes
  real <lower =  0.001, upper = 1.0> sigma_gloc;  // variation among locations
  real <lower =  0.001, upper = 1.5> sigma_yr;    // variation among years
  real <lower =  0.001, upper = 1.0> sigma_srv;   // variation among surveys
  real gloc_RE[J];  // estimated location-specific variation
  real yr_RE[L];    // estimated year-specific variation
  real srv_RE[N];   // estimated survey-specific variation
}

transformed parameters {
}

model {
  vector[K-1] cpr; // cumulative normal probability at each cut-off
  vector[K] pr;    // probability of each size class
  // real cpr[K-1] = 0.0;   // cumulative normal probability
  // real pr[K] = 0.0;      // probability of observed size class
  real mu;         // mean log fish size 
  real eps = 0.01; // a small probability, spread evenly over the K size classes
  real c1;
  real c2;

  // pr = c1 * (normal bin probability) + c2, so every class keeps a probability of at least eps/K
  c1 = (1.0 - eps);
  c2 = eps/K;
  
  // generate location, year and survey specific variation 
  gloc_RE ~ normal(0.0, sigma_gloc); 
  yr_RE   ~ normal(0.0, sigma_yr); 
  srv_RE  ~ normal(0.0, sigma_srv); 

  for (i in 1:N) { // for each survey 
    // survey mean: intercept + predictor effect + location, year and survey effects
    mu = beta0 + beta1*x[i] + gloc_RE[gloc[i]] + yr_RE[yr[i]] + srv_RE[i];
    
    for (k in 1:(K-1)) {
      cpr[k] = normal_cdf(cutoff[k], mu, sigma_size);
    }
    
    // class probabilities are differences of neighbouring cumulative probabilities
    pr[1] = c1*cpr[1] + c2;
    for (k in 2:(K-1)) {
      pr[k] = c1*(cpr[k] - cpr[k-1]) + c2;
    }
    pr[K] = c1*(1.0 - cpr[K-1]) + c2;
    
    for (k in 1:K) {
      target += y[i,k]*log(pr[k]); // add the log-likelihood term
    }
  }
}


```

### Spatial postanalyses processing: traceplots
this code was used to assess convergence; individual species traceplots are not provided here, but can be reproduced using the chunks above and this code

```{r, warning=FALSE, message=FALSE, echo = F, eval = F}
myspp <- sort(unique(df$TAXONOMIC_NAME))

## Plot traceplots and posterior distributions

model_params <- c("beta0", "beta1", "sigma_size", "sigma_gloc", "sigma_yr", "sigma_srv")
# one slot per species, allocated up front instead of growing inside the loop
l_trace <- vector("list", length(myspp))
l_plots <- vector("list", length(myspp))

# plot all diagnostics into one pdf file
pdf(file = "diagnostics.pdf", onefile = TRUE)

# finally = dev.off() closes the pdf device even if loading or plotting one
# of the species fails part-way through
tryCatch({
  for (i in seq_along(myspp)) {

    SpeciesName <- myspp[i] # select the species
    print(SpeciesName)
    print(i)

    # load saved outputs from individual Bayesian analyses (restores `fit`);
    # replace the placeholder folder with the real one
    load(file = paste0("...save outputs here... /", SpeciesName, ".RData"))

    # build each plot once, keep it in the list and print the stored object
    l_trace[[i]] <- traceplot(fit, pars = model_params, inc_warmup = TRUE, ncol = 3)
    print(l_trace[[i]])

    extracted <- rstan::extract(fit, pars = model_params)
    df_prm_pdf <- reshape2::melt(extracted, value.name = "Parameter")

    # plot posterior distributions, panels in the order of model_params
    df_prm_pdf$L1 <- factor(df_prm_pdf$L1, levels = model_params)

    l_plots[[i]] <- ggplot(data = df_prm_pdf, aes(x = Parameter)) +
      geom_density(fill = "wheat") +
      labs(
        title = SpeciesName,
        x     = "Parameter Value",
        y     = "Probability density"
      ) +
      facet_wrap(~ L1, scales = "free", ncol = 3) +
      theme_bw()
    print(l_plots[[i]])

    print(i)
  }
}, finally = dev.off())

```

### Extract posterior distribution quantiles from individual Bayesian analyses
this chunk does not need to be run because outputs are saved and provided separately

```{r, warning=FALSE, message=FALSE, echo = F, eval = F}

load(file = "inputs/fish_data.RData")
df <- main_data
Species <- sort(unique(df$TAXONOMIC_NAME))

## Posterior quantiles of every model parameter, one summary table per species
model_params <- c("beta0", "beta1", "sigma_size", "sigma_gloc", "sigma_yr", "sigma_srv")
l_CI <- vector("list", length(Species)) # one slot per species

for (i in seq_along(Species)) {

  SpeciesName <- Species[i]
  print(SpeciesName)
  print(i)

  # enter the directory for saved Bayesian fits from the chunk above
  # (restores an object called `fit`)
  load(file = paste0("...save outputs here... /", SpeciesName, ".RData"))

  extracted <- rstan::extract(fit, pars = model_params)
  df_prm_pdf <- reshape2::melt(extracted, value.name = "Parameter")

  # one row per parameter (column L1) with its posterior quantiles
  df_summary <- df_prm_pdf %>%
    group_by(L1) %>%
    summarise(
      n = n(), # samples
      p.025 = quantile(Parameter, probs = 0.025),
      p.100 = quantile(Parameter, probs = 0.100),
      p.500 = quantile(Parameter, probs = 0.500),
      p.900 = quantile(Parameter, probs = 0.900),
      p.975 = quantile(Parameter, probs = 0.975)
    )

  l_CI[[i]] <- df_summary
}

l_CI335 <- l_CI

### And put results in a data frame

# Quantiles of one parameter, looked up BY NAME so the result does not depend
# on the row order of the summary table
ci_cols <- c("p.025", "p.100", "p.500", "p.900", "p.975")
ci_for <- function(ci, param) {
  row <- which(ci$L1 == param)
  stopifnot(length(row) == 1)
  unlist(ci[row, ci_cols], use.names = FALSE)
}

# One row per species: 5 quantiles each for beta0, beta1 and sigma_size
q_mat <- t(vapply(
  l_CI,
  function(ci) {
    c(ci_for(ci, "beta0"), ci_for(ci, "beta1"), ci_for(ci, "sigma_size"))
  },
  numeric(15)
))

bet1ci <- as.data.frame(q_mat)
colnames(bet1ci) <- c(
  "b0_p025", "b0_p10", "b0_p50", "b0_p90", "b0_p975",
  "p025", "p10", "p50", "p90", "p975",
  "sigsize_p025", "sigsize_p10", "sigsize_p50", "sigsize_p90", "sigsize_p975"
)

# Species-level traits are taken from the first record of each species
first_row <- match(Species, df$TAXONOMIC_NAME)
bet1ci$midpoint  <- df$midpoint[first_row]
bet1ci$MaxLenFB  <- df$MaxLenFB[first_row]
bet1ci$MaxLenObs <- df$MaxSizeObs[first_row]
bet1ci$species   <- as.character(Species)

save(bet1ci, file = "analysesOutputs/slopesInSpace335spp.RData")


```

### Supplementary Table1: print spatial slopes

```{r, warning=FALSE, message=FALSE, echo = F, eval = T}
# Load the per-species slope quantiles (restores an object called bet1ci)
load(file = "analysesOutputs/slopesInSpace335spp.RData")

# Keep the species name, observed maximum length, temperature midpoint and the
# beta1 (slope) quantiles, renaming them for the printed table. select() is
# namespace-qualified, as elsewhere in this document, so it cannot be masked by
# another attached package.
slopes_to_print <- bet1ci %>%
  dplyr::select(
    species,
    MaxObservLength = MaxLenObs,
    preferred_temp = midpoint,
    b1_p025 = p025,
    b1_p10 = p10,
    b1_p50 = p50,
    b1_p90 = p90,
    b1_p975 = p975
  )

knitr::kable(slopes_to_print)

#write.csv(slopes_to_print, file = "ExtendedDataFigures/ExtendedDataTable1.csv")

```

### Fig1A: second spatial Bayesian analysis, effects of temperature midpoint

```{r, warning=FALSE, message=FALSE, echo = F, eval = T, fig.width=10, fig.height=6}

# Per-species slope quantiles from the first Bayesian analysis (object bet1ci)
load(file = "analysesOutputs/slopesInSpace335spp.RData")

df <- bet1ci %>% dplyr::select(p025, p10, p50, p90, p975, midpoint, MaxLenObs, species)

# Data for the regression of species slopes (y) on temperature midpoint (x).
# SD is approximated from the 95% interval width as for a normal distribution.
# (alternative from the 80% interval: SD1 = (p90-p10)/(2*1.281552))
df_fit <- df %>%
  mutate(
    SD = (p975 - p025) / (2 * 1.96),
    x = midpoint
  ) %>%
  dplyr::select(Species = species, x, y = p50, SD)

df_fit <- na.omit(df_fit)

x_mean <- mean(df_fit$x) # rescaling value

stan_dat <- list(
  N    = nrow(df_fit),      # Number of species
  x    = df_fit$x - x_mean, # transformed x-val
  SD   = df_fit$SD,         # SD of y-values
  y    = df_fit$y           # y-values
)

# Run second Bayesian analysis on the overall slopes of all species
# (not needed here because the fit is saved and loaded below)
#fit <- stan(file = 'stan_models/spatial_secondFit.stan', data = stan_dat,
#  iter = 3500, warmup = 1000, chains = 4, seed = 111, thin = 10)
#save(fit, file = "analysesOutputs/spaceSlope_temp_fit.RData")

load(file = "analysesOutputs/spaceSlope_temp_fit.RData")

# Parameters extracted from the fit: intercept, slope and residual sigma
model_params <- c("beta0", "beta1", "sigma")

# display the posterior distribution statistics
#print(fit, pars = model_params, probs = c(0.025,0.5,0.975), digits=5)

#traceplot(fit, pars = model_params, inc_warmup = TRUE, ncol = 4)

l_params <- rstan::extract(fit, pars = model_params)

# long format: one row per posterior draw and parameter
df_prm_pdf <- reshape2::melt(l_params, value.name = "Value")
names(df_prm_pdf)[ncol(df_prm_pdf)] <- "Parameter"
df_prm_pdf$Parameter <- factor(df_prm_pdf$Parameter, levels = model_params)

# posterior densities can be inspected with:
#ggplot(data=df_prm_pdf, aes(x = Value, fill = Parameter)) +
#  geom_density() +
#  labs(x = "Value", y = "Probability density") +
#  facet_wrap( ~ Parameter, scale = "free") +
#  theme_bw()

## One regression line per posterior draw, evaluated at 100 temperature values
## between the minimum and maximum midpoints. The 2.5, 50 and 97.5% quantiles
## across draws define the overall uncertainty of the response.

x_num <- 100
max_iterations <- max(df_prm_pdf$iterations)
x_vals <- seq(from = min(df_fit$x), to = max(df_fit$x), length.out = x_num)

# m_mu[i, j] = beta0[i] + beta1[i] * (x_vals[j] - x_mean), computed for all
# draws at once (rows are draws, columns are temperature values)
draw_idx <- seq_len(max_iterations)
beta0_draws <- as.vector(l_params$beta0)[draw_idx]
beta1_draws <- as.vector(l_params$beta1)[draw_idx]
m_mu <- beta0_draws + outer(beta1_draws, x_vals - x_mean)

# Quantiles across draws at every temperature value (rows of m_q follow
# ci_probs). For an 80% band use ci_probs <- c(0.1, 0.5, 0.9) instead.
ci_probs <- c(0.025, 0.500, 0.975)
m_q <- apply(m_mu, 2, quantile, probs = ci_probs)

df_CI <- tibble(
  x = x_vals,
  mu_025 = unname(m_q[1, ]),
  mu_500 = unname(m_q[2, ]),
  mu_975 = unname(m_q[3, ])
)

# define the uncertainty polygon to be plotted: lower bound left to right,
# then upper bound right to left
df_polygon <- tibble(
  x = c(df_CI$x, rev(df_CI$x)),
  y = c(df_CI$mu_025, rev(df_CI$mu_975))
)

# classify species by their responses using median +/- 2 SD
# (-1 decrease, 0 no clear change, 1 increase)
df_fit <- df_fit %>%
  mutate(sig = ifelse(y < 0, ifelse(y + 2*SD < 0, -1, 0), ifelse(y - 2*SD > 0, 1, 0)))
df_fit$sig <- factor(df_fit$sig)

# Classification actually used for colouring and error bars: the 10% and 90%
# percentiles from the Bayesian fit
df_fit$p10 <- bet1ci$p10[match(df_fit$Species, bet1ci$species)]
df_fit$p90 <- bet1ci$p90[match(df_fit$Species, bet1ci$species)]

df_fit$color <- 0
df_fit$color[which(df_fit$p90 < 0)] <- -1 # 80% interval entirely below zero
df_fit$color[which(df_fit$p10 > 0)] <- 1  # 80% interval entirely above zero
df_fit$color <- factor(df_fit$color)

data_subset0 <- df_fit %>% filter(color == -1) # decreasing
data_subset1 <- df_fit %>% filter(color == 1)  # increasing
data_subset2 <- df_fit %>% filter(color == 0)  # no change

# The strings "grey", "blue" and "red" inside aes() are group keys, not
# colours; the colours come from scale_colour_manual(). The values are named
# so the key-to-colour mapping no longer depends on alphabetical level order
# or on every group being present. The mapping reproduces what the unnamed
# vector produced.
# NOTE(review): key "blue" (increasing species) is drawn in the red hex and
# key "red" (decreasing species) in the blue hex - confirm that this matches
# the published figure.
group_colours <- c(blue = "#de2d26", grey = "grey70", red = "#3182bd")

p1 <- ggplot(df_CI) +
  geom_abline(intercept = 0, slope = 0, linetype = "dashed") +
  geom_point(data = data_subset2, aes(x = x, y = y, col = "grey")) +
  geom_errorbar(data = data_subset2, aes(x = x, ymin = p10, ymax = p90, color = "grey"), width = 0) +
  geom_point(data = data_subset1, aes(x = x, y = y, color = "blue")) +
  geom_errorbar(data = data_subset1, aes(x = x, ymin = p10, ymax = p90, color = "blue"), width = 0) +
  geom_point(data = data_subset0, aes(x = x, y = y, color = "red")) +
  geom_errorbar(data = data_subset0, aes(x = x, ymin = p10, ymax = p90, color = "red"), width = 0) +
  geom_polygon(data = df_polygon, aes(x = x, y = y), fill = "#feb24c") +
  geom_line(aes(x = x, y = mu_500)) +
  labs(
    x = expression("Species thermal affinity ("~degree~"C)"),
    y = expression("Relative change in body length (per "~degree~"C)")
  ) +
  ylim(-0.25, 0.25) +   # add ylim to remove the outlier
  scale_colour_manual(values = group_colours) +
  guides(color = "none") + # "none" replaces the deprecated FALSE
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text = element_text(size = 12)
  )


p1

```

### Fig1B: second spatial Bayesian analysis, effects of maximum body size

```{r, warning=FALSE, message=FALSE, echo = F, eval = T, fig.width=10, fig.height=6}
#rm(list = ls()) # clear memory

# Load the per-species spatial slope summaries (the file provides `bet1ci`).
# NOTE(review): the earlier comment here said the cleaned set has 320 species,
# while the file name refers to 335 -- confirm which number applies.
load(file = "analysesOutputs/slopesInSpace335spp.RData")
df <- dplyr::select(
  bet1ci,
  p025, p10, p50, p90, p975, midpoint, MaxLenFB, MaxLenObs, species
)

########
#Or if testing effects of 10% data exclusion
#load(file = "RDataOutputs/testExcl.RData")
#spp = testExcl$species
#df_test <- df[-which(df$species %in% spp),]
#df_test2 <- df[which(df$species %in% spp),]
#replace 20 values with the df matrix with the new results without any data exclusion
#df_test2$p025 <- testExcl$p025[match(df_test2$species, testExcl$species)]
#df_test2$p10 <- testExcl$p10[match(df_test2$species, testExcl$species)]
#df_test2$p50 <- testExcl$p50[match(df_test2$species, testExcl$species)]
#df_test2$p90 <- testExcl$p90[match(df_test2$species, testExcl$species)]
#df_test2$p975 <- testExcl$p975[match(df_test2$species, testExcl$species)]
#df_excl <- rbind(df_test, df_test2)
#save(df_excl, file = "RDataOutputs/df_exclusionTest.RData")
#for the purposes of analyses below replace df with new dataset
#df <- df_excl
########

# Second-stage input, one row per species:
#   x  = natural log of the maximum observed length (predictor)
#   y  = posterior median slope (response)
#   SD = width of the 95% interval divided by 2 * 1.96
df_fit <- df %>%
  mutate(
    x = log(MaxLenObs),
    SD = (p975 - p025) / (2 * 1.96)
  ) %>%
  select(Species = species, x, y = p50, SD)

# Predictor mean; subtracted here and again when predictions are computed
x_mean <- mean(df_fit$x)

# Data list in the layout passed to the Stan model
stan_dat <- list(
  N  = nrow(df_fit),       # number of species
  x  = df_fit$x - x_mean,  # centred predictor
  SD = df_fit$SD,          # SD of the y-values
  y  = df_fit$y            # y-values
)

#Fit second Bayesian model - don't need to run it, as results are saved
#fit <- stan(file = 'stan_models/spatial_secondFit.stan', data = stan_dat,
#  iter = 3500, warmup = 1000, chains = 4, seed = 111, thin = 10)
#save(fit, file = "analysesOutputs/spaceSlope_length_fit.RData")

## Load the saved second-stage fit (the file provides `fit`)
load(file = "analysesOutputs/spaceSlope_length_fit.RData")

model_params <- c("beta0", "beta1", "sigma")

# display the posterior distribution statistics
#print(fit, pars=model_params, probs=c(0.025,0.5,0.975), digits=5)

##Baysian results with 20 values replaced without data exclusion
#          mean se_mean      sd     2.5%      50%    97.5% n_eff    Rhat
#beta0 -0.00496 0.00007 0.00198 -0.00877 -0.00501 -0.00101   768 0.99744
#beta1  0.01828 0.00012 0.00342  0.01157  0.01828  0.02469   874 1.00176
#sigma  0.02805 0.00006 0.00180  0.02449  0.02807  0.03150   971 0.99840

#Baysian result of the original analysis
#          mean se_mean      sd     2.5%      50%    97.5% n_eff    Rhat
#beta0 -0.00484 0.00007 0.00197 -0.00868 -0.00479 -0.00117   826 1.00231
#beta1  0.01948 0.00011 0.00334  0.01330  0.01937  0.02585   973 0.99698
#sigma  0.02811 0.00007 0.00180  0.02477  0.02811  0.03161   757 1.00706

#traceplot(fit, pars = model_params, inc_warmup = TRUE, ncol = 4)

# Posterior draws, one list element per parameter
l_params <- rstan::extract(fit, pars = model_params)

# Long format of the draws; the last column of the melted frame holds the
# parameter name and is renamed accordingly
df_prm_pdf <- reshape2::melt(l_params, value.name = "Value")
names(df_prm_pdf)[ncol(df_prm_pdf)] <- "Parameter"

# plot posterior distributions
df_prm_pdf$Parameter <- factor(df_prm_pdf$Parameter, levels = model_params)

#ggplot(data=df_prm_pdf, aes(x = Value, fill = Parameter)) +
#  geom_density() +
#  labs(
#    x    = "Value",
#    y    = "Probability density"
#  ) +
#  facet_wrap( ~ Parameter, scale = "free") +
#  theme_bw()

## Posterior band of the fitted line over the observed range of x
x_num <- 100
# Number of posterior draws, taken from the draws themselves rather than from
# a column name generated by melt()
max_iterations <- length(l_params$beta0)
x_vals <- seq(from = min(df_fit$x), to = max(df_fit$x), length.out = x_num)

# Row d of m_mu is the line beta0[d] + beta1[d] * (x - x_mean) evaluated on
# x_vals. as.vector() drops the 1-d array dimension of the draws so that the
# intercepts recycle down the rows of the outer product.
m_mu <- as.vector(l_params$beta0) +
  outer(as.vector(l_params$beta1), x_vals - x_mean)

# Column-wise quantiles of the predicted mean.
# NOTE: the mu_025 / mu_975 columns hold the 10% and 90% quantiles (an 80%
# band); the column names are kept because the plotting code refers to them.
m_q <- apply(m_mu, 2, quantile, probs = c(0.1, 0.5, 0.9))

# x is a natural log (log(MaxLenObs)), so the original scale is exp(x);
# this block previously used 10^x.
df_CI <- tibble(
  x = x_vals,
  mu_025 = m_q[1, ],
  mu_500 = m_q[2, ],
  mu_975 = m_q[3, ]
) %>%
  mutate(x_orig = exp(x))

# Closed outline of the band: lower edge left-to-right, upper edge back
df_polygon <- tibble(
  x = c(df_CI$x, rev(df_CI$x)),
  y = c(df_CI$mu_025, rev(df_CI$mu_975))
) %>%
  mutate(x_orig = exp(x))

df_fit <- df_fit %>% mutate(x_orig = exp(x))

# Attach the 10% and 90% posterior quantiles of each species' slope
spp_row <- match(df_fit$Species, bet1ci$species)
df_fit$p10 <- bet1ci$p10[spp_row]
df_fit$p90 <- bet1ci$p90[spp_row]

# Classify each slope by whether its 10-90% interval excludes zero:
#   -1 = interval entirely below zero, 1 = entirely above zero, 0 = otherwise
slope_sign <- rep(0, nrow(df_fit))
slope_sign[which(df_fit$p90 < 0)] <- -1
slope_sign[which(df_fit$p10 > 0)] <- 1
df_fit$color <- factor(slope_sign)

# Random horizontal offset used for plotting.
# NOTE(review): the offset is drawn from [0.05, 0.15], so every point moves to
# the right, and no seed is set, so positions differ between runs -- confirm
# both are intended.
ranlen <- runif(n = nrow(df_fit), min = 0.05, max = 0.15)
df_fit$x_rand <- df_fit$x + ranlen


# One data frame per class so that the plot can layer them in a fixed order
data_subset0 <- filter(df_fit, color == -1) # decreasing
data_subset1 <- filter(df_fit, color == 1)  # increasing
data_subset2 <- filter(df_fit, color == 0)  # no change

# Fig. 1B: per-species slopes against log maximum observed length, with the
# second-stage fit (median line and shaded band) on top.
#
# Layers are added "no change" first, then "increasing", then "decreasing",
# so the classes with intervals excluding zero are drawn over the grey points.
#
# NOTE(review): the colour aesthetics below are literal strings, i.e. data
# values, not colours. With an unnamed `values` vector ggplot2 matches colours
# to the sorted labels (blue, grey, red), so the layer labelled "blue" is
# drawn in "#de2d26" and the layer labelled "red" in "#3182bd". The same
# pattern is used for panel A, so it is left unchanged here to keep the two
# panels consistent -- confirm the intended colours and, if needed, switch
# both panels to a named vector.
p2 <- ggplot(df_CI) +
  geom_abline(intercept = 0, slope = 0, linetype = "dashed") +
#  geom_point(data = df_fit, aes(x = x, y = y, color = color)) +
  geom_point(data = data_subset2, aes(x = x_rand, y = y, col = "grey")) +
  geom_errorbar(
    data = data_subset2,
    aes(x = x_rand, ymin = p10, ymax = p90, color = "grey"), width = 0
  ) +
  geom_point(data = data_subset1, aes(x = x_rand, y = y, color = "blue")) +
  geom_errorbar(
    data = data_subset1,
    aes(x = x_rand, ymin = p10, ymax = p90, color = "blue"), width = 0
  ) +
  geom_point(data = data_subset0, aes(x = x_rand, y = y, color = "red")) +
# geom_errorbar(data = df_fit, aes(x = x_rand, ymin = p10, ymax = p90, color = color), width=0) +
  geom_errorbar(
    data = data_subset0,
    aes(x = x_rand, ymin = p10, ymax = p90, color = "red"), width = 0
  ) +
  # shaded band and median line of the second-stage fit
  geom_polygon(data = df_polygon, aes(x = x, y = y), fill = "#feb24c") +
  geom_line(aes(x = x, y = mu_500)) +
  labs(
    x = expression("Maximum body length, cm"),
    y = expression("Relative change in body length (per "~degree~"C)")
  ) +
  scale_colour_manual(values = c("#de2d26", "grey70", "#3182bd")) +
  ylim(-0.25, 0.25) +
  # x is on the natural-log scale; label the ticks in cm
  scale_x_continuous(
    breaks = c(log(20), log(50), log(150)),
    labels = c("20", "50", "150")
  ) +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text = element_text(size = 12)
  )
p2

# Combine with panel A (p1 is built in the preceding chunk)
plot_grid(p1, p2, labels = c("A", "B"), label_size = 26)


```

### Supplementary Table 2: spatial statistics

```{r, warning=F}
# Counts and proportions of species by sign of their spatial slope.
# "Significant" here means the 10-90% posterior interval excludes zero.
# The numbers in the trailing comments are the values recorded by the authors.
load(file = "analysesOutputs/slopesInSpace335spp.RData")

sum(bet1ci$p50 < 0, na.rm = TRUE) # 189 have negative slopes
sum(bet1ci$p50 > 0, na.rm = TRUE) # 146 have positive slopes (larger where warmer)

sum(bet1ci$p90 < 0, na.rm = TRUE) # 97 species have negative significant responses
sum(bet1ci$p10 > 0, na.rm = TRUE) # 64 have significant positive responses

increasing <- bet1ci[which(bet1ci$p10 > 0), ]
decreasing <- bet1ci[which(bet1ci$p90 < 0), ]

# Split at a thermal midpoint of 23
tempSpace <- bet1ci %>% filter(midpoint <= 23) # temperate species
tropSpace <- bet1ci %>% filter(midpoint > 23)  # tropical species

## analysis for temperate and tropical separately

### temperate
n_temp_up   <- sum(tempSpace$p10 > 0, na.rm = TRUE)
n_temp_down <- sum(tempSpace$p90 < 0, na.rm = TRUE)
n_temp_up   #33
n_temp_down #35
# proportion from significantly changing species (68 in total); the
# denominator is computed rather than hard-coded so it follows the data
n_temp_up / (n_temp_up + n_temp_down)
n_temp_down / (n_temp_up + n_temp_down)

# proportion from all temperate species
sum(tempSpace$p50 > 0, na.rm = TRUE) #57
sum(tempSpace$p50 > 0, na.rm = TRUE) / nrow(tempSpace) #0.49
sum(tempSpace$p50 < 0, na.rm = TRUE) #59
sum(tempSpace$p50 < 0, na.rm = TRUE) / nrow(tempSpace) #0.51

### tropical
n_trop_up   <- sum(tropSpace$p10 > 0, na.rm = TRUE)
n_trop_down <- sum(tropSpace$p90 < 0, na.rm = TRUE)
n_trop_up   #31
n_trop_down #62
# proportion from significantly changing species (93 in total)
n_trop_up / (n_trop_up + n_trop_down)
n_trop_down / (n_trop_up + n_trop_down)

# proportion from all tropical species
sum(tropSpace$p50 > 0, na.rm = TRUE) #88
sum(tropSpace$p50 > 0, na.rm = TRUE) / nrow(tropSpace) #0.40
sum(tropSpace$p50 < 0, na.rm = TRUE) #130
sum(tropSpace$p50 < 0, na.rm = TRUE) / nrow(tropSpace) #0.60

```

### Supplementary Figure 3: testing if exclusion of juveniles might affect Fig1B results

The full analysis is not given here, as it is a repeat of the analysis above for 20 species, but with all size data included. 
First get the data

```{r, warning=FALSE, message=FALSE, echo = F, eval = F}
#rm(list = ls()) # clear memory

## Select small species with sufficient data to test the data exclusion effect:
## maximum observed size below 20 and more than 5000 records
smallSpp <- main_data %>%
  group_by(TAXONOMIC_NAME) %>%
  summarise(maxlen = first(MaxSizeObs), count = n()) %>%
  filter(maxlen < 20) %>%
  filter(count > 5000)

# these are species that were small and had negative slopes
smallTest <- c(
  "Chromis nitida", "Pomacentrus milleri", "Pomacentrus coelestis",
  "Dascyllus reticulatus", "Pomacentrus bankanensis",
  "Cirripectes filamentosus", "Pomacentrus lepidogenys", "Dotalabrus alleni",
  "Dascyllus aruanus", "Pomacentrus vaiuli", "Plectroglyphidodon lacrymatus",
  "Pseudochromis fuscus", "Chromis ternatensis", "Labropsis australis"
)

# Keep only the candidates that also satisfy the data criteria, so the
# analysis is repeated on small species with large data amounts
smallToRun <- smallTest[smallTest %in% smallSpp$TAXONOMIC_NAME] # this gives 10spp

# Repeat the same for big species
## Select big species that have lots of data: maximum observed size above 45
## and more than 1500 records.
## The summary is built from `main_data`, as for the small species. It was
## previously built from `df`, which in this document holds the per-species
## slope summaries and has no TAXONOMIC_NAME or MaxSizeObs columns.
bigSpp <- main_data %>%
  group_by(TAXONOMIC_NAME) %>%
  summarise(maxlen = first(MaxSizeObs), count = n()) %>%
  filter(maxlen > 45) %>%
  filter(count > 1500)

# these were big species that mostly had positive slopes
bigTest <- c(
  "Kyphosus sydneyanus", "Arripis trutta", "Dinolestes lewini",
  "Choerodon rubescens", "Cheilodactylus spectabilis", "Achoerodus viridis",
  "Caranx melampygus", "Dactylophora nigricans", "Coris aygula",
  "Plectropomus leopardus", "Heterodontus portusjacksoni", "Achoerodus gouldii"
)

# These will be run for the tests (10 spp)
bigToRun <- bigTest[bigTest %in% bigSpp$TAXONOMIC_NAME] # this gives 9 species, so I will add "Caranx melampygus" which is also big and had a positive slope, even though it had less data
## these small and big species were then analysed using procedures as above
```

And then plot

```{r, warning=FALSE, message=FALSE, echo = F, eval = T, fig.width=10, fig.height=6}
# Slope estimates for the 20 test species with juvenile data included
# (the file provides `testExcl`)
load(file = "inputs/testExcl.RData")
# Slope estimates of the main analysis (the file provides `bet1ci`)
load(file = "analysesOutputs/slopesInSpace335spp.RData")

# Bring the main-analysis values alongside the test values, matched by species
main_row <- match(testExcl$species, bet1ci$species)
testExcl$p50_old <- bet1ci$p50[main_row]
testExcl$p10_old <- bet1ci$p10[main_row]
testExcl$p90_old <- bet1ci$p90[main_row]
testExcl$maxSize <- bet1ci$MaxLenObs[main_row]

# Main-analysis slope (x) against the slope with all data (y). Solid axes at
# zero, dashed guides at +/- 0.05, 10-90% intervals in both directions and
# the 1:1 line in orange.
guide_col <- "grey20"
bar_col <- "darkgrey"

ggplot(testExcl, aes(x = p50_old, y = p50)) +
  geom_vline(xintercept = 0) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0.05, linetype = "dashed", color = guide_col) +
  geom_hline(yintercept = 0.05, linetype = "dashed", color = guide_col) +
  geom_vline(xintercept = -0.05, linetype = "dashed", color = guide_col) +
  geom_hline(yintercept = -0.05, linetype = "dashed", color = guide_col) +
  geom_errorbar(aes(ymin = p10, ymax = p90), color = bar_col) +
  geom_errorbarh(aes(xmin = p10_old, xmax = p90_old), color = bar_col) +
  geom_point(color = "black") +
  geom_abline(intercept = 0, slope = 1, color = "orange", size = 1) +
  # geom_text(aes(label=species), nudge_y = 0.001, size = 5) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text = element_text(size = 12)
  ) +
  ggtitle("Comparing lower 10% size data exclusion on b1 slope estimates") +
  labs(
    y = "b1 slopes (and 80% PPD) with all data",
    x = "b1 slopes (and 80% PPD) with smallest 10% individuals removed"
  )
```

Below are the results of the second hierarchical Bayesian analysis when the slope estimates for the 20 test species were replaced with the new estimates (obtained with all size data included).

*Bayesian results with 20 values replaced without data exclusion*

```
          mean se_mean      sd     2.5%      50%    97.5% n_eff    Rhat
beta0 -0.00496 0.00007 0.00198 -0.00877 -0.00501 -0.00101   768 0.99744
beta1  0.01828 0.00012 0.00342  0.01157  0.01828  0.02469   874 1.00176
sigma  0.02805 0.00006 0.00180  0.02449  0.02807  0.03150   971 0.99840
```

*Bayesian result of the original analysis*

```
          mean se_mean      sd     2.5%      50%    97.5% n_eff    Rhat
beta0 -0.00484 0.00007 0.00197 -0.00868 -0.00479 -0.00117   826 1.00231
beta1  0.01948 0.00011 0.00334  0.01330  0.01937  0.02585   973 0.99698
sigma  0.02811 0.00007 0.00180  0.02477  0.02811  0.03161   757 1.00706
```

### Extended Data Fig. 2: correlation to abundance skew

```{r, warning=FALSE, message=FALSE, echo = F, eval = T, fig.width=10, fig.height=6}
# Per-species spatial slopes (the file provides `bet1ci`)
load(file = "analysesOutputs/slopesInSpace335spp.RData")
# Thermal skew data from Waldock et al. 2019
skew <- read.csv(file = "inputs/thermal_skew.csv")

# Copy the skew variables onto the slope table, matched by species name
skew_row <- match(bet1ci$species, skew$SpeciesName)
bet1ci$skew <- skew$T_Skew_Observations[skew_row]
bet1ci$skewConf <- skew$confidence[skew_row]
bet1ci$Topt <- skew$Topt[skew_row]
# Difference between Topt and the species' thermal midpoint
bet1ci$Topt_midp <- bet1ci$Topt - bet1ci$midpoint
# Width of the 10-90% posterior interval of the slope
bet1ci$range <- abs(bet1ci$p90 - bet1ci$p10)

# Correlation between abundance skew and the median size slope
cor.test(bet1ci$skew, bet1ci$p50)

# Scatter plot with the least-squares line in red
par(mfrow = c(1, 1), mar = c(5, 5, 3, 2))
plot(
  bet1ci$skew, bet1ci$p50,
  xlab = "Slope of species abundance - SST correlation",
  ylab = "Slope of body size - SST correlation",
  pch = 19, cex = 1, cex.lab = 1.5, bty = "l"
)
skew_lm <- lm(p50 ~ skew, data = bet1ci)
abline(skew_lm, lwd = 2, col = "red")


```

### TEMPORAL ###

### Supplementary Table 3: SST trends in locations

```{r, warning=FALSE, message=FALSE, echo = F, eval = F, fig.width=10, fig.height=6}
# Read in monthly SST data for nine long term locations
df_data <- read_csv("inputs/CondensedTemps.csv")

df_data$Loc <- factor(df_data$Loc) # make location a factor

# Raw SST series, one panel per location
ggplot(df_data, aes(x = date_dec, y = meansst, color = Loc)) +
  geom_point() +
  facet_wrap(~ Loc) +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  labs(x = "Date (decimal)", y = "Sea-surface temperature (degrees Celsius)")

# Fit a single location
Locs <- levels(df_data$Loc)
Locs # show locations where data are available
Loc_fitted <- 3 # index into Locs of the location fitted (see list above)

df_fit <- df_data %>%
  filter(Loc == Locs[Loc_fitted]) # extract the location of interest

# Data list passed to the SST Stan model; dates are expressed relative to 2018
stan_dat <- list(
  N    = nrow(df_fit),           # number of temperature readings
  dd   = df_fit$date_dec - 2018, # decimal date (since 2018)
  msst = df_fit$meansst          # mean sea-surface temperature
)

# perform a Bayesian fit (not run now, but fits are saved)
#fit <- stan(file = 'stan_models/SST_fit.stan', data = stan_dat, # output is supressed
#  iter = 3000, warmup = 1000, chains = 3, seed = 66, thin = 20)
#save(fit, file = "analysesOutputs/SST_fit.RData")

# Load the saved fit (the file provides `fit`)
load(file = "analysesOutputs/SST_fit.RData")
model_params <- c("beta0", "beta1", "beta2", "phi", "sigma")

# display the posterior distribution statistics
print(fit, pars = model_params, probs = c(0.025, 0.1, 0.5, 0.9, 0.975), digits = 4)

# display traceplot to check convergence
traceplot(fit, pars = model_params, inc_warmup = TRUE, ncol = 3)

# show density distributions
N_extract <- rstan::extract(fit, pars = model_params)

# Long format of the draws. Note the naming: the column called "Parameter"
# holds the sampled values, while "L1" holds the parameter name.
df_prm_pdf <- reshape2::melt(N_extract, value.name = "Parameter")

# Fix the facet order to the order of model_params
df_prm_pdf$L1 <- factor(df_prm_pdf$L1, levels = model_params)

title <- paste0("Posterior probability distributions for ", Locs[Loc_fitted])

# One density panel per parameter. `scales` is spelled out in full; the
# previous `scale =` only worked through partial argument matching.
ggplot(data = df_prm_pdf, aes(x = Parameter)) +
  geom_density(fill = "grey75") +
  labs(
    title = title,
    x     = "Value",
    y     = "Probability density"
  ) +
  facet_wrap(~ L1, scales = "free", ncol = 3) +
  theme_bw()


# Is the fit adequate?
# Overlay the fitted mean curve of a few randomly chosen posterior draws on
# the observed monthly SST, split into one panel per decade.
REPS <- 10
N <- length(N_extract$beta0)
Rep <- sample(1:N, REPS) # indices of the posterior draws to show
tm <- seq(from = 1980, to = 2019, by = 0.05)
Nt <- length(tm)

df_plot <- df_fit
df_plot$decade <- factor(10 * (df_plot$date_dec %/% 10))

p_base <- ggplot(df_plot, aes(x = date_dec, y = meansst)) +
  geom_point(color = "grey45") +
  facet_wrap(~ decade, scale = "free_x") +
  labs(x = "Year", y = "Mean monthly sea-surface temperature (C)") +
  theme_bw()

# Time relative to 2018, the origin used when the data list was built
tm_rel <- tm - 2018

for (draw in Rep) {
  # Linear trend plus an annual cosine term with phase phi.
  # NOTE(review): 3.14159 is kept as written rather than `pi`; presumably it
  # mirrors the constant in the Stan model file -- confirm.
  curve_mu <- N_extract$beta0[draw] +
    N_extract$beta1[draw] * tm_rel +
    N_extract$beta2[draw] * cos(2 * 3.14159 * (tm_rel - N_extract$phi[draw]))

  df_line <- tibble(
    date_dec = tm,
    meansst = curve_mu
  )
  # same decade key as the observations, so each curve lands in its panel
  df_line$decade <- factor(10 * (df_line$date_dec %/% 10))

  p_base <- p_base +
    geom_line(data = df_line, aes(x = date_dec, y = meansst), color = "red")
}

p_base + labs(title = Locs[Loc_fitted])

## Now fit for all locations

# Results table: posterior mean and 10% / 90% quantiles of beta1 per location
df_beta1 <- tibble(
  Loc  = Locs, # 9 locations
  Mean = 0,    # this will be updated
  q10  = 0,
  q90  = 0
)

# Run the Bayesian fit for every location.
# A dedicated loop index is used so that `Loc_fitted` (the single location
# chosen above) is not overwritten.
for (loc_idx in seq_along(Locs)) {
  df_fit <- df_data %>% filter(Loc == Locs[loc_idx])

  stan_dat <- list(
    N    = nrow(df_fit),           # number of temperature readings
    dd   = df_fit$date_dec - 2018, # decimal date (since 2018)
    msst = df_fit$meansst          # mean sea-surface temperature
  )

  fit <- stan(file = 'stan_models/SST_fit.stan', data = stan_dat,
    iter = 3000, warmup = 1000, chains = 3, seed = 66, thin = 20, refresh = 0)

  # Store the long-term rate of change. The beta1 draws are extracted once,
  # by name, with the namespace-qualified rstan::extract (a bare `extract`
  # can be masked by tidyr). The mean is taken over all retained draws of all
  # chains, which replaces the positional lookup get_posterior_mean(fit)[2,4].
  beta1_draws <- rstan::extract(fit, pars = "beta1")[[1]]
  beta1_q <- quantile(beta1_draws, probs = c(0.1, 0.9))
  df_beta1$Mean[loc_idx] <- mean(beta1_draws)
  df_beta1$q10[loc_idx] <- beta1_q[[1]]
  df_beta1$q90[loc_idx] <- beta1_q[[2]]
}


df_beta1 # show estimated annual rates of change in sst for 9 locations
# The name stored for Maria does not match the name used elsewhere, which has
# a trailing space, so it is rewritten here.
# NOTE(review): the row is addressed by position (5); confirm that the fifth
# level of Loc is Maria before relying on this.
df_beta1$Loc[5] <- "Maria "
df_beta1$Loc <- as.character(df_beta1$Loc)

#SSTtrendFit <- df_beta1
#save(SSTtrendFit, file = "analysesOutputs/SSTtrend_fits.RData")
```

### Table of long-term SST trends

```{r, warning=FALSE, message=FALSE, echo = F, eval = T, fig.width=10, fig.height=6}
# Saved per-location SST trend estimates (the file provides `SSTtrendFit`)
load("analysesOutputs/SSTtrend_fits.RData")

# Render the estimates as a table in the knitted document
knitr::kable(x = SSTtrendFit)

```

### Create temporal data files 

```{r, warning=FALSE, message=FALSE, echo = F, eval = F}

## Temporal data set with likely erroneous records and the lower 10% filtered
## out, restricted to the 9 locations with long-term data available
# This was used to select the long-term locations:
#yearsPerLoc <- VertData %>% filter (!Loc %in% badlocs) %>% group_by(Loc) %>% summarise (miny = min(year), maxy = max(year), timespan = maxy - miny, LocationFirst = first(Location), LocationLast = last(Location), lattmean = mean(SiteLat), lattsd = sd(SiteLat), longmean = mean(SiteLong), longsd = sd(SiteLong), counts = n(), spp = n_distinct(TAXONOMIC_NAME), years = n_distinct(year), geogr = first(geogroup)) %>% filter (timespan > 14) %>% filter (years > 10)

load(file="inputs/VertData_Temporal.RData")
## Add site mpa status where each site name has been checked by Graham
#VertData_temporal$mpa <- mpa_checked$mpaFinal[match(VertData_temporal$`Site name`, mpa_checked$sitename)]

#Cross check it again by comparing to the datafile from Graham
#mpatest <- VertData_temporal %>% group_by(SiteCode) %>% summarise(loc = first(Loc), mpa = first(mpa), name = first(`Site name`)) 

#Setup dataframe inside MPA
#dfMPA9 <- VertData_temporal %>% filter (mpa == 1)
#save (dfMPA9, file = "inputs/FishSizeTemp9locsMPA_April.RData")

#setup dataframe outside MPA
#dfNoMPA9 <- VertData_temporal %>% filter (mpa == 0)
#save (dfNoMPA9, file = "inputs/FishSizeTemp9locsNoMPA_April.RData")

## Remove rare observations
df <- VertData_temporal

# Step 1: species x location x year combinations with at least 20 records
records_per_year <- summarise(
  group_by(df, TAXONOMIC_NAME, Loc, year),
  records = n()
)
df_summary <- filter(records_per_year, records >= 20)

# keep only the records belonging to those combinations
df_ok <- df %>%
  semi_join(df_summary, by = c("TAXONOMIC_NAME", "Loc", "year"))

# Step 2: species x location combinations with enough qualifying years.
# NOTE(review): the threshold in the code is 7 or more years, whereas the
# original comment said 8 -- confirm which is intended.
df_summary <- df_ok %>%
  group_by(TAXONOMIC_NAME, Loc) %>%
  summarise(num_years = n_distinct(year)) %>% # years per location
  filter(num_years >= 7) %>%
  arrange(TAXONOMIC_NAME, Loc)

# drop the records of combinations that did not qualify
df_ok <- df_ok %>%
  semi_join(df_summary, by = c("TAXONOMIC_NAME", "Loc"))

# Select only the relevant columns
temporal_data <- df_ok %>%
  select(
    Location, SurveyID, SiteCode, Diver, TAXONOMIC_NAME, SPECIES_EPITHET,
    GENUS, FAMILY, ORDER, SizeClass, day, month, year, geogroup, SiteLat,
    SiteLong, midpoint, LWa, LWb, MaxLenFB, MaxSizeObs, mpa, Loc
  )

## Add full location names for figures
# Lookup from location code to display name. It is built directly as a named
# list (the previous list(NA) start left a stray unnamed NA element).
# NOTE(review): the display names are paired with the codes by position, in
# the order unique() returns the codes from df_ok -- confirm that this order
# matches the names listed below.
locnames <- list(
  Loc = unique(df_ok$Loc),
  LocFig = c(
    "Port Phillip Bay", "Maria Island", "Port Davey", "Jervis Bay", "Bicheno",
    "Tinderbox", "Bass Strait", "Jurien Bay", "Ninepin"
  )
)
# Fail early if the number of locations in the data and the number of
# hard-coded names disagree, rather than labelling silently by recycling/NA
stopifnot(length(locnames$Loc) == length(locnames$LocFig))

temporal_data$LocFig <- locnames$LocFig[match(temporal_data$Loc, locnames$Loc)]
save(temporal_data, file = "inputs/temporal_fish_data.RData")

# how many sites on average per each long-term location
sitesPerLoc <- temporal_data %>%
  group_by(Loc) %>%
  summarise(n_sites = n_distinct(SiteCode))
mean(sitesPerLoc$n_sites)

#save coordinates of long term locations to be plotted in Extended data Fig1 (above)

#longLoc <- temporal_data %>% group_by(Loc) %>% summarise (lat = mean(SiteLat), long = mean(SiteLong))
#save(longLoc, file = "inputs/longLoc.RData")

## So file "temporal_data" is the main file used for the Bayesian analyses and only includes years and species with sufficient data (105 species) in the 9 long-term locations
#File VertData_temporal has all species and years for the 9 long-term locations (434 species)

```

### Plot raw sizes through time 

Temporal size plots for all species can be found at https://github.com/astaaudzi/RLSfishSize/SizePlotsTemporal

```{r, warning=FALSE, message=FALSE, echo = F, eval = F, fig.width=10, fig.height=6}
# Load the data set for the 9 locations (the file provides `temporal_data`)
load(file = "inputs/temporal_fish_data.RData")

df_ok <- temporal_data
# Log size class; computed once here because it does not depend on the
# species (it was previously recomputed on every pass of the loop)
df_ok$y <- log(df_ok$SizeClass)

Species <- sort(unique(df_ok$TAXONOMIC_NAME))
length(Species)

# One figure per species: all observed (log) size classes in grey and the
# yearly mean per location in dark red, with point size showing log10 of the
# number of individuals. seq_along() keeps the loop empty if there are no
# species.
for (aa in seq_along(Species)) {

  SpeciesName <- Species[aa]
  print(SpeciesName)
  print(aa)

  # prepare data
  df_fit <- df_ok %>% filter(TAXONOMIC_NAME == SpeciesName)

  ## yearly means per location
  df_means <- df_fit %>%
    group_by(year, LocFig) %>%
    summarise(mu = mean(y), n = n(), ln10 = log10(n)) %>%
    arrange(year)

  sizeplot <- ggplot(df_fit, aes(x = year, y = y)) +
    geom_point(color = "grey80", size = 1.5) + # observed data
    geom_point(
      data = df_means, aes(x = year, y = mu, size = ln10), color = "dark red"
    ) +
    ylim(0.5, 4.5) +
    # `scales` spelled out in full (was matched partially from `scale`)
    facet_wrap(~ LocFig, scales = "free") +
    labs(
      title = SpeciesName, subtitle = "grey dots show all observed size classes",
      size = "no.ind\n(log10)", x = "Year", y = "Mean log length (cm)") +
    theme_bw()

  # The output directory must already exist
  ggsave(
    filename = paste0("AllSizePlotsMade/", SpeciesName, "temporal.jpg"),
    plot = sizeplot, width = 8, height = 7
  )

}

```

### Bayesian code for analyses with year RE but no MPA: all MPA combinations
This code was used to run the main analyses on GEM

```{r, warning=FALSE, message=FALSE, echo = F, eval = F}
# packages used during this analysis
library(tidyverse)
library(lme4)
library(rstan)
#rm(list = ls()) # clear memory

#Data set used to do runs for outside MPA
#load(file="inputs/FishSizeTemp9locsNoMPA_April.RData")
#df <- dfNoMPA9
#Data set used to do runs for inside MPA
#load(file="inputs/FishSizeTemp9locsMPA_April.RData")
#df <- dfMPA9

# If using pooled data from inside and outside MPA
load(file = "inputs/VertData_Temporal.RData")
df <- VertData_temporal

# set factors
df$TAXONOMIC_NAME <- factor(df$TAXONOMIC_NAME)
df$FLoc <- factor(df$Loc)
df$Size <- factor(as.character(round(df$SizeClass, 1)),
  levels = c("2.5",  "5", "7.5", "10", "12.5", "15", "20", "25", "30",
    "35", "37.5", "40", "50", "62.5", "75", "87.5", "90","100", "112.5",
    "125", "137.5", "150", "162.5", "175", "187.5", "200", "250", "300"))


# Per-species overview, sorted by number of records.
# NOTE: df_summary is reassigned further down for the record filter.
df_summary <- df %>%
  group_by(TAXONOMIC_NAME) %>%
  summarise(
    records = n(),
    size    = round(mean(SizeClass), 2),
    years   = length(unique(year)),
    Locs    = length(unique(Loc))
  ) %>%
  arrange(desc(records)) # sort by number of records

# extract data on observed size classes
yVals <- sort(unique(df$SizeClass)) # fish size classes
I     <- length(yVals)              # number of fish size classes
# Cut-offs between neighbouring size classes: log of the midpoint of each
# adjacent pair. Computed on whole vectors, which also yields an empty vector
# when there is a single size class (the previous `for (i in 2:I)` loop would
# have counted backwards in that case).
yCuts <- log(0.5 * (yVals[-I] + yVals[-1]))

# decide which records to retain for analyses
df_summary <- df %>%
  group_by(TAXONOMIC_NAME, Loc, year) %>%
  summarise(records = n()) %>%
  filter(records >= 20) # must have 20 obs per year at a location

# keep records of species/location/year combinations with >= 20 observations
df_ok <- semi_join(df, df_summary,
  by = c("TAXONOMIC_NAME", "Loc", "year"))

# NOTE(review): the code requires 7 or more qualifying years per species and
# location; the original comment said 8 -- confirm which is intended.
df_summary <- df_ok %>%
  group_by(TAXONOMIC_NAME, Loc) %>%
  summarise(
    num_years = length(unique(year)) # years per location
  ) %>%
  filter(num_years >= 7) %>%
  arrange(TAXONOMIC_NAME, Loc)

# keep records of the qualifying species/location combinations
df_ok <- semi_join(df_ok, df_summary,
  by = c("TAXONOMIC_NAME", "Loc"))

ok_species <- sort(unique(df_ok$TAXONOMIC_NAME)) # species that satisfy criteria
length(ok_species)


# Fit the temporal Stan model separately for every qualifying species and
# save each fit to its own file.
# The species index has its own name so that nothing inside the body can
# overwrite it (the count-matrix fill previously reused `i`), and
# seq_along() keeps the loop empty when there are no species.
for (sp_idx in seq_along(ok_species)) {

  focal_species <- ok_species[sp_idx] # choose a species to analyse
  print(sp_idx)
  print(focal_species)

  # create the data structures needed to fit the model in rstan;
  # a survey is one date at one location
  df_stan <- df_ok %>%
    filter(TAXONOMIC_NAME == focal_species) %>%
    mutate(yearC = year - 2018) %>% # rescale year for fitting
    select(year, month, day, Loc, yearC, SizeClass) %>%
    mutate(
      survey_date = as.Date(paste(year, month, day, sep = "-")),
      survey = paste(survey_date, Loc, sep = "-")
    ) %>%
    arrange(survey) %>%
    mutate(indx_srv = as.integer(factor(survey)))

  # add random factor indexes for year, location, and size class
  years    <- sort(unique(df_stan$year))
  df_years <- tibble(year = years, indx_year = seq_along(years))
  locs     <- sort(unique(df_stan$Loc))
  df_locs  <- tibble(Loc = locs, indx_loc = seq_along(locs))
  df_yVals <- tibble(SizeClass = yVals, indx_sc = seq_along(yVals))

  df_stan <- left_join(df_stan, df_years, by = "year")
  df_stan <- left_join(df_stan, df_locs,  by = "Loc")
  df_stan <- left_join(df_stan, df_yVals, by = "SizeClass")

  # number of fish per survey and size class
  df_n <- df_stan %>%
    group_by(indx_srv, indx_sc) %>%
    summarise(n = n())

  max_sc <- length(yVals) # max(df_n$indx_sc)
  max_srv <- max(df_n$indx_srv)

  # matrix of counts (row = survey, column = size class); each row of df_n
  # addresses one cell, so the cells are filled in one step by (row, column)
  # matrix indexing
  m_obs <- matrix(data = 0, nrow = max_srv, ncol = max_sc)
  m_obs[cbind(df_n$indx_srv, df_n$indx_sc)] <- df_n$n

  # create survey data: all rows of a survey share one date and location, so
  # the median simply recovers that survey's year and location index
  df_srv <- df_stan %>%
    group_by(indx_srv) %>%
    summarise(
      yearC= median(yearC),
      year = median(indx_year),
      Loc  = median(indx_loc)
    )


  # create the list of data passed to rstan
  stan_dat <- list(
    N      = nrow(df_srv),            # surveys
    J      = max(df_srv$Loc),         # locations
    K      = length(yCuts) + 1,       # fish size classes
    L      = max(df_srv$year),        # years of data
    cutoff = yCuts,                   # size class cut-offs
    y      = m_obs,                   # observations per size class
    x      = df_srv$yearC,            # predictor variable 1 (years ago)
    i_loc  = as.integer(df_srv$Loc),  # locations
    i_yr   = as.integer(df_srv$year)  # year
  )


  # NOTE(review): no seed is passed here, unlike the other stan() calls in
  # this document, so repeated runs will not give identical draws.
  fit <- stan(file = 'stan_models/model1_temporalNoMPA.stan', data = stan_dat,
      iter = 1000, warmup = 500, chains = 3, refresh = 200,
      control = list(max_treedepth = 15))

  # the directory part of the path is a placeholder to be replaced by the user
  save(fit, file = paste0("...location for output ... /", focal_species, "_pool.RData"))

}

```

### STAN code for temporal analyses 

```{r, warning=FALSE, message=FALSE, echo = F, eval = F}
// Shane A. Richards 20/12/2018
// fits locations independently
// includes random effects associated with year, and survey
// combines identical observations (i.e. uses counts, not single observations)
//
// Likelihood: log fish length in survey i is normal with mean mu (location
// intercept + location slope * year + year effect + survey effect) and SD
// sigma_size. The probability of each size class is the normal mass between
// its cut-offs, mixed with a small uniform component (eps) spread over all K
// classes, and the counts per class enter the log-likelihood as count * log(pr).

data {
  int  <lower = 1>            N;           // number of surveys
  int  <lower = 1>            J;           // number of locations
  int  <lower = 1>            K;           // number of size classes
  int  <lower = 1>            L;           // number of year classes
  real                        cutoff[K-1]; // size class cut-offs
  matrix[N,K]                 y;           // observed fish size classes (counts)
  real                        x[N];        // centred year (differs per location)
//  int  <lower = 0, upper = 1> mpa[N];      // observed predictor mpa
  int  <lower = 1, upper = J> i_loc[N];    // location (index only)
  int  <lower = 1, upper = L> i_yr[N];     // observed year (index only)
}

// No explicit priors are set on the bounded parameters below (the prior
// statements in the model block are commented out), so each has an implicit
// uniform prior over its declared range.
parameters {
  real <lower =  1.0, upper = 4.0> beta_0[J];   // log(mean size) for each location
  real <lower =  -0.10, upper = 0.10> beta_yr[J];  // annual change on log(size) for each location
//  real <lower =  -0.50, upper = 0.50> beta_mpa;    // change in log(mean size) when in mpa
  real <lower =  0.100, upper = 0.40> sigma_size;  // variation in (log)size
  real <lower =  0.001, upper = 0.50> sigma_yr;    // random variation among years
  real <lower =  0.001, upper = 0.50> sigma_srv;   // random variation among surveys
  real yr_RE[L];    // estimated year-specific variation (random effect)
  real srv_RE[N];   // estimated survey-specific variation (random effect)
}

transformed parameters {
}

model {
  vector[K-1] cpr;       // cumulative probabilties
  vector[K]   pr;        // probabilities for each size class
  real mu;               // mean log fish size 
  real eps = 0.01;       // a small probability for random size class
  real c1;               // weight of the size-distribution model (1 - eps)
  real c2;               // uniform share added to every size class (eps / K)

  // with these weights the K class probabilities sum to (1 - eps) + eps = 1
  c1 = (1.0 - eps);
  c2 = eps/K;
  
  // beta_0   ~ normal(3,0.5); // location-specific prior size (log)
  // beta_yr  ~ normal(0,0.1); // location-specific prior slope
  // beta_mpa ~ normal(0,0.1); // prior mpa effect
  yr_RE    ~ normal(0.0, sigma_yr);  // random interannual differences
  srv_RE   ~ normal(0.0, sigma_srv); // random survey differences

  for (i in 1:N) { // for each survey 
    // calculate mean body size for the survey, given location, year, and survey
    mu = beta_0[i_loc[i]] + beta_yr[i_loc[i]]*x[i] + yr_RE[i_yr[i]] + srv_RE[i];


    // calculate cumulative probabilities of observing fish in each size class
    for (k in 1:(K-1)) {
      cpr[k] = normal_cdf(cutoff[k], mu, sigma_size); // cumulative probability
    }
    // calculate probabilities of observing fish in each size class (with random eps)
    pr[1] = c1*cpr[1] + c2; // probability observe in smallest size class
    for (k in 2:(K-1)) {
      pr[k] = c1*(cpr[k] - cpr[k-1]) + c2; // intermediate size classes
    }
    pr[K] = c1*(1.0 - cpr[K-1]) + c2; // probability of observing in largest size class
    
    for (k in 1:K) {
      target += y[i,k]*log(pr[k]); // add the log-likelihood term for each size class
    }
  }
}

generated quantities {
// int<lower=0, upper=1> y_new[T_new];
// for (t in 1:T_new){
//   y_new[t] = bernoulli_rng(p);
// }
}


```

### Process Bayesian outputs: traceplots, get params

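To obtain the `df_ok`, `ok_species` and `yVals` objects used below, first run the data-preparation part of the "Bayesian code for analyses with year RE but no MPA" chunk above with the relevant dataset. This chunk does not need to be run, as its outputs are provided.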

```{r, warning=FALSE, message=FALSE, echo = F, eval = F}
# Post-process the saved per-species Stan fits: write trace plots and
# posterior densities to one PDF and collect posterior quantiles of every
# model parameter into a single data frame (AllParams).
#
# Objects expected in the workspace (none of them is created in this chunk):
#   df_ok        - survey records with TAXONOMIC_NAME, year, month, day, Loc
#                  and SizeClass columns
#   ok_species   - vector of species names to process
#   yVals        - the SizeClass values, in the order used to index size classes
#   midpointSize - per-species lookup with columns TAXONOMIC_NAME, midp, Lmax
# The commented-out lines below show how they were obtained originally.
#load(file = "Species9locsYearREoutMPA.RData")  ## list of species for the analyses
#load(file = "DataFor9locsOutMPA.RData") #Selected datafile

#ok_species <- sort(unique(df_ok$TAXONOMIC_NAME)) # show species that satisfy criteria
#ok_species <- as.character(ok_species)
#ok_species <- ok_species[-which(ok_species == "Trachinops caudimaculatus")]
#load(file = "inputs/midpointSize.RData")

length(ok_species)

# quantile columns of the posterior summary and the suffixes they receive in
# the output column names (e.g. p.025 of beta_yr becomes betaloc025)
quant_cols <- c("p.025", "p.100", "p.500", "p.900", "p.975")
quant_tags <- c("025", "10", "50", "90", "975")

# one data frame per species, combined once after the loop rather than
# growing AllParams with rbind() on every iteration
param_rows <- vector("list", length(ok_species))

pdf(file=".../temporal_run_diagnostics.pdf", onefile=TRUE) 

# seq_along() so that an empty species list runs zero iterations
for (aa in seq_along(ok_species)) {

  SpeciesName <- ok_species[aa]
  print(aa)
  print(SpeciesName)

  # expected to provide the fitted model object `fit` used below
  load(file = paste("..saved Bayesian fits/",SpeciesName,"_temporal.RData",sep=""))

  # create the data structures needed to fit the model in rstan
  df_stan <- df_ok %>%
    filter(TAXONOMIC_NAME == SpeciesName) %>%
    mutate(yearC = year - 2018) %>% # rescale year for fitting
    select(year, month, day, Loc, yearC, SizeClass) %>%
    mutate(
      survey_date = as.Date(paste(year, month, day, sep = "-")),
      survey = paste(survey_date, Loc, sep = "-")
    ) %>%
    arrange(survey) %>%
    mutate(indx_srv = as.integer(factor(survey)))

  # add random factor indexes for year, location, and sample
  years    <- sort(unique(df_stan$year))
  df_years <- tibble(year = years, indx_year = seq_along(years))
  locs     <- sort(unique(df_stan$Loc))
  df_locs  <- tibble(Loc = locs, indx_loc = seq_along(locs))
  df_yVals <- tibble(SizeClass = yVals, indx_sc = seq_along(yVals))

  df_stan <- left_join(df_stan, df_years, by = "year")
  df_stan <- left_join(df_stan, df_locs,  by = "Loc")
  df_stan <- left_join(df_stan, df_yVals, by = "SizeClass")

  # create the matrix of counts (row = survey, column = size class);
  # count() returns an ungrouped table with the tally in column n
  df_n <- df_stan %>%
    count(indx_srv, indx_sc)

  max_sc <- length(yVals) # max(df_n$indx_sc)
  max_srv <- max(df_n$indx_srv)

  m_obs <- matrix(data = 0, nrow = max_srv, ncol = max_sc)
  for (i in seq_len(nrow(df_n))) {
    m_obs[df_n$indx_srv[i], df_n$indx_sc[i]] <- df_n$n[i]
  }

  # create survey data (one row per survey)
  df_srv <- df_stan %>%
    group_by(indx_srv) %>%
    summarise(
      yearC = median(yearC),
      year  = median(indx_year),
      Loc   = median(indx_loc)
    )

  n_Locs <- max(df_srv$Loc)
  loc_idx <- seq_len(n_Locs)

  # create a list of parameter names to investigate
  model_params <- c(
    paste0("beta_0[", loc_idx, "]"),
    paste0("beta_yr[", loc_idx, "]"),
    "sigma_size", "sigma_yr", "sigma_srv"
  )

  # display the posterior distribution statistic
  #print(fit, pars=model_params, probs=c(0.025,0.1,0.5,0.9,0.975), digits=3)

  print(traceplot(fit, pars = model_params, inc_warmup = TRUE, ncol = n_Locs))

  # extract the parameter estimates into a list
  l_params <- rstan::extract(fit, pars = model_params)

  # convert list to a data frame (column L1 holds the parameter name)
  df_prm_pdf <- reshape2::melt(l_params, value.name="Parameter")

  df_summary <- df_prm_pdf %>%
    group_by(L1) %>%
    summarise(
      n = n(), # samples
      p.025 = quantile(Parameter, probs = 0.025),
      p.100 = quantile(Parameter, probs = 0.100),
      p.500 = quantile(Parameter, probs = 0.500),
      p.900 = quantile(Parameter, probs = 0.900),
      p.975 = quantile(Parameter, probs = 0.975)
    )

  # plot posterior distributions, panels ordered as in model_params
  df_prm_pdf$L1 <- factor(df_prm_pdf$L1, levels = model_params)

  params <- ggplot(data=df_prm_pdf, aes(x = Parameter)) +
    labs(title = SpeciesName) +
    geom_density(fill = "wheat") +
    labs(
      x    = "Value",
      y    = "Probability density"
    ) +
    facet_wrap( ~ L1, scale = "free", ncol = max(3,max(df_srv$Loc))) +
    theme_bw()

  print(params)

  ## Extract posterior quantiles into a list.
  # Rows of df_summary are looked up by parameter name, not by position:
  # summarise() returns them in alphabetical order of L1, so "sigma_srv"
  # comes before "sigma_yr" and "beta_0[10]" would come before "beta_0[2]".
  # Position-based extraction therefore mislabelled the two sigma columns.
  param_groups <- list(
    betazero   = paste0("beta_0[", loc_idx, "]"),
    betaloc    = paste0("beta_yr[", loc_idx, "]"),
    sigma_size = "sigma_size",
    sigma_year = "sigma_yr",
    sigma_surv = "sigma_srv"
  )
  summary_names <- as.character(df_summary$L1)

  test <- list()
  for (grp in names(param_groups)) {
    rows <- match(param_groups[[grp]], summary_names)
    for (q in seq_along(quant_cols)) {
      test[[paste0(grp, quant_tags[q])]] <- df_summary[[quant_cols[q]]][rows]
    }
  }

  test$locs <- as.character(locs)
  test$species <- as.character(SpeciesName)

  # scalar entries (sigmas, species) are recycled to one row per location
  param_rows[[aa]] <- as.data.frame(test)

}

dev.off()

AllParams <- do.call(rbind, param_rows)

AllParams$midpoint <- midpointSize$midp[match(AllParams$species, midpointSize$TAXONOMIC_NAME)]
AllParams$Lmax <- midpointSize$Lmax[match(AllParams$species, midpointSize$TAXONOMIC_NAME)]

# NOTE: the sigma_year* and sigma_surv* columns are now filled by parameter
# name, so the former post-hoc swap of their column names is no longer needed
# and has been removed. As a result the sigma_year* columns now precede the
# sigma_surv* columns. Data frames produced by the earlier position-based
# version of this loop (e.g. AllParamsIn, AllParamsOut) and not yet corrected
# still need those two groups of column names swapped.

###
#save(AllParams, file = "analysesOutputs/BayesTempPool_105spp.RData")

```

### Extended data Fig 3: trends per location

```{r warning=FALSE, message=FALSE, eval=T, echo=FALSE, fig.width=10, fig.height=6}

# Load the temporal-trend posterior summaries (AllParams) and the fitted
# temperature trends (expected to provide SSTtrendFit), then attach a display
# name, a temperature trend and a plotting colour to every row of AllParams.
load(file = "analysesOutputs/BayesTempPool_105spp.RData") 
load(file = "analysesOutputs/SSTtrend_fits.RData")

## Add full location names for figures.
## Display names are paired with location codes purely by position, so they
## must be listed in the order in which the codes first appear in AllParams.
locnames <- list(NA)
locnames$Loc <- unique(AllParams$locs)
locnames$LocFig <- c("Port Phillip Bay", "Bass Strait", "Bicheno", "Jervis Bay", "Maria Island", "Tinderbox","Jurien Bay",  "Ninepin","Port Davey")
# stop early if the number of codes and display names differ; otherwise the
# positional pairing would silently produce missing or shifted names
stopifnot(length(locnames$Loc) == length(locnames$LocFig))
# Add estimated temperature trend (taken from the Mean column of SSTtrendFit)
locnames$trend <- SSTtrendFit$Mean[match(locnames$Loc, SSTtrendFit$Loc)]

loc_idx <- match(AllParams$locs, locnames$Loc)
AllParams$LocFig <- locnames$LocFig[loc_idx]
AllParams$TempTrend <- locnames$trend[loc_idx]

# colour code: red when the 90% quantile of the slope is below zero,
# blue when the 10% quantile is above zero, grey otherwise
AllParams$color <- "grey"
AllParams$color[which(AllParams$betaloc90 < 0)] <- "red"
AllParams$color[which(AllParams$betaloc10 > 0)] <- "blue"

# Median slope (with 10-90% quantile error bars) against species temperature
# midpoint, one panel per location. Points outside the y limits are dropped
# by ylim().
ggplot(AllParams, aes(midpoint, betaloc50, color = color)) +
  geom_hline(yintercept = 0) + 
  ylim(-0.05, 0.05) +  
  geom_point() +
  geom_errorbar(aes(ymin = betaloc10, ymax = betaloc90), size = 0.5) +
  # values are named so each category keeps its own colour even when one of
  # the three categories is absent from the data; the legend title is passed
  # through `name` (it was previously a guides() call given positionally)
  scale_color_manual(
    name   = "size response",
    values = c(blue = "blue", grey = "grey", red = "red"),
    breaks = c("blue", "grey", "red"),
    labels = c("increasing", "no change", "decreasing")
  ) +
  guides(color=FALSE) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(), 
    panel.grid.minor = element_blank(),
    axis.title.x = element_text(size=16),
    axis.title.y = element_text(size=16),
    axis.text=element_text(size=12)
  ) +
  ggtitle("Annual change in body length in 9 long term locations") + ## change to OUTSIDE if using outside MPA data
  labs(
    x    = "Species temperature midpoint",
    y    = "Slope of the annual change in body size (0.01 means ca 1% change per year)"
  ) +
  facet_wrap( ~ LocFig, scale = "fixed", ncol = 3) +
NULL

# Per-location summary: number of species whose slope quantiles indicate an
# increase (10% quantile above zero) or a decrease (90% quantile below zero),
# the share of species showing either, and the location's temperature trend.
df <- AllParams %>%
  select(
    species, LocFig, TempTrend,
    betaloc025, betaloc10, betaloc50, betaloc90, betaloc975
  ) %>%
  mutate(
    # flag is 1 where the condition holds and NA everywhere else
    decrease = ifelse(betaloc90 < 0, 1, NA),
    increase = ifelse(betaloc10 > 0, 1, NA)
  )

tempSummary <- df %>%
  group_by(LocFig) %>%
  summarise(
    incr = sum(increase, na.rm = TRUE),
    decr = sum(decrease, na.rm = TRUE),
    no_sp = n_distinct(species),
    change_spp = (decr + incr) / no_sp,
    temptrend = first(TempTrend)
  ) %>%
  arrange(temptrend)

knitr::kable(tempSummary, digits = 4)

#cor.test(tempSummary$temptrend, tempSummary$change_spp)

```

### Supplementary Figure 2: temporal changes inside/outside MPA

```{r, warning=FALSE, message=FALSE, echo = F, eval = T, fig.width=10, fig.height=6}
### Pooled fits (file BayesTempPool): keep slope quantiles and species traits
load(file = "analysesOutputs/BayesTempPool_105spp.RData") 
AllParamsPool <- AllParams %>%
  select(
    betaloc025, betaloc10, betaloc50, betaloc90, betaloc975,
    locs, species, midpoint, Lmax
  )
rm(AllParams)

### Fits for data inside MPAs: slope quantiles renamed with an inMPA prefix
load(file = "analysesOutputs/BayesTempIn_84spp.RData")
AllParamsIn <- AllParamsIn %>%
  select(
    inMPA025 = betaloc025, inMPA10 = betaloc10, inMPA50 = betaloc50,
    inMPA90 = betaloc90, inMPA975 = betaloc975,
    locs, species
  )

### Fits for data outside MPAs: slope quantiles renamed with an outMPA prefix
load(file = "analysesOutputs/BayesTempOut_74spp.RData")
AllParamsOut <- AllParamsOut %>%
  select(
    outMPA025 = betaloc025, outMPA10 = betaloc10, outMPA50 = betaloc50,
    outMPA90 = betaloc90, outMPA975 = betaloc975,
    locs, species
  )

# Combine the three sets of estimates; full joins keep species x location
# combinations that are present in only some of the models
Params2models <- full_join(AllParamsIn, AllParamsOut, by = c("species", "locs"))
Params3models <- full_join(Params2models, AllParamsPool, by = c("species", "locs"))

# simple correlation between median slopes inside and outside MPAs
cor.test(Params3models$inMPA50, Params3models$outMPA50)


## Median temporal slope inside vs outside MPAs, with 10-90% quantile bars.
## The axis limits below drop any point outside +/-0.05, including the
## outlier with a slope of 0.04 outside MPA if its error bar exceeds the range.
ggplot(Params3models, aes(inMPA50,outMPA50)) + 
  ylim(-0.05, 0.05) +   # add ylim to remove the outlier 
  xlim (-0.05, 0.05) +
  geom_hline(yintercept = 0, linetype = 2) + 
  geom_vline(xintercept = 0, linetype = 2) + 
  geom_errorbar(aes(ymin = outMPA10, ymax = outMPA90), color= "grey", size = 0.3) +
  geom_errorbarh(aes(xmin = inMPA10, xmax = inMPA90), color= "grey", size = 0.3) +
    geom_point() +
#    geom_text(aes(label=species), nudge_y = 0.001, size = 2) + 
  # annotate() draws the label once; geom_text() with a constant label drew it
  # once per data row, stacking identical text on top of itself.
  # The values are hard-coded from the cor.test() output and must be updated
  # by hand if the data change.
  annotate("text", x = -0.035, y = 0.05, label = "r = 0.47, p = 7e-08", size = 6, fontface = 3) +
  theme_bw() +
  theme(
    panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
    axis.title.x = element_text(size=14),
    axis.title.y = element_text(size=14)) +
# geom_smooth(method = "lm", se = FALSE, linetype = 2, color = "orange") +
  geom_abline(intercept = 0, slope = 1) +
 # ggtitle("Comparing temporal trends in body length inside and outside MPA") + 
  labs(
    x    = "Relative (%) annual change in mean body length inside MPA",
    y    = "Relative (%) annual change in mean body length outside MPA"
  )

# How many species are shown in the plot above: rows need a median slope
# below 0.2 both inside and outside MPAs (rows with a missing value on either
# side are dropped by the filter)
temp_data <- Params3models %>%
  select(species, inMPA50, outMPA50) %>%
  filter(inMPA50 < 0.2, outMPA50 < 0.2)
length(unique(temp_data$species))
rm(temp_data)

```

### Supplementary Table 4: print temporal slopes and summary

```{r, warning=FALSE, message=FALSE, echo = F, eval = T}
# Print the temporal slope quantiles per species and location as a table.
load(file = "analysesOutputs/BayesTempPool_105spp.RData")

## Add full location names for figures.
## Display names are paired with location codes by position (order of first
## appearance of each code in AllParams).
locnames <- list(
  NA,
  Loc = unique(AllParams$locs),
  LocFig = c("Port Phillip Bay", "Bass Strait", "Bicheno", "Jervis Bay", "Maria Island", "Tinderbox","Jurien Bay",  "Ninepin","Port Davey")
)

AllParams$LocFig <- locnames$LocFig[match(AllParams$locs, locnames$Loc)]

# select and rename in one step; column order defines the printed table
temp_slopes_to_print <- AllParams %>%
  select(
    species,
    Location = LocFig,
    MaxLength = Lmax,
    Temp_midp = midpoint,
    b1_p025 = betaloc025,
    b1_p10 = betaloc10,
    b1_p50 = betaloc50,
    b1_p90 = betaloc90,
    b1_p975 = betaloc975
  )

knitr::kable(temp_slopes_to_print, digits = 4)
#write.csv(temp_slopes_to_print, file = "ExtendedDataFigures/ExtendedDataTable3.csv")

```

### Supplementary Table 2: temporal statistics 

```{r, warning=FALSE, message=FALSE, echo = F, eval = F}
# Summary statistics of the temporal slopes, first per species and then per
# species x location combination. Numbers in trailing comments are the values
# recorded by the authors for the published dataset.
#rm(list = ls())
load(file = "analysesOutputs/BayesTempPool_105spp.RData") 
notwarming <- c("Jurien")

#only get trends in warming locations
AllParamsPool <- AllParams %>%
  filter(!locs %in% notwarming) %>%
  select(
    species, locs,
    betaloc025, betaloc10, betaloc50, betaloc90, betaloc975,
    betazero025, betazero10, betazero50, betazero90, betazero975
  )

length(unique(AllParamsPool$species)) ## 77 spp

# Row indices for each slope category; which() drops rows with missing values
incr_any   <- which(AllParamsPool$betaloc50 > 0) # median slope above zero
incr_clear <- which(AllParamsPool$betaloc10 > 0) # 10% quantile above zero
decr_any   <- which(AllParamsPool$betaloc50 < 0) # median slope below zero
decr_clear <- which(AllParamsPool$betaloc90 < 0) # 90% quantile below zero

#How many species are increasing in temporal analyses at all?
length(unique(AllParamsPool$species[incr_any])) #34
#How many are increasing "significantly" (80% PPD above 0)
length(unique(AllParamsPool$species[incr_clear])) #19

incrSppTime <- unique(AllParamsPool$species[incr_clear])

#How many are decreasing in temporal analyses at all?
length(unique(AllParamsPool$species[decr_any])) #58
#How many are decreasing "significantly" (80% PPD below 0)
length(unique(AllParamsPool$species[decr_clear])) #25

decrSppTime <- unique(AllParamsPool$species[decr_clear])

#which species can show both clear increase and clear decrease trends at different locations
incrSppTime[incrSppTime %in% decrSppTime]
decrSppTime[decrSppTime %in% incrSppTime]

## Total number of species that show clear size trends (19+25-4 = 40)
length(union(as.character(incrSppTime), as.character(decrSppTime)))

## For Table S2 get statistics per species x location combinations
n_rows <- nrow(AllParamsPool)

#How many speciesxlocations are increasing in temporal analyses at all?
length(incr_any) #56
length(incr_any) / n_rows #0.36
#how many are decreasing 
length(decr_any) #100
length(decr_any) / n_rows #0.64

#How many are increasing / decreasing "significantly" (80% PPD above / below 0)
n_incr_clear <- length(incr_clear) #25
n_decr_clear <- length(decr_clear) #41
n_incr_clear
n_decr_clear
#get proportions (computed from the data instead of typed-in counts)
n_incr_clear / (n_incr_clear + n_decr_clear)
n_decr_clear / (n_incr_clear + n_decr_clear)


```

### Combine temporal and spatial slopes

```{r, warning=FALSE, message=FALSE, echo = F, eval = F}

# Repeat the temporal-slope summaries, then prefix the temporal columns with
# "Time_", join the spatial slopes by species and save the combined table.
# Numbers in trailing comments are the values recorded by the authors for the
# published dataset.
#rm(list = ls())
load(file = "analysesOutputs/BayesTempPool_105spp.RData") 
notwarming <- c("Jurien")

#only get trends in warming locations
AllParamsPool <- AllParams %>%
  filter(!locs %in% notwarming) %>%
  select(
    species, locs,
    betaloc025, betaloc10, betaloc50, betaloc90, betaloc975,
    betazero025, betazero10, betazero50, betazero90, betazero975
  )

length(unique(AllParamsPool$species)) ## 77 spp

# Row indices for each slope category; which() drops rows with missing values
incr_any   <- which(AllParamsPool$betaloc50 > 0) # median slope above zero
incr_clear <- which(AllParamsPool$betaloc10 > 0) # 10% quantile above zero
decr_any   <- which(AllParamsPool$betaloc50 < 0) # median slope below zero
decr_clear <- which(AllParamsPool$betaloc90 < 0) # 90% quantile below zero
n_rows <- nrow(AllParamsPool)

#How many species are increasing in temporal analyses at all?
length(unique(AllParamsPool$species[incr_any])) #34
#How many are increasing "significantly" (80% PPD above 0)
length(unique(AllParamsPool$species[incr_clear])) #19

incrSppTime <- unique(AllParamsPool$species[incr_clear])

#How many speciesxlocations are increasing in temporal analyses at all?
length(incr_any) #56
length(incr_any) / n_rows #0.36
#how many are decreasing 
length(decr_any) #100
length(decr_any) / n_rows #0.64

#How many are increasing / decreasing "significantly" (80% PPD above / below 0)
n_incr_clear <- length(incr_clear) #25
n_decr_clear <- length(decr_clear) #41
n_incr_clear
n_decr_clear

# proportions computed from the data instead of typed-in counts
n_incr_clear / (n_incr_clear + n_decr_clear)
n_decr_clear / (n_incr_clear + n_decr_clear)

#How many are decreasing in temporal analyses at all?
length(unique(AllParamsPool$species[decr_any])) #58
#How many are decreasing "significantly" (80% PPD below 0)
length(unique(AllParamsPool$species[decr_clear])) #25

decrSppTime <- unique(AllParamsPool$species[decr_clear])

#which species can show both clear increase and clear decrease trends at different locations
incrSppTime[incrSppTime %in% decrSppTime]
decrSppTime[decrSppTime %in% incrSppTime]

## So we have a total of 19+25-4= 40 species that show clear size trends

# prefix the ten temporal quantile columns (positions 3 to 12) with "Time_"
column <- paste0("Time_", colnames(AllParamsPool)[3:12])
column <- c("species", "locs", column)
colnames(AllParamsPool) <- column

#Now load cleaned space trends (expected to provide bet1ci)
load(file = "analysesOutputs/slopesInSpace335spp.RData")

# every temporal row keeps its place; spatial columns are NA for species that
# have no spatial estimate
TempSpaceCombined <- left_join(AllParamsPool, bet1ci, by = "species")

save(TempSpaceCombined, file = "analysesOutputs/TempSpaceCombined.RData")

```

### Space-time rate comparison

From the Methods section: "Because μ_i is the log transformed body length, the estimated values of β_1 are converted to relative (or %) change in body length as exp(β1), where β1 of e.g. -0.02 corresponds to exp(-0.02) = 0.98 or 2% decrease in body length. Thus β1 values ranging from -0.2 to 0.2 can be interpreted as proportional change in body length" 

```{r, warning=FALSE, message=FALSE, echo = F, eval = T, fig.width=10, fig.height=6}
# Compare temporal and spatial slope summaries for species that have both.
# Time_betaloc* columns are the temporal slope quantiles (one row per species
# x location); p10 / p50 / p90 are the spatial slope quantiles joined per
# species. Numbers in trailing comments are the values recorded by the
# authors for the published dataset.
load(file = "analysesOutputs/TempSpaceCombined.RData")
length(unique(TempSpaceCombined$species))

#include only species that have both temporal and spatial analyses
#(na.omit drops every row with a missing value in any column)
st <- na.omit(TempSpaceCombined) 
#how many species with both temporal and spatial data?
length(unique(st$species)) #71 species

## Summaries for temporal analyses 

#How many are increasing in temporal analyses at all?
length(unique(st$species[which(st$Time_betaloc50 > 0)])) #32
#How many are increasing "significantly" (80% PPD above 0)
length(unique(st$species[which(st$Time_betaloc10 > 0)])) #19
#What is the average change per year for these "significant species"?
(median(st$Time_betaloc50[which(st$Time_betaloc10 > 0)]))*100 ## 1.5%

incrSpp <- unique(st$species[which(st$Time_betaloc10 > 0)])

#How many are decreasing in temporal analyses at all?
length(unique(st$species[which(st$Time_betaloc50 < 0)])) #53
#How many are decreasing "significantly" (80% PPD below 0)
length(unique(st$species[which(st$Time_betaloc90 < 0)])) #23
#What is the average change per year for these "significant species"?
(median(st$Time_betaloc50[which(st$Time_betaloc90 < 0)]))*100 ## -0.9%

decrSpp <- unique(st$species[which(st$Time_betaloc90 < 0)])

tempChangeSpp <- unique(c(incrSpp, decrSpp))

##Summaries for spatial analyses for only those species that show clear body size changes in temporal analyses 

spp38 <- st %>% filter(species %in% tempChangeSpp)

#how much increase / decrease per 1C of warming?
(median(spp38$p50[which(spp38$p10 > 0)]))*100 ## 4.5%
(median(spp38$p50[which(spp38$p90 < 0)]))*100 ## -3.4%

##Summaries for spatial analyses for only those species that are present in temporal analyses 

# Species are counted by name. Counting distinct p50 values, as was done
# before, would merge two species that share the same spatial slope.

#How many are increasing in spatial analyses at all? 
length(unique(st$species[which(st$p50 > 0)])) #30 spp
(median(st$p50[which(st$p50 > 0)]))*100 ## 3.5%
#How many of them are increasing "significantly" in spatial analyses (80%PPD above 0)?
length(unique(st$species[which(st$p10 > 0)])) #19 spp
#how much increase per 1C of warming?
(median(st$p50[which(st$p10 > 0)]))*100 ## 3.5%

#How many are decreasing in spatial analyses at all? 
length(unique(st$species[which(st$p50 < 0)])) #41 spp
#How many are decreasing "significantly" in spatial analyses (80% PPD below 0)?
length(unique(st$species[which(st$p90 < 0)])) #31 spp
#How much decrease in length per 1C of warming? 
(median(st$p50[which(st$p90 < 0)]))*100 ## -3.4%


```

### Fig2 data and plots

```{r, warning=FALSE, message=FALSE, echo = F, eval = T, fig.width=10, fig.height=6}
# Prepare paired temporal (x) and spatial (y) slope estimates and compute
# their plain and precision-weighted correlations.
load(file = "analysesOutputs/TempSpaceCombined.RData")

## keep the slope quantiles: temporal slopes become x_*, spatial slopes y_*
df <- TempSpaceCombined %>%
  select(
    species, locs,
    x_025 = Time_betaloc025, x_10 = Time_betaloc10, x_50 = Time_betaloc50,
    x_90 = Time_betaloc90, x_975 = Time_betaloc975,
    y_025 = p025, y_10 = p10, y_50 = p50, y_90 = p90, y_975 = p975
  )

# Approximate SDs from the width of the 2.5-97.5% interval (width / (2*1.96)),
# used below to weight the points
df_fit <- mutate(
  df,
  x = x_50,
  SD_x = (x_975-x_025)/(2*1.96),
  y = y_50,
  SD_y = (y_975-y_025)/(2*1.96)
)
df_fit <- select(df_fit, Species = species, x, SD_x, y, SD_y, x_10, x_90, y_10, y_90)

# drop rows with any missing value
df_fit <- na.omit(df_fit)

# standard correlation (no weighting)
cor(df_fit$x, df_fit$y) 

# weight of each point: inverse of the product of its two SDs,
# rescaled so that the largest weight equals 1
wts <- with(df_fit, 1/(SD_x*SD_y))
wts <- wts/max(wts)

# non-bootstrap approach
(res_wc <- wtd.cor(df_fit$x, df_fit$y, weight = wts, bootse=FALSE))

# standard number of bootstraps
(res_wc <- wtd.cor(df_fit$x, df_fit$y, weight = wts, bootse=TRUE))

# lots of bootstraps
(res_wc <- wtd.cor(df_fit$x, df_fit$y, weight = wts, bootse=TRUE, bootn = 10000))

###
#if we had 1:1 match, and we get 0.025C change per year or 1C per 40years we expect the slope of 40. But we say that slope in time is 10 times faster than in space (1C spatial change occurs over 4 years) so we expect a slope of 4 

# plot: spatial slope (y) against temporal slope (x) with 10-90% quantile
# bars; the orange line has slope 4 and passes through the origin
ggplot(df_fit, aes(x = x, y = y)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey20") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey20") +
  geom_errorbar(aes(ymin = y_10, ymax = y_90), color = "darkgrey", size = 0.3) +
  geom_errorbarh(aes(xmin = x_10, xmax = x_90), color = "darkgrey", size = 0.3) +
    geom_point(color = "black") +
  geom_abline(intercept = 0, slope = 4, color = "orange", size = 0.6) +
  #geom_abline(yintercept = beta0, slope = beta1, color = "red", size = 1) +
 # xlim(-0.06,0.06) + ylim(-0.15,0.25) +
  theme_bw() +
#  geom_text(x=-0.045, y=0.25, label="rho = 0.28, p = 0.005", size = 7, fontface = 3) + 
  # annotate() draws the label once; geom_text() with a constant label drew it
  # once per data row. parse = TRUE turns the string into a plotmath
  # expression so that rho is rendered as the Greek letter. The values are
  # hard-coded from the weighted-correlation output.
  annotate("text", x = -0.057, y = 0.25, label = 'paste(rho, " = 0.28, p = 0.005")', parse = TRUE, size = 6, fontface = 3) + 
    theme(
    panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
    axis.title.x = element_text(size=13),
    axis.title.y = element_text(size=13),
    axis.text=element_text(size=12)) +
  #geom_smooth(method = "lm", se = FALSE, linetype = 4, color = "red") +
  # ggtitle("Comparing trends of body size in space and time") + 
  labs(
    y    = "Relative change in body length per 1C difference across space",
    x    = "Relative annual change in body length in one location"
  )


```

##END