From ff5be1e06a682719f739a1624e30204b169ba04e Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Mon, 23 Oct 2023 09:26:28 +0300 Subject: [PATCH] Update aaltobda install instructions --- FAQ.Rmd | 6 +- assignments/assignment6.qmd | 83 +++++++++-------- assignments/includes/_general_info.md | 3 +- assignments/simple_template3.qmd | 92 +++++++++---------- assignments/template2.qmd | 48 +++++----- assignments/template3.qmd | 126 +++++++++++++------------- assignments/template4.qmd | 38 ++++---- assignments/template5.qmd | 44 +++++---- assignments/template6.qmd | 58 ++++++------ 9 files changed, 241 insertions(+), 257 deletions(-) diff --git a/FAQ.Rmd b/FAQ.Rmd index cb0c8250..ea0ca03c 100644 --- a/FAQ.Rmd +++ b/FAQ.Rmd @@ -120,11 +120,9 @@ install.packages(c("MASS", "bayesplot", "brms", "cmdstanr ", "dplyr", "gganimate The course has its own R package `aaltobda` with data and functionality to simplify coding. `aaltobda` has been pre-installed in -Aalto JupyterHub. To install the package to your own computer just run the following -(upgrade=”never” skips question about updating other packages): +Aalto JupyterHub. To install the package to your own computer just run the following: -1. `install.packages("remotes")` -2. `remotes::install_github("avehtari/BDA_course_Aalto", subdir = "rpackage", upgrade="never", dependencies=TRUE)` +1. `install.packages("aaltobda", repos = c("https://avehtari.github.io/BDA_course_Aalto/", getOption("repos")))` If during the course there is announcement that `aaltobda` has been updated (e.g. some error has been fixed), you can get the latest diff --git a/assignments/assignment6.qmd b/assignments/assignment6.qmd index e44077eb..3082814c 100644 --- a/assignments/assignment6.qmd +++ b/assignments/assignment6.qmd @@ -1,33 +1,33 @@ --- title: "Assignment 6" author: "Aki Vehtari et al." -format: +format: html: toc: true code-tools: true - code-line-numbers: true + code-line-numbers: true number-sections: true mainfont: Georgia, serif page-layout: article editor: source filters: - - includes/assignments.lua - - includes/include-code-files.lua + - includes/assignments.lua + - includes/include-code-files.lua --- # General information **The maximum amount of points from this assignment is 6.** -We have prepared a **quarto template specific to this assignment ([html](template6.html), [qmd](https://avehtari.github.io/BDA_course_Aalto/assignments/template6.qmd), [pdf](template6.pdf))** to help you get started. +We have prepared a **quarto template specific to this assignment ([html](template6.html), [qmd](https://avehtari.github.io/BDA_course_Aalto/assignments/template6.qmd), [pdf](template6.pdf))** to help you get started. ::: {.aalto} We recommend you use [jupyter.cs.aalto.fi](https://jupyter.cs.aalto.fi) or the [docker container](docker.html). ::: -::: {.hint} -**Reading instructions:** +::: {.hint} +**Reading instructions:** - [**The reading instructions for BDA3 Chapter 10**](../BDA3_notes.html#ch10). - [**The reading instructions for BDA3 Chapter 11**](../BDA3_notes.html#ch11). @@ -44,16 +44,15 @@ We recommend you use [jupyter.cs.aalto.fi](https://jupyter.cs.aalto.fi) or the [ :::: {.content-hidden when-format="pdf"} ::: {.callout-tip collapse=true} - -## Setup + +## Setup JupyterHub has all the needed packages pre-installed. The following installs and loads the `aaltobda` package: ```{r} if(!require(aaltobda)){ - install.packages("remotes") - remotes::install_github("avehtari/BDA_course_Aalto", subdir = "rpackage", upgrade="never") + install.packages("aaltobda", repos = c("https://avehtari.github.io/BDA_course_Aalto/", getOption("repos"))) library(aaltobda) } ``` @@ -129,7 +128,7 @@ p(y|x,\alpha,\beta,\sigma) &= p_\mathrm{normal}(y|\alpha + \beta x, \sigma) & \text{(normal likelihood)} &\text{.} \end{aligned} $$ -In both the statistical model above and in the Stan model below, $x \in \mathbb{R}^N$ and $y \in \mathbb{R}^N$ are vectors of the covariates / predictors (the assignment number) and vectors of the observation (proportions of students who have handed in the respective assignment). $\alpha \in \mathbb{R}$ is the unknown scalar intercept, $\beta \in \mathbb{R}$ is the unknown scalar slope and $\sigma \in \mathbb{R}_{>0}$ is the unknown scalar observation standard deviation. The statistical model further implies +In both the statistical model above and in the Stan model below, $x \in \mathbb{R}^N$ and $y \in \mathbb{R}^N$ are vectors of the covariates / predictors (the assignment number) and vectors of the observation (proportions of students who have handed in the respective assignment). $\alpha \in \mathbb{R}$ is the unknown scalar intercept, $\beta \in \mathbb{R}$ is the unknown scalar slope and $\sigma \in \mathbb{R}_{>0}$ is the unknown scalar observation standard deviation. The statistical model further implies $$ p(y_\mathrm{pred.}|x_\mathrm{pred.},\alpha,\beta,\sigma) = p_\mathrm{normal}(y_\mathrm{pred.}|\alpha + \beta x_\mathrm{pred.}, \sigma) $$ @@ -140,23 +139,23 @@ You can download [the broken stan file from github](./additional_files/assignmen ```{.stan} data { #<1> // number of data points - int N; + int N; // covariate / predictor - vector[N] x; + vector[N] x; // observations - vector[N] y; + vector[N] y; // number of covariate values to make predictions at int no_predictions; // covariate values to make predictions at - vector[no_predictions] x_predictions; + vector[no_predictions] x_predictions; } #<1> parameters { #<2> // intercept - real alpha; + real alpha; // slope - real beta; + real beta; // the standard deviation should be constrained to be positive - real sigma; + real sigma; } #<2> transformed parameters { #<3> // deterministic transformation of parameters and data @@ -164,7 +163,7 @@ transformed parameters { #<3> } #<3> model { #<4> // observation model / likelihood - y ~ normal(mu, sigma); + y ~ normal(mu, sigma); } #<4> generated quantities { #<5> // compute the means for the covariate values at which to make predictions @@ -200,7 +199,7 @@ Find the ***three mistakes*** in the code and fix them. Report the original mist ::: {.hint} You may find some of the mistakes in the code using Stan syntax checker. If you copy the Stan code to a file ending `.stan` and open it in RStudio (you can also choose from RStudio menu File$\rightarrow$New File$\rightarrow$Stan file to create a new Stan file), the editor will show you some syntax errors. More syntax errors might be detected by clicking `Check' in the bar just above the Stan file in the RStudio editor. Note that some of the errors in the presented Stan code may not be syntax errors. -::: +::: @@ -218,7 +217,7 @@ The author runs the corrected Stan file using the following R code and plots the #| warning: false # These are our observations y: the proportion of students handing in each assignment (1-8), # sorted by year (row-wise) and assignment (column-wise). -# While the code suggest a matrix structure, +# While the code suggest a matrix structure, # the result will actually be a vector of length N = no_years * no_assignments propstudents<-c(c(176, 174, 158, 135, 138, 129, 126, 123)/176, c(242, 212, 184, 177, 174, 172, 163, 156)/242, @@ -228,7 +227,7 @@ propstudents<-c(c(176, 174, 158, 135, 138, 129, 126, 123)/176, # These are our predictors x: for each observation, the corresponding assignment number. assignment <- rep(1:8, 5) # These are in some sense our test data: the proportion of students handing in the last assignment (9), -# sorted by year. +# sorted by year. # Usually, we would not want to split our data like that and instead # use e.g. Leave-One-Out Cross-Validation (LOO-CV, see e.g. http://mc-stan.org/loo/index.html) # to evaluate model performance. @@ -246,8 +245,8 @@ model_data = list(N=length(assignment), ``` **Sampling from the posterior distribution happens here**: ```{r} -#| warning: false -# This reads the file at the specified path and tries to compile it. +#| warning: false +# This reads the file at the specified path and tries to compile it. # If it fails, an error is thrown. retention_model = cmdstan_model("./additional_files/assignment6_linear_model.stan") # This "out <- capture.output(...)" construction suppresses output from cmdstanr @@ -263,21 +262,21 @@ out <- capture.output( # This extracts the draws from the sampling result as a data.frame. draws_df = fit$draws(format="draws_df") -# This does some data/draws wrangling to compute the 5, 50 and 95 percentiles of -# the mean at the specified covariate values (x_predictions). +# This does some data/draws wrangling to compute the 5, 50 and 95 percentiles of +# the mean at the specified covariate values (x_predictions). # It can be instructive to play around with each of the data processing steps # to find out what each step does, e.g. by removing parts from the back like "|> gather(pct,y,-x)" # and printing the resulting data.frame. -mu_quantiles_df = draws_df |> - subset_draws(variable = c("mu_pred")) |> - summarise_draws(~quantile2(.x, probs = c(0.05, .5, 0.95))) |> - mutate(x = 1:9) |> +mu_quantiles_df = draws_df |> + subset_draws(variable = c("mu_pred")) |> + summarise_draws(~quantile2(.x, probs = c(0.05, .5, 0.95))) |> + mutate(x = 1:9) |> pivot_longer(c(q5, q50, q95), names_to = c("pct")) # Same as above, but for the predictions. -y_quantiles_df = draws_df |> - subset_draws(variable = c("y_pred")) |> - summarise_draws(~quantile2(.x, probs = c(0.05, .5, 0.95))) |> - mutate(x = 1:9) |> +y_quantiles_df = draws_df |> + subset_draws(variable = c("y_pred")) |> + summarise_draws(~quantile2(.x, probs = c(0.05, .5, 0.95))) |> + mutate(x = 1:9) |> pivot_longer(c(q5, q50, q95), names_to = c("pct")) ``` @@ -290,14 +289,14 @@ y_quantiles_df = draws_df |> #| label: fig-posterior #| fig-cap: Describe me in your submission! ggplot() + - # scatter plot of the training data: + # scatter plot of the training data: geom_point( - aes(x, y, color=assignment), + aes(x, y, color=assignment), data=data.frame(x=assignment, y=propstudents, assignment="1-8") ) + # scatter plot of the test data: geom_point( - aes(x, y, color=assignment), + aes(x, y, color=assignment), data=data.frame(x=no_assignments, y=propstudents9, assignment="9") ) + # you have to tell us what this plots: @@ -340,7 +339,7 @@ Based on the above plot, answer the following questions: - What is the general trend of student retention as measured by assignment submissions? - Given a model fitted to the submission data for assignments 1-8, does it do a good job predicting the proportion of students who submit the final 9th assignment? - Name one different modeling choice you could make to improve the prediction. -::: +::: ::: {.rubric} @@ -379,7 +378,7 @@ Based on the above plot, answer the following questions: * Has at least one way to improve the model been mentioned (E.g. **...** or **...**)? ::: -# Generalized linear model: Bioassay with Stan (4 points) +# Generalized linear model: Bioassay with Stan (4 points) Replicate the computations for the bioassay example of section 3.7 (BDA3) using Stan. @@ -460,13 +459,13 @@ any problems in setting it up or using it. Please report, ::: - + ::: {.rubric} * Is the Stan model code included? * No * Yes -* Does the implemented Stan-model seem to be working? +* Does the implemented Stan-model seem to be working? * No implementation * Model implemented but results not visualized/reported * Model implemented, but the results seem weird diff --git a/assignments/includes/_general_info.md b/assignments/includes/_general_info.md index 547a5ad0..9b4ec664 100644 --- a/assignments/includes/_general_info.md +++ b/assignments/includes/_general_info.md @@ -20,8 +20,7 @@ To install the package on your own system, run the following code (upgrade=\"never\" skips question about updating other packages): ```{.r} -install.packages("remotes") -remotes::install_github("avehtari/BDA_course_Aalto", subdir = "rpackage", upgrade="never") +install.packages("aaltobda", repos = c("https://avehtari.github.io/BDA_course_Aalto/", getOption("repos"))) ``` - Many of the exercises can be checked automatically using the R package `markmyassignment` (pre-installed in JupyterHub). diff --git a/assignments/simple_template3.qmd b/assignments/simple_template3.qmd index 314ccb14..0900b757 100644 --- a/assignments/simple_template3.qmd +++ b/assignments/simple_template3.qmd @@ -1,15 +1,15 @@ --- title: "Assignment 3" author: anonymous # <-- hand in anonymously -format: +format: html: toc: true code-tools: true - code-line-numbers: true + code-line-numbers: true number-sections: true mainfont: Georgia, serif page-layout: article - pdf: + pdf: geometry: - left=1cm,top=1cm,bottom=1cm,right=7cm number-sections: true @@ -22,37 +22,36 @@ editor: source :::: {.content-hidden when-format="pdf"} ::: {.callout-warning collapse=false} - -## Setup + +## Setup *This block will only be visible in your HTML output, but will be hidden when rendering to PDF with quarto for the submission.* **Make sure that this does not get displayed in the PDF!** - + This is the template for [assignment 3](assignment3.html). You can download the qmd-files ([full](https://avehtari.github.io/BDA_course_Aalto/assignments/template3.qmd), [simple](https://avehtari.github.io/BDA_course_Aalto/assignments/simple_template3.qmd)) or copy the code from this rendered document after clicking on ` Code` in the top right corner. -**Please replace the instructions in this template by your own text, explaining what you are doing in each exercise.** +**Please replace the instructions in this template by your own text, explaining what you are doing in each exercise.** The following will set-up [`markmyassignment`](https://github.com/MansMeg/markmyassignment) to check your functions at the end of the notebook: - -```{r} + +```{r} if(!require(markmyassignment)){ install.packages("markmyassignment") library(markmyassignment) } assignment_path = paste("https://github.com/avehtari/BDA_course_Aalto/", "blob/master/assignments/tests/assignment3.yml", sep="") -set_assignment(assignment_path) -``` +set_assignment(assignment_path) +``` The following installs and loads the `aaltobda` package: ```{r} if(!require(aaltobda)){ - install.packages("remotes") - remotes::install_github("avehtari/BDA_course_Aalto", subdir = "rpackage", upgrade="never") + install.packages("aaltobda", repos = c("https://avehtari.github.io/BDA_course_Aalto/", getOption("repos"))) library(aaltobda) } ``` @@ -70,7 +69,7 @@ if(!require(latex2exp)){ -# Inference for normal mean and deviation (3 points) +# Inference for normal mean and deviation (3 points) Loading the library and the data. ``` {r} @@ -90,17 +89,17 @@ windshieldy_test <- c(13.357, 14.928, 14.896, 14.820) ## (a) -Write your answers here! +Write your answers here! ## (b) -Write your answers and code here! +Write your answers and code here! **Keep the below name and format for the functions to work with `markmyassignment`:** -```{r} +```{r} # Useful functions: mean(), length(), sqrt(), sum() # and qtnew(), dtnew() (from aaltobda) @@ -108,13 +107,13 @@ mu_point_est <- function(data) { # Do computation here, and return as below. # This is the correct return value for the test data provided above. 14.5 - + } mu_interval <- function(data, prob = 0.95) { # Do computation here, and return as below. # This is the correct return value for the test data provided above. c(13.3, 15.7) - + } ``` @@ -125,13 +124,13 @@ You can plot the density as below if you implement `mu_pdf` to compute the PDF o #| fig-cap: PDF of the posterior $p(\mu|y)$ of the average hardness $\mu$ mu_pdf <- function(data, x){ # Compute necessary parameters here. - # These are the correct parameters for `windshieldy_test` + # These are the correct parameters for `windshieldy_test` # with the provided uninformative prior. df = 3 location = 14.5 scale = 0.3817557 # Use the computed parameters as below to compute the PDF: - + dtnew(x, df, location, scale) } @@ -140,8 +139,8 @@ lower_x = x_interval[1] upper_x = x_interval[2] x = seq(lower_x, upper_x, length.out=1000) plot( - x, mu_pdf(windshieldy1, x), type="l", - xlab=TeX(r'(average hardness $\mu$)'), + x, mu_pdf(windshieldy1, x), type="l", + xlab=TeX(r'(average hardness $\mu$)'), ylab=TeX(r'(PDF of the posterior $p(\mu|y)$)') ) ``` @@ -154,7 +153,7 @@ plot( Write your answers and code here! **Keep the below name and format for the functions to work with `markmyassignment`:** -```{r} +```{r} # Useful functions: mean(), length(), sqrt(), sum() # and qtnew(), dtnew() (from aaltobda) @@ -162,13 +161,13 @@ mu_pred_point_est <- function(data) { # Do computation here, and return as below. # This is the correct return value for the test data provided above. 14.5 - + } mu_pred_interval <- function(data, prob = 0.95) { # Do computation here, and return as below. # This is the correct return value for the test data provided above. c(11.8, 17.2) - + } ``` @@ -179,13 +178,13 @@ You can plot the density as below if you implement `mu_pred_pdf` to compute the #| fig-cap: PDF of the posterior predictive $p(\tilde{y}|y)$ of a new hardness observation $\tilde{y}$ mu_pred_pdf <- function(data, x){ # Compute necessary parameters here. - # These are the correct parameters for `windshieldy_test` + # These are the correct parameters for `windshieldy_test` # with the provided uninformative prior. df = 3 location = 14.5 scale = 0.8536316 # Use the computed parameters as below to compute the PDF: - + dtnew(x, df, location, scale) } @@ -194,19 +193,19 @@ lower_x = x_interval[1] upper_x = x_interval[2] x = seq(lower_x, upper_x, length.out=1000) plot( - x, mu_pred_pdf(windshieldy1, x), type="l", - xlab=TeX(r'(new hardness observation $\tilde{y}$)'), + x, mu_pred_pdf(windshieldy1, x), type="l", + xlab=TeX(r'(new hardness observation $\tilde{y}$)'), ylab=TeX(r'(PDF of the posterior predictive $p(\tilde{y}|y)$)') ) ``` -# Inference for the difference between proportions (3 points) +# Inference for the difference between proportions (3 points) ## (a) -Write your answers here! +Write your answers here! @@ -226,20 +225,20 @@ p1 = rbeta(ndraws, 10, 90) **Keep the below name and format for the functions to work with `markmyassignment`:** -```{r} +```{r} # Useful function: mean(), quantile() posterior_odds_ratio_point_est <- function(p0, p1) { # Do computation here, and return as below. # This is the correct return value for the test data provided above. 2.650172 - + } posterior_odds_ratio_interval <- function(p0, p1, prob = 0.95) { # Do computation here, and return as below. # This is the correct return value for the test data provided above. c(0.6796942,7.3015964) - + } ``` @@ -250,10 +249,10 @@ posterior_odds_ratio_interval <- function(p0, p1, prob = 0.95) { ## (c) -Write your answers and code here! +Write your answers and code here! -# Inference for the difference between normal means (3 points) +# Inference for the difference between normal means (3 points) Loading the library and the data. ``` {r} @@ -268,15 +267,15 @@ head(windshieldy2) ## (a) -Write your answers here! +Write your answers here! ## (b) -Write your answers and code here! -```{r} +Write your answers and code here! +```{r} # Useful functions: mean(), length(), sqrt(), sum(), # rtnew() (from aaltobda), quantile() and hist(). ``` @@ -286,7 +285,7 @@ Write your answers and code here! ## (c) -Write your answers here! +Write your answers here! @@ -298,15 +297,14 @@ Write your answers here! *This block will only be visible in your HTML output, but will be hidden when rendering to PDF with quarto for the submission.* **Make sure that this does not get displayed in the PDF!** - + The following will check the functions for which `markmyassignment` has been set up: - -```{r} -mark_my_assignment() -``` + +```{r} +mark_my_assignment() +``` ::: :::: - diff --git a/assignments/template2.qmd b/assignments/template2.qmd index 87c3a6e2..7f1d1002 100644 --- a/assignments/template2.qmd +++ b/assignments/template2.qmd @@ -1,23 +1,23 @@ --- title: "Assignment 2" author: anonymous # <-- hand in anonymously -format: +format: html: toc: true code-tools: true - code-line-numbers: true + code-line-numbers: true number-sections: true mainfont: Georgia, serif - pdf: + pdf: geometry: - left=1cm,top=1cm,bottom=1cm,right=7cm number-sections: true code-annotations: none - include-in-header: + include-in-header: text: | - % You can add TeX macros here for PDF, + % You can add TeX macros here for PDF, % see https://quarto.org/docs/output-formats/pdf-basics.html#latex-includes - \newcommand{\BetaDist}{\mathrm{Beta}} + \newcommand{\BetaDist}{\mathrm{Beta}} editor: source --- @@ -33,35 +33,34 @@ $$ :::: {.content-hidden when-format="pdf"} ::: {.callout-warning collapse=false} - -## Setup + +## Setup *This block will only be visible in your HTML output, but will be hidden when rendering to PDF with quarto for the submission.* **Make sure that this does not get displayed in the PDF!** - + This is the template for [assignment 2](assignment2.html). You can download the [qmd-file](https://avehtari.github.io/BDA_course_Aalto/assignments/template2.qmd) or copy the code from this rendered document after clicking on ` Code` in the top right corner. -**Please replace the instructions in this template by your own text, explaining what you are doing in each exercise.** +**Please replace the instructions in this template by your own text, explaining what you are doing in each exercise.** The following will set-up `markmyassignment` to check your functions at the end of the notebook: - -```{r} -library(markmyassignment) + +```{r} +library(markmyassignment) assignment_path = paste("https://github.com/avehtari/BDA_course_Aalto/", "blob/master/assignments/tests/assignment2.yml", sep="") -set_assignment(assignment_path) -``` +set_assignment(assignment_path) +``` The following installs the `aaltobda` package: ```{r} #| cache: true # Caching should be fine here -install.packages("remotes") -remotes::install_github("avehtari/BDA_course_Aalto", subdir = "rpackage", upgrade="never") +install.packages("aaltobda", repos = c("https://avehtari.github.io/BDA_course_Aalto/", getOption("repos"))) ``` ::: @@ -93,7 +92,7 @@ algae_test <- c(0, 1, 1, 0, 0, 0) Write the likelihood, the prior and the posterior here! ```{r} -# These are not the actual values for the posterior! +# These are not the actual values for the posterior! # You will have to compute those from the data! posterior_alpha = 2 posterior_beta = 10 @@ -112,7 +111,7 @@ This template defines a `\BetaDist` TeX command which renders `$\BetaDist(1,2)$` Keep the below name and format for the functions to work with `markmyassignment`: -```{r} +```{r} # Useful function: qbeta() beta_point_est <- function(prior_alpha, prior_beta, data) { @@ -173,15 +172,14 @@ Plot the PDFs here. Explain shortly what you do. *This block will only be visible in your HTML output, but will be hidden when rendering to PDF with quarto for the submission.* **Make sure that this does not get displayed in the PDF!** - + The following will check the functions for which `markmyassignment` has been set up: - -```{r} -mark_my_assignment() -``` + +```{r} +mark_my_assignment() +``` ::: :::: - diff --git a/assignments/template3.qmd b/assignments/template3.qmd index cb57c975..12249f72 100644 --- a/assignments/template3.qmd +++ b/assignments/template3.qmd @@ -1,15 +1,15 @@ --- title: "Assignment 3" author: anonymous # <-- hand in anonymously -format: +format: html: toc: true code-tools: true - code-line-numbers: true + code-line-numbers: true number-sections: true mainfont: Georgia, serif page-layout: article - pdf: + pdf: geometry: - left=1cm,top=1cm,bottom=1cm,right=7cm number-sections: true @@ -22,37 +22,36 @@ editor: source :::: {.content-hidden when-format="pdf"} ::: {.callout-warning collapse=false} - -## Setup + +## Setup *This block will only be visible in your HTML output, but will be hidden when rendering to PDF with quarto for the submission.* **Make sure that this does not get displayed in the PDF!** - + This is the template for [assignment 3](assignment3.html). You can download the qmd-files ([full](https://avehtari.github.io/BDA_course_Aalto/assignments/template3.qmd), [simple](https://avehtari.github.io/BDA_course_Aalto/assignments/simple_template3.qmd)) or copy the code from this rendered document after clicking on ` Code` in the top right corner. -**Please replace the instructions in this template by your own text, explaining what you are doing in each exercise.** +**Please replace the instructions in this template by your own text, explaining what you are doing in each exercise.** The following will set-up [`markmyassignment`](https://github.com/MansMeg/markmyassignment) to check your functions at the end of the notebook: - -```{r} + +```{r} if(!require(markmyassignment)){ install.packages("markmyassignment") library(markmyassignment) } assignment_path = paste("https://github.com/avehtari/BDA_course_Aalto/", "blob/master/assignments/tests/assignment3.yml", sep="") -set_assignment(assignment_path) -``` +set_assignment(assignment_path) +``` The following installs and loads the `aaltobda` package: ```{r} if(!require(aaltobda)){ - install.packages("remotes") - remotes::install_github("avehtari/BDA_course_Aalto", subdir = "rpackage", upgrade="never") + install.packages("aaltobda", repos = c("https://avehtari.github.io/BDA_course_Aalto/", getOption("repos"))) library(aaltobda) } ``` @@ -77,7 +76,7 @@ if(!require(latex2exp)){ *This block will only be visible in your HTML output, but will be hidden when rendering to PDF with quarto for the submission.* **Make sure that this does not get displayed in the PDF!** - + *This block showcases advanced tools, which you will be allowed and expected to use after this assignment.* @@ -119,9 +118,9 @@ if(!require(ggdist)){ ::: :::: - -# Inference for normal mean and deviation (3 points) + +# Inference for normal mean and deviation (3 points) Loading the library and the data. ``` {r} @@ -141,17 +140,17 @@ windshieldy_test <- c(13.357, 14.928, 14.896, 14.820) ## (a) -Write your answers here! +Write your answers here! ## (b) -Write your answers and code here! +Write your answers and code here! **Keep the below name and format for the functions to work with `markmyassignment`:** -```{r} +```{r} # Useful functions: mean(), length(), sqrt(), sum() # and qtnew(), dtnew() (from aaltobda) @@ -159,13 +158,13 @@ mu_point_est <- function(data) { # Do computation here, and return as below. # This is the correct return value for the test data provided above. 14.5 - + } mu_interval <- function(data, prob = 0.95) { # Do computation here, and return as below. # This is the correct return value for the test data provided above. c(13.3, 15.7) - + } ``` @@ -176,13 +175,13 @@ You can plot the density as below if you implement `mu_pdf` to compute the PDF o #| fig-cap: PDF of the posterior $p(\mu|y)$ of the average hardness $\mu$ mu_pdf <- function(data, x){ # Compute necessary parameters here. - # These are the correct parameters for `windshieldy_test` + # These are the correct parameters for `windshieldy_test` # with the provided uninformative prior. df = 3 location = 14.5 scale = 0.3817557 # Use the computed parameters as below to compute the PDF: - + dtnew(x, df, location, scale) } @@ -191,8 +190,8 @@ lower_x = x_interval[1] upper_x = x_interval[2] x = seq(lower_x, upper_x, length.out=1000) plot( - x, mu_pdf(windshieldy1, x), type="l", - xlab=TeX(r'(average hardness $\mu$)'), + x, mu_pdf(windshieldy1, x), type="l", + xlab=TeX(r'(average hardness $\mu$)'), ylab=TeX(r'(PDF of the posterior $p(\mu|y)$)') ) ``` @@ -205,7 +204,7 @@ plot( Write your answers and code here! **Keep the below name and format for the functions to work with `markmyassignment`:** -```{r} +```{r} # Useful functions: mean(), length(), sqrt(), sum() # and qtnew(), dtnew() (from aaltobda) @@ -213,13 +212,13 @@ mu_pred_point_est <- function(data) { # Do computation here, and return as below. # This is the correct return value for the test data provided above. 14.5 - + } mu_pred_interval <- function(data, prob = 0.95) { # Do computation here, and return as below. # This is the correct return value for the test data provided above. c(11.8, 17.2) - + } ``` @@ -230,13 +229,13 @@ You can plot the density as below if you implement `mu_pred_pdf` to compute the #| fig-cap: PDF of the posterior predictive $p(\tilde{y}|y)$ of a new hardness observation $\tilde{y}$ mu_pred_pdf <- function(data, x){ # Compute necessary parameters here. - # These are the correct parameters for `windshieldy_test` + # These are the correct parameters for `windshieldy_test` # with the provided uninformative prior. df = 3 location = 14.5 scale = 0.8536316 # Use the computed parameters as below to compute the PDF: - + dtnew(x, df, location, scale) } @@ -245,19 +244,19 @@ lower_x = x_interval[1] upper_x = x_interval[2] x = seq(lower_x, upper_x, length.out=1000) plot( - x, mu_pred_pdf(windshieldy1, x), type="l", - xlab=TeX(r'(new hardness observation $\tilde{y}$)'), + x, mu_pred_pdf(windshieldy1, x), type="l", + xlab=TeX(r'(new hardness observation $\tilde{y}$)'), ylab=TeX(r'(PDF of the posterior predictive $p(\tilde{y}|y)$)') ) ``` -# Inference for the difference between proportions (3 points) +# Inference for the difference between proportions (3 points) ## (a) -Write your answers here! +Write your answers here! @@ -277,20 +276,20 @@ p1 = rbeta(ndraws, 10, 90) **Keep the below name and format for the functions to work with `markmyassignment`:** -```{r} +```{r} # Useful function: mean(), quantile() posterior_odds_ratio_point_est <- function(p0, p1) { # Do computation here, and return as below. # This is the correct return value for the test data provided above. 2.650172 - + } posterior_odds_ratio_interval <- function(p0, p1, prob = 0.95) { # Do computation here, and return as below. # This is the correct return value for the test data provided above. c(0.6796942,7.3015964) - + } ``` @@ -298,13 +297,13 @@ posterior_odds_ratio_interval <- function(p0, p1, prob = 0.95) { :::: {.content-hidden when-format="pdf"} ::: {.callout-tip collapse=false} - + ## Showcase: advanced tools (`posterior`'s `rvar`, `ggdist`'s `stat_dotsinterval`) *This block will only be visible in your HTML output, but will be hidden when rendering to PDF with quarto for the submission.* **Make sure that this does not get displayed in the PDF!** - + *This block showcases advanced tools, which you will be allowed and expected to use after this assignment.* @@ -313,15 +312,15 @@ posterior_odds_ratio_interval <- function(p0, p1, prob = 0.95) { -The `posterior` package's random variable datatype `rvar` is a +The `posterior` package's random variable datatype `rvar` is a ["sample-based representation of random variables"](https://mc-stan.org/posterior/articles/rvar.html#:~:text=sample%2Dbased%20representation%20of%20random%20variables) -which makes handling of random samples (of draws) such as the ones contained in the above variables `p0` and `p1` easier. -[By default, it prints as the mean and standard deviation of the draws](https://mc-stan.org/posterior/articles/rvar.html#:~:text=The%20default%20display%20of%20an%20rvar%20shows%20the%20mean%20and%20standard%20deviation%20of%20each%20element%20of%20the%20array.), **such that `rvar(p0)` prints as `r rvar(p0)` and `rvar(p1)` prints as `r rvar(p1)`**. +which makes handling of random samples (of draws) such as the ones contained in the above variables `p0` and `p1` easier. +[By default, it prints as the mean and standard deviation of the draws](https://mc-stan.org/posterior/articles/rvar.html#:~:text=The%20default%20display%20of%20an%20rvar%20shows%20the%20mean%20and%20standard%20deviation%20of%20each%20element%20of%20the%20array.), **such that `rvar(p0)` prints as `r rvar(p0)` and `rvar(p1)` prints as `r rvar(p1)`**. The datatype is ["designed to [...] be able to be used inside `data.frame()`s and `tibble()`s, and to be used with distribution visualizations in the ggdist package."](https://mc-stan.org/posterior/articles/rvar.html#:~:text=designed%20to%20interoperate%20with%20vectorized%20distributions%20in%20the%20distributional%20package%2C%20to%20be%20able%20to%20be%20used%20inside%20data.frame()s%20and%20tibble()s%2C%20and%20to%20be%20used%20with%20distribution%20visualizations%20in%20the%20ggdist%20package.) -The code below sets up an [R `data.frame()`](https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/data.frame) with the draws in `p0` and `p1` wrapped in an `rvar`, and uses that data frame to visualize the draws using [`ggdist`](https://mjskay.github.io/ggdist/index.html), an R package building on [`ggplot2`](https://ggplot2.tidyverse.org/) and ["designed for both frequentist and Bayesian uncertainty visualization"](https://mjskay.github.io/ggdist/index.html#:~:text=designed%20for%20both%20frequentist%20and%20Bayesian%20uncertainty%20visualization). +The code below sets up an [R `data.frame()`](https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/data.frame) with the draws in `p0` and `p1` wrapped in an `rvar`, and uses that data frame to visualize the draws using [`ggdist`](https://mjskay.github.io/ggdist/index.html), an R package building on [`ggplot2`](https://ggplot2.tidyverse.org/) and ["designed for both frequentist and Bayesian uncertainty visualization"](https://mjskay.github.io/ggdist/index.html#:~:text=designed%20for%20both%20frequentist%20and%20Bayesian%20uncertainty%20visualization). -The below plot, @fig-showcase-probabilities uses `ggdist`'s [`stat_dotsinterval()`](https://mjskay.github.io/ggdist/articles/dotsinterval.html), which by default visualizes +The below plot, @fig-showcase-probabilities uses `ggdist`'s [`stat_dotsinterval()`](https://mjskay.github.io/ggdist/articles/dotsinterval.html), which by default visualizes * [an `rvar`'s median and central 66% and 95% intervals](https://mjskay.github.io/ggdist/reference/stat_dotsinterval.html#:~:text=point_interval%20%3D%20%22median_qi%22%2C%0A%20%20.width%20%3D%20c(0.66%2C%200.95)%2C) using a black dot and lines of varying thicknesses as when using `ggdist`'s [`stat_pointinterval()`](https://mjskay.github.io/ggdist/reference/stat_pointinterval.html#examples) and * an `rvar`'s draws using grey dots as when using `ggdist`'s [`stat_dots()`](https://mjskay.github.io/ggdist/reference/stat_dots.html#examples): @@ -334,19 +333,19 @@ r1 = rvar(p1) ggplot(data.frame( rv_name=c("control", "treatment"), rv=c(r0, r1) )) + - aes(xdist=rv, y=rv_name) + - labs(x="probabilities of death", y="patient group") + + aes(xdist=rv, y=rv_name) + + labs(x="probabilities of death", y="patient group") + stat_dotsinterval() ``` -`rvar`s make it easy to compute functions of random variables, such as +`rvar`s make it easy to compute functions of random variables, such as * differences, e.g. $p_0 - p_1$: `r0 - r1` computes an `rvar` which prints as `r r0 - r1`, indicating the **sample mean** and the **sample standard deviation** of the difference of the probabilities of death, * products, e.g. $p_0 \, p_1$: `r0 * r1` computes an `rvar` which prints as `r r0 * r1` which in this case -has no great interpretation, or +has no great interpretation, or * the odds ratios needed in task 3.b). -Below, in @fig-showcase-odds-ratios, we compute the odds ratios using the `rvar`s and visualize its median, central intervals and draws, as above in @fig-showcase-probabilities: +Below, in @fig-showcase-odds-ratios, we compute the odds ratios using the `rvar`s and visualize its median, central intervals and draws, as above in @fig-showcase-probabilities: ```{r} #| label: fig-showcase-odds-ratios #| fig-cap: Odds ratios of the two patient groups. @@ -354,8 +353,8 @@ rodds_ratio = (r1/(1-r1))/(r0/(1-r0)) ggplot(data.frame( rv=c(rodds_ratio) )) + - aes(xdist=rv) + - labs(x="odds ratio", y="relative amount of draws") + + aes(xdist=rv) + + labs(x="odds ratio", y="relative amount of draws") + stat_dotsinterval() ``` @@ -370,16 +369,16 @@ You can use @fig-showcase-odds-ratios to visually check whether the answers you ::: :::: - + ## (c) -Write your answers and code here! +Write your answers and code here! -# Inference for the difference between normal means (3 points) +# Inference for the difference between normal means (3 points) Loading the library and the data. ``` {r} @@ -394,15 +393,15 @@ head(windshieldy2) ## (a) -Write your answers here! +Write your answers here! ## (b) -Write your answers and code here! -```{r} +Write your answers and code here! +```{r} # Useful functions: mean(), length(), sqrt(), sum(), # rtnew() (from aaltobda), quantile() and hist(). ``` @@ -412,7 +411,7 @@ Write your answers and code here! ## (c) -Write your answers here! +Write your answers here! @@ -424,15 +423,14 @@ Write your answers here! *This block will only be visible in your HTML output, but will be hidden when rendering to PDF with quarto for the submission.* **Make sure that this does not get displayed in the PDF!** - + The following will check the functions for which `markmyassignment` has been set up: - -```{r} -mark_my_assignment() -``` + +```{r} +mark_my_assignment() +``` ::: :::: - diff --git a/assignments/template4.qmd b/assignments/template4.qmd index de83f7bf..952c159d 100644 --- a/assignments/template4.qmd +++ b/assignments/template4.qmd @@ -1,15 +1,15 @@ --- title: "Assignment 4" author: anonymous # <-- hand in anonymously -format: +format: html: toc: true code-tools: true - code-line-numbers: true + code-line-numbers: true number-sections: true mainfont: Georgia, serif page-layout: article - pdf: + pdf: geometry: - left=1cm,top=1cm,bottom=1cm,right=7cm number-sections: true @@ -22,37 +22,36 @@ editor: source :::: {.content-hidden when-format="pdf"} ::: {.callout-warning collapse=false} - -## Setup + +## Setup *This block will only be visible in your HTML output, but will be hidden when rendering to PDF with quarto for the submission.* **Make sure that this does not get displayed in the PDF!** - + This is the template for [assignment 4](assignment4.html). You can download the [qmd-file](https://avehtari.github.io/BDA_course_Aalto/assignments/template4.qmd) or copy the code from this rendered document after clicking on ` Code` in the top right corner. -**Please replace the instructions in this template by your own text, explaining what you are doing in each exercise.** +**Please replace the instructions in this template by your own text, explaining what you are doing in each exercise.** The following will set-up [`markmyassignment`](https://github.com/MansMeg/markmyassignment) to check your functions at the end of the notebook: - -```{r} + +```{r} if(!require(markmyassignment)){ install.packages("markmyassignment") library(markmyassignment) } assignment_path = paste("https://github.com/avehtari/BDA_course_Aalto/", "blob/master/assignments/tests/assignment4.yml", sep="") -set_assignment(assignment_path) -``` +set_assignment(assignment_path) +``` The following installs and loads the `aaltobda` package: ```{r} if(!require(aaltobda)){ - install.packages("remotes") - remotes::install_github("avehtari/BDA_course_Aalto", subdir = "rpackage", upgrade="never") + install.packages("aaltobda", repos = c("https://avehtari.github.io/BDA_course_Aalto/", getOption("repos"))) library(aaltobda) } ``` @@ -68,7 +67,7 @@ if(!require(latex2exp)){ :::: -# Bioassay model +# Bioassay model ## (a) @@ -165,15 +164,14 @@ posterior_mean <- function(alpha, beta) { *This block will only be visible in your HTML output, but will be hidden when rendering to PDF with quarto for the submission.* **Make sure that this does not get displayed in the PDF!** - + The following will check the functions for which `markmyassignment` has been set up: - -```{r} -mark_my_assignment() -``` + +```{r} +mark_my_assignment() +``` ::: :::: - diff --git a/assignments/template5.qmd b/assignments/template5.qmd index f1a7d4c0..a039645b 100644 --- a/assignments/template5.qmd +++ b/assignments/template5.qmd @@ -1,15 +1,15 @@ --- title: "Assignment 5" author: anonymous # <-- hand in anonymously -format: +format: html: toc: true code-tools: true - code-line-numbers: true + code-line-numbers: true number-sections: true mainfont: Georgia, serif page-layout: article - pdf: + pdf: geometry: - left=1cm,top=1cm,bottom=1cm,right=7cm number-sections: true @@ -22,37 +22,36 @@ editor: source :::: {.content-hidden when-format="pdf"} ::: {.callout-warning collapse=false} - -## Setup + +## Setup *This block will only be visible in your HTML output, but will be hidden when rendering to PDF with quarto for the submission.* **Make sure that this does not get displayed in the PDF!** - + This is the template for [assignment 5](assignment5.html). You can download the [qmd-file](https://avehtari.github.io/BDA_course_Aalto/assignments/template5.qmd) or copy the code from this rendered document after clicking on ` Code` in the top right corner. -**Please replace the instructions in this template by your own text, explaining what you are doing in each exercise.** +**Please replace the instructions in this template by your own text, explaining what you are doing in each exercise.** The following will set-up [`markmyassignment`](https://github.com/MansMeg/markmyassignment) to check your functions at the end of the notebook: - -```{r} + +```{r} if(!require(markmyassignment)){ install.packages("markmyassignment") library(markmyassignment) } assignment_path = paste("https://github.com/avehtari/BDA_course_Aalto/", "blob/master/assignments/tests/assignment5.yml", sep="") -set_assignment(assignment_path) -``` +set_assignment(assignment_path) +``` The following installs and loads the `aaltobda` package: ```{r} if(!require(aaltobda)){ - install.packages("remotes") - remotes::install_github("avehtari/BDA_course_Aalto", subdir = "rpackage", upgrade="never") + install.packages("aaltobda", repos = c("https://avehtari.github.io/BDA_course_Aalto/", getOption("repos"))) library(aaltobda) } ``` @@ -112,19 +111,19 @@ density_ratio <- function(alpha_propose, alpha_previous, beta_propose, beta_prev # beta_propose = 20.04, beta_previous = 24.76, # x = bioassay$x, y = bioassay$y, n = bioassay$n 0.7661784 - + } # Then implement a function called `metropolis_bioassay()` which # implements the Metropolis algorithm using the `density_ratio()`: metropolis_bioassay <- function(alpha_initial, beta_initial, alpha_sigma, beta_sigma, no_draws, x, y, n){ # Do computation here, and return as below. - # Below are "wrong" values (unlikely to actually occur) + # Below are "wrong" values (unlikely to actually occur) # in the "correct" format (such that they work with the plotting functions further down). data.frame( - alpha=c(alpha_initial, alpha_initial+alpha_sigma, alpha_initial-alpha_sigma), + alpha=c(alpha_initial, alpha_initial+alpha_sigma, alpha_initial-alpha_sigma), beta=c(beta_initial, beta_initial+beta_sigma, beta_initial-beta_sigma) ) - + } df = metropolis_bioassay(0, 0, 1, 1, 1000, bioassay$x, bioassay$y, bioassay$n) ``` @@ -177,15 +176,14 @@ mcmc_scatter(df, pars=c("alpha", "beta")) *This block will only be visible in your HTML output, but will be hidden when rendering to PDF with quarto for the submission.* **Make sure that this does not get displayed in the PDF!** - + The following will check the functions for which `markmyassignment` has been set up: - -```{r} -mark_my_assignment() -``` + +```{r} +mark_my_assignment() +``` ::: :::: - diff --git a/assignments/template6.qmd b/assignments/template6.qmd index 440585bc..16283de5 100644 --- a/assignments/template6.qmd +++ b/assignments/template6.qmd @@ -1,15 +1,15 @@ --- title: "Assignment 6" author: anonymous # <-- hand in anonymously -format: +format: html: toc: true code-tools: true - code-line-numbers: true + code-line-numbers: true number-sections: true mainfont: Georgia, serif page-layout: article - pdf: + pdf: geometry: - left=1cm,top=1cm,bottom=1cm,right=7cm number-sections: true @@ -22,19 +22,19 @@ editor: source This is the template for [assignment 6](assignment6.html). You can download the [broken stan-file](./additional_files/assignment6_linear_model.stan) and the [qmd-file](https://avehtari.github.io/BDA_course_Aalto/assignments/template6.qmd) or copy the code from this rendered document after clicking on ` Code` in the top right corner. -**Please replace the instructions in this template by your own text, explaining what you are doing in each exercise.** +**Please replace the instructions in this template by your own text, explaining what you are doing in each exercise.** :::: {.content-hidden when-format="pdf"} ::: {.callout-warning collapse=false} - -## Setup + +## Setup *This block will only be visible in your HTML output, but will be hidden when rendering to PDF with quarto for the submission.* **Make sure that this does not get displayed in the PDF!** - + @@ -43,8 +43,7 @@ JupyterHub has all the needed packages pre-installed. The following installs and loads the `aaltobda` package: ```{r} if(!require(aaltobda)){ - install.packages("remotes") - remotes::install_github("avehtari/BDA_course_Aalto", subdir = "rpackage", upgrade="never") + install.packages("aaltobda", repos = c("https://avehtari.github.io/BDA_course_Aalto/", getOption("repos"))) library(aaltobda) } ``` @@ -121,7 +120,7 @@ Write your answers/code here! *This block will only be visible in your HTML output, but will be hidden when rendering to PDF with quarto for the submission.* **Make sure that this does not get displayed in the PDF!** - + @@ -130,7 +129,7 @@ Write your answers/code here! #| warning: false # These are our observations y: the proportion of students handing in each assignment (1-8), # sorted by year (row-wise) and assignment (column-wise). -# While the code suggest a matrix structure, +# While the code suggest a matrix structure, # the result will actually be a vector of length N = no_years * no_assignments propstudents<-c(c(176, 174, 158, 135, 138, 129, 126, 123)/176, c(242, 212, 184, 177, 174, 172, 163, 156)/242, @@ -140,7 +139,7 @@ propstudents<-c(c(176, 174, 158, 135, 138, 129, 126, 123)/176, # These are our predictors x: for each observation, the corresponding assignment number. assignment <- rep(1:8, 5) # These are in some sense our test data: the proportion of students handing in the last assignment (9), -# sorted by year. +# sorted by year. # Usually, we would not want to split our data like that and instead # use e.g. Leave-One-Out Cross-Validation (LOO-CV, see e.g. http://mc-stan.org/loo/index.html) # to evaluate model performance. @@ -158,8 +157,8 @@ model_data = list(N=length(assignment), ``` **Sampling from the posterior distribution happens here**: ```{r} -#| warning: false -# This reads the file at the specified path and tries to compile it. +#| warning: false +# This reads the file at the specified path and tries to compile it. # If it fails, an error is thrown. retention_model = cmdstan_model("./additional_files/assignment6_linear_model.stan") # This "out <- capture.output(...)" construction suppresses output from cmdstanr @@ -175,21 +174,21 @@ out <- capture.output( # This extracts the draws from the sampling result as a data.frame. draws_df = fit$draws(format="draws_df") -# This does some data/draws wrangling to compute the 5, 50 and 95 percentiles of -# the mean at the specified covariate values (x_predictions). +# This does some data/draws wrangling to compute the 5, 50 and 95 percentiles of +# the mean at the specified covariate values (x_predictions). # It can be instructive to play around with each of the data processing steps # to find out what each step does, e.g. by removing parts from the back like "|> gather(pct,y,-x)" # and printing the resulting data.frame. -mu_quantiles_df = draws_df |> - subset_draws(variable = c("mu_pred")) |> - summarise_draws(~quantile2(.x, probs = c(0.05, .5, 0.95))) |> - mutate(x = 1:9) |> +mu_quantiles_df = draws_df |> + subset_draws(variable = c("mu_pred")) |> + summarise_draws(~quantile2(.x, probs = c(0.05, .5, 0.95))) |> + mutate(x = 1:9) |> pivot_longer(c(q5, q50, q95), names_to = c("pct")) # Same as above, but for the predictions. -y_quantiles_df = draws_df |> - subset_draws(variable = c("y_pred")) |> - summarise_draws(~quantile2(.x, probs = c(0.05, .5, 0.95))) |> - mutate(x = 1:9) |> +y_quantiles_df = draws_df |> + subset_draws(variable = c("y_pred")) |> + summarise_draws(~quantile2(.x, probs = c(0.05, .5, 0.95))) |> + mutate(x = 1:9) |> pivot_longer(c(q5, q50, q95), names_to = c("pct")) ``` @@ -203,14 +202,14 @@ y_quantiles_df = draws_df |> #| label: fig-posterior #| fig-cap: Describe me in your submission! ggplot() + - # scatter plot of the training data: + # scatter plot of the training data: geom_point( - aes(x, y, color=assignment), + aes(x, y, color=assignment), data=data.frame(x=assignment, y=propstudents, assignment="1-8") ) + # scatter plot of the test data: geom_point( - aes(x, y, color=assignment), + aes(x, y, color=assignment), data=data.frame(x=no_assignments, y=propstudents9, assignment="9") ) + # you have to tell us what this plots: @@ -239,7 +238,7 @@ ggplot() + *This block will only be visible in your HTML output, but will be hidden when rendering to PDF with quarto for the submission.* **Make sure that this does not get displayed in the PDF!** - + @@ -261,7 +260,7 @@ fit$cmdstan_diagnose() Write your answers/code here! -# Generalized linear model: Bioassay with Stan (4 points) +# Generalized linear model: Bioassay with Stan (4 points) ## (d) @@ -291,4 +290,3 @@ Write your answers/code here! Write your answers/code here! -