Skip to content

Latest commit

 

History

History
533 lines (452 loc) · 18 KB

logistic.md

File metadata and controls

533 lines (452 loc) · 18 KB

Comparisons

LMLS 2020-12-24

data

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0

## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(caret)
## Loading required package: lattice

## 
## Attaching package: 'caret'

## The following object is masked from 'package:purrr':
## 
##     lift
# Load the deaths extract from the analysis folder and inspect its columns.
# NOTE(review): setwd() ties the script to one machine's directory layout;
# consider a project-relative path instead.
# NOTE(review): the extension is spelled "cvs", not "csv" -- confirm it matches
# the file name on disk.
setwd("~/Documentos/R/Stroke/death/")
y2016deaths <- read.csv(file = "yearsdeaths.cvs")
y2016deaths %>% glimpse()
## Rows: 6,566
## Columns: 9
## $ sexo      <chr> "Women", "Women", "Men", "Men", "Women", "Women", "Men", "M…
## $ edad      <int> 70, 87, 80, 66, 57, 56, 64, 64, 37, 90, 73, 98, 85, 92, 92,…
## $ cid10     <chr> "I64", "I64", "I64", "I64", "I64", "I64", "I60", "I60", "I6…
## $ agecat    <chr> "55-74", "75+", "75+", "55-74", "55-74", "55-74", "55-74", …
## $ level     <chr> "I, II", "I, II", "III", "III", "III", "III", "III", "III",…
## $ regions   <chr> "Sierra", "Sierra", "Sierra", "Sierra", "Sierra", "Sierra",…
## $ condicion <chr> "Discharged", "Discharged", "Discharged", "Discharged", "Di…
## $ los       <int> 2, 1, 28, 14, 2, 7, 10, 3, 12, 11, 12, 5, 4, 5, 12, 8, 19, …
## $ anio      <int> 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,…

exposure

# Exposure: ICD-10 code as a factor with I63 as the reference (first) level,
# followed by I60, I61 and I64. Any other code (or a missing one) becomes NA.
exposure_codes <- c("I63", "I60", "I61", "I64")
y2016deaths$exposure <- factor(y2016deaths$cid10, levels = exposure_codes)
# Count of rows whose code fell outside the four levels above.
sum(is.na(y2016deaths$exposure)) # expected: 0
## [1] 0

outcome

# Outcome: two-level factor built from `condicion`.
#   "Discharged" -> 0 -> first level (the reference for a binomial glm)
#   "Death"      -> 1 -> second level (the modelled event)
# Any other value of `condicion` becomes NA.
# The recoded vector is piped in as factor()'s `x`, so only `levels` and
# `labels` are passed here: an additional unnamed argument would be matched to
# `exclude`, not to the data being converted.
y2016deaths$outcome <- ifelse(y2016deaths$condicion == "Discharged", 0,
                              ifelse(y2016deaths$condicion == "Death", 1, NA)) %>%
  factor(levels = c(0, 1), labels = c("Discharged", "Death"))

logistic model

# Overall (unstratified) logistic regression on all rows of y2016deaths:
# outcome (Discharged vs Death) ~ exposure (ICD-10 code, I63 as reference)
# + sexo + edad. family = binomial uses R's default logit link, so the
# coefficients are on the log-odds scale.
glm.fit <- glm(outcome ~ exposure + sexo + edad,
               data = y2016deaths,
               family = binomial)

strata

# Region-stratified models: the same formula (the overall covariates plus
# `level`) is fitted once per value of `regions`, selected through `subset`.

# Stratum: regions == "Lima/Callao"
glm.fit.1 <- glm(outcome ~ exposure + sexo + edad +level,
               data = y2016deaths,
               family = binomial, subset = y2016deaths$regions=="Lima/Callao")

# Stratum: regions == "Resto Costa"
glm.fit.2 <- glm(outcome ~ exposure + sexo + edad +level,
               data = y2016deaths,
               family = binomial, subset = y2016deaths$regions=="Resto Costa")

# Stratum: regions == "Selva"
glm.fit.3 <- glm(outcome ~ exposure + sexo + edad +level,
               data = y2016deaths,
               family = binomial, subset = y2016deaths$regions=="Selva")

# Stratum: regions == "Sierra"
glm.fit.4 <- glm(outcome ~ exposure + sexo + edad +level,
               data = y2016deaths,
               family = binomial, subset = y2016deaths$regions=="Sierra")

# Level-stratified models: the overall covariates plus `regions`, fitted once
# per value of `level`, selected through `subset`.
# NOTE(review): the rendered summaries of both fits report one observation
# deleted due to missingness; presumably a row with NA `level` makes the
# subset comparison NA and is carried in as an all-NA row -- confirm.

# Stratum: level == "I, II"
glm.fit.5 <- glm(outcome ~ exposure + sexo + edad +regions,
               data = y2016deaths,
               family = binomial, subset = y2016deaths$level=="I, II")

# Stratum: level == "III"
glm.fit.6 <- glm(outcome ~ exposure + sexo + edad +regions,
               data = y2016deaths,
               family = binomial, subset = y2016deaths$level=="III")

Coefficients and odds ratios (OR)

# Coefficient table (log-odds scale), deviances and AIC for the overall model.
summary(glm.fit)
## 
## Call:
## glm(formula = outcome ~ exposure + sexo + edad, family = binomial, 
##     data = y2016deaths)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.8725  -0.5266  -0.4483  -0.3729   2.5386  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.756189   0.232665 -16.144  < 2e-16 ***
## exposureI60  1.399018   0.136506  10.249  < 2e-16 ***
## exposureI61  0.904375   0.122736   7.368 1.73e-13 ***
## exposureI64  0.414292   0.112511   3.682 0.000231 ***
## sexoWomen    0.196411   0.079564   2.469 0.013565 *  
## edad         0.015121   0.002984   5.067 4.05e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4693.9  on 6565  degrees of freedom
## Residual deviance: 4545.1  on 6560  degrees of freedom
## AIC: 4557.1
## 
## Number of Fisher Scoring iterations: 5
# Odds ratios: exponentiated coefficients of the overall model.
exp(coef(glm.fit))
## (Intercept) exposureI60 exposureI61 exposureI64   sexoWomen        edad 
##  0.02337265  4.05122049  2.47038813  1.51329933  1.21702746  1.01523582
# Confidence limits (2.5 % and 97.5 %) for the overall model, exponentiated
# onto the odds-ratio scale.
exp(confint(glm.fit))
## Waiting for profiling to be done...

##                  2.5 %     97.5 %
## (Intercept) 0.01474398 0.03671004
## exposureI60 3.10233290 5.29990044
## exposureI61 1.94521515 3.14845240
## exposureI64 1.21700336 1.89232339
## sexoWomen   1.04155262 1.42291031
## edad        1.00934399 1.02122358

## Model 1: regions == "Lima/Callao"

# Coefficient table (log-odds scale), deviances and AIC for the Lima/Callao fit.
summary(glm.fit.1)
## 
## Call:
## glm(formula = outcome ~ exposure + sexo + edad + level, family = binomial, 
##     data = y2016deaths, subset = y2016deaths$regions == "Lima/Callao")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0885  -0.4811  -0.3881  -0.3197   2.5495  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.06811    0.37939  -8.087 6.12e-16 ***
## exposureI60  1.49631    0.18554   8.065 7.34e-16 ***
## exposureI61  0.68811    0.17725   3.882 0.000104 ***
## exposureI64  0.13869    0.17361   0.799 0.424378    
## sexoWomen    0.37372    0.12612   2.963 0.003045 ** 
## edad         0.01247    0.00468   2.665 0.007706 ** 
## levelIII    -0.77833    0.15753  -4.941 7.78e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2002.5  on 3079  degrees of freedom
## Residual deviance: 1895.6  on 3073  degrees of freedom
## AIC: 1909.6
## 
## Number of Fisher Scoring iterations: 5
# Odds ratios: exponentiated coefficients of the Lima/Callao fit.
exp(coef(glm.fit.1))
## (Intercept) exposureI60 exposureI61 exposureI64   sexoWomen        edad 
##  0.04650882  4.46516642  1.98994441  1.14876723  1.45313045  1.01254956 
##    levelIII 
##  0.45917471
# Confidence limits (2.5 % and 97.5 %) for the Lima/Callao fit, exponentiated
# onto the odds-ratio scale.
exp(confint(glm.fit.1))
## Waiting for profiling to be done...

##                  2.5 %     97.5 %
## (Intercept) 0.02191171 0.09701797
## exposureI60 3.10784282 6.43830128
## exposureI61 1.40631512 2.82088118
## exposureI64 0.81799040 1.61742529
## sexoWomen   1.13611486 1.86346300
## edad        1.00335038 1.02193788
## levelIII    0.33855148 0.62832664

## Model 2: regions == "Resto Costa"

# Coefficient table (log-odds scale), deviances and AIC for the Resto Costa fit.
summary(glm.fit.2)
## 
## Call:
## glm(formula = outcome ~ exposure + sexo + edad + level, family = binomial, 
##     data = y2016deaths, subset = y2016deaths$regions == "Resto Costa")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.8209  -0.5407  -0.4713  -0.3166   2.6605  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -4.061311   0.491422  -8.264  < 2e-16 ***
## exposureI60  1.488841   0.352374   4.225 2.39e-05 ***
## exposureI61  1.507906   0.270070   5.583 2.36e-08 ***
## exposureI64  0.921608   0.281522   3.274  0.00106 ** 
## sexoWomen    0.101671   0.153344   0.663  0.50731    
## edad         0.014231   0.005602   2.540  0.01108 *  
## levelIII    -0.074570   0.181259  -0.411  0.68078    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1267.0  on 1794  degrees of freedom
## Residual deviance: 1221.6  on 1788  degrees of freedom
## AIC: 1235.6
## 
## Number of Fisher Scoring iterations: 5
# Odds ratios: exponentiated coefficients of the Resto Costa fit.
exp(coef(glm.fit.2))
## (Intercept) exposureI60 exposureI61 exposureI64   sexoWomen        edad 
##  0.01722641  4.43195586  4.51726010  2.51332884  1.10701948  1.01433277 
##    levelIII 
##  0.92814221
# Confidence limits (2.5 % and 97.5 %) for the Resto Costa fit, exponentiated
# onto the odds-ratio scale.
exp(confint(glm.fit.2))
## Waiting for profiling to be done...

##                   2.5 %     97.5 %
## (Intercept) 0.006432556 0.04423645
## exposureI60 2.213772212 8.88712262
## exposureI61 2.714712281 7.86952121
## exposureI64 1.476531720 4.47325230
## sexoWomen   0.820026137 1.49694004
## edad        1.003360301 1.02565758
## levelIII    0.649971975 1.32346968

## Model 3: regions == "Selva"

# Coefficient table (log-odds scale), deviances and AIC for the Selva fit.
summary(glm.fit.3)
## 
## Call:
## glm(formula = outcome ~ exposure + sexo + edad + level, family = binomial, 
##     data = y2016deaths, subset = y2016deaths$regions == "Selva")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2181  -0.4502  -0.4044  -0.3908   2.3306  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.500264   0.824933  -3.031 0.002439 ** 
## exposureI60  0.529110   0.488843   1.082 0.279088    
## exposureI61  0.657892   0.489440   1.344 0.178893    
## exposureI64 -0.273555   0.397621  -0.688 0.491465    
## sexoWomen   -0.037676   0.280147  -0.134 0.893017    
## edad         0.004208   0.010860   0.387 0.698392    
## levelIII     1.672433   0.435920   3.837 0.000125 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 411.07  on 579  degrees of freedom
## Residual deviance: 380.18  on 573  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 394.18
## 
## Number of Fisher Scoring iterations: 5
# Odds ratios: exponentiated coefficients of the Selva fit.
exp(coef(glm.fit.3))
## (Intercept) exposureI60 exposureI61 exposureI64   sexoWomen        edad 
##   0.0820633   1.6974201   1.9307184   0.7606702   0.9630247   1.0042171 
##    levelIII 
##   5.3251087
# Confidence limits (2.5 % and 97.5 %) for the Selva fit, exponentiated onto
# the odds-ratio scale.
exp(confint(glm.fit.3))
## Waiting for profiling to be done...

##                  2.5 %     97.5 %
## (Intercept) 0.01533718  0.3928546
## exposureI60 0.65069355  4.4994162
## exposureI61 0.73568364  5.1140594
## exposureI64 0.36001247  1.7363666
## sexoWomen   0.55274629  1.6648286
## edad        0.98332553  1.0262139
## levelIII    2.24913599 12.5643336

## Model 4: regions == "Sierra"

# Coefficient table (log-odds scale), deviances and AIC for the Sierra fit.
summary(glm.fit.4)
## 
## Call:
## glm(formula = outcome ~ exposure + sexo + edad + level, family = binomial, 
##     data = y2016deaths, subset = y2016deaths$regions == "Sierra")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0023  -0.6138  -0.5520  -0.4551   2.3853  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.216433   0.539194  -5.965 2.44e-09 ***
## exposureI60  1.014355   0.311933   3.252  0.00115 ** 
## exposureI61  0.641726   0.291842   2.199  0.02789 *  
## exposureI64 -0.004345   0.245606  -0.018  0.98589    
## sexoWomen    0.093928   0.166205   0.565  0.57199    
## edad         0.018889   0.006553   2.882  0.00395 ** 
## levelIII    -0.338013   0.223875  -1.510  0.13109    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 983.99  on 1109  degrees of freedom
## Residual deviance: 957.17  on 1103  degrees of freedom
## AIC: 971.17
## 
## Number of Fisher Scoring iterations: 4
# Odds ratios: exponentiated coefficients of the Sierra fit.
exp(coef(glm.fit.4))
## (Intercept) exposureI60 exposureI61 exposureI64   sexoWomen        edad 
##  0.04009782  2.75758294  1.89975691  0.99566476  1.09848025  1.01906845 
##    levelIII 
##  0.71318609
# Confidence limits (2.5 % and 97.5 %) for the Sierra fit, exponentiated onto
# the odds-ratio scale.
exp(confint(glm.fit.4))
## Waiting for profiling to be done...

##                  2.5 %    97.5 %
## (Intercept) 0.01358141 0.1126879
## exposureI60 1.49954225 5.1162951
## exposureI61 1.07673245 3.3946731
## exposureI64 0.62356597 1.6391252
## sexoWomen   0.79355754 1.5236787
## edad        1.00626728 1.0324802
## levelIII    0.45286056 1.0920554

## Model 5: level == "I, II"

# Coefficient table (log-odds scale), deviances and AIC for the level "I, II" fit.
summary(glm.fit.5)
## 
## Call:
## glm(formula = outcome ~ exposure + sexo + edad + regions, family = binomial, 
##     data = y2016deaths, subset = y2016deaths$level == "I, II")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9969  -0.5697  -0.4851  -0.4063   2.5269  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -3.239249   0.361577  -8.959  < 2e-16 ***
## exposureI60         1.097073   0.236406   4.641 3.47e-06 ***
## exposureI61         0.753231   0.202442   3.721 0.000199 ***
## exposureI64         0.101723   0.171856   0.592 0.553912    
## sexoWomen           0.144382   0.112032   1.289 0.197482    
## edad                0.017090   0.004273   4.000 6.34e-05 ***
## regionsResto Costa -0.327105   0.167736  -1.950 0.051162 .  
## regionsSelva       -0.560717   0.199897  -2.805 0.005031 ** 
## regionsSierra       0.036304   0.162405   0.224 0.823117    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2278.2  on 2884  degrees of freedom
## Residual deviance: 2203.9  on 2876  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 2221.9
## 
## Number of Fisher Scoring iterations: 5
# Odds ratios: exponentiated coefficients of the level "I, II" fit.
exp(coef(glm.fit.5))
##        (Intercept)        exposureI60        exposureI61        exposureI64 
##         0.03919332         2.99538671         2.12385180         1.10707647 
##          sexoWomen               edad regionsResto Costa       regionsSelva 
##         1.15532511         1.01723636         0.72100800         0.57079970 
##      regionsSierra 
##         1.03697075
# Confidence limits (2.5 % and 97.5 %) for the level "I, II" fit, exponentiated
# onto the odds-ratio scale.
exp(confint(glm.fit.5))
## Waiting for profiling to be done...

##                         2.5 %     97.5 %
## (Intercept)        0.01908401 0.07879767
## exposureI60        1.88313535 4.76663145
## exposureI61        1.43327861 3.17430391
## exposureI64        0.79703533 1.56541478
## sexoWomen          0.92785134 1.43986278
## edad               1.00882807 1.02587453
## regionsResto Costa 0.52040948 1.00530464
## regionsSelva       0.38446791 0.84305342
## regionsSierra      0.75691814 1.43185806

## Model 6: level == "III"

# Coefficient table (log-odds scale), deviances and AIC for the level "III" fit.
summary(glm.fit.6)
## 
## Call:
## glm(formula = outcome ~ exposure + sexo + edad + regions, family = binomial, 
##     data = y2016deaths, subset = y2016deaths$level == "III")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3421  -0.4955  -0.3878  -0.3239   2.5482  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -3.722895   0.323951 -11.492  < 2e-16 ***
## exposureI60         1.449754   0.173116   8.374  < 2e-16 ***
## exposureI61         0.938787   0.157055   5.977 2.27e-09 ***
## exposureI64         0.363652   0.170518   2.133   0.0330 *  
## sexoWomen           0.245344   0.114570   2.141   0.0322 *  
## edad                0.010115   0.004271   2.368   0.0179 *  
## regionsResto Costa  0.167030   0.137909   1.211   0.2258    
## regionsSelva        1.709119   0.384479   4.445 8.78e-06 ***
## regionsSierra       0.384566   0.212570   1.809   0.0704 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2397.0  on 3679  degrees of freedom
## Residual deviance: 2271.8  on 3671  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 2289.8
## 
## Number of Fisher Scoring iterations: 5
# Odds ratios: exponentiated coefficients of the level "III" fit.
exp(coef(glm.fit.6))
##        (Intercept)        exposureI60        exposureI61        exposureI64 
##          0.0241639          4.2620666          2.5568782          1.4385733 
##          sexoWomen               edad regionsResto Costa       regionsSelva 
##          1.2780608          1.0101668          1.1817896          5.5240950 
##      regionsSierra 
##          1.4689769
# Confidence limits (2.5 % and 97.5 %) for the level "III" fit, exponentiated
# onto the odds-ratio scale.
exp(confint(glm.fit.6))
## Waiting for profiling to be done...

##                         2.5 %      97.5 %
## (Intercept)        0.01269958  0.04523753
## exposureI60        3.04003454  5.99765873
## exposureI61        1.88402601  3.49023777
## exposureI64        1.02963935  2.01136320
## sexoWomen          1.02161260  1.60129541
## edad               1.00178443  1.01870625
## regionsResto Costa 0.89800677  1.54283467
## regionsSelva       2.56592316 11.72838160
## regionsSierra      0.95249752  2.19742849