pacman::p_load(car, # función recode
                stargazer, # tablas latex
                lme4, # multilevel
                lattice, # dotplot
                sjPlot, # multilevel plots
                dplyr,
                doBy, # agregar datos
                VGAM, # predict logit
                texreg,
                nlme,
                dplyr,
                sjmisc,
                sjPlot)
options(scipen = 999) # para desactivar notacion cientificaModelos multinivel para variables categóricas
Correspondiente a la sesión del jueves, 5 de junio de 2025
Cargar paquetes
1. Ejemplo de regresión logística (simple,1 nivel)
Ejemplo basado en Finch et al 2014, cap 7.
- Dependiente: diagnóstico de enfermedad (arteria coronaria)
 - Independiente: tiempo de caminata hasta fatigarse
 - Pregunta: en qué medida la fatiga al caminar puede predecir enfermedad coronaria?
 
Leer y explorar los datos
data=read.csv("https://github.com/cursos-metodos-facso/multinivel-facso/raw/refs/heads/main/assignment/data/CoronaryArtery.csv")
names(data)[1] "time"  "group"
summary(data)      time            group    
 Min.   : 594.0   Min.   :1.0  
 1st Qu.: 702.0   1st Qu.:1.0  
 Median : 798.0   Median :1.5  
 Mean   : 847.2   Mean   :1.5  
 3rd Qu.: 993.0   3rd Qu.:2.0  
 Max.   :1359.0   Max.   :2.0  
stargazer(data,title="Estadísticos descriptivos", type = "html", digits=2)| Statistic | N | Mean | St. Dev. | Min | Max | 
| time | 20 | 847.25 | 203.98 | 594 | 1,359 | 
| group | 20 | 1.50 | 0.51 | 1 | 2 | 
Recodificar datos de variable dependiente
data$group=rec(data$group, rec="1=0;2=1")
attach(data)
## Estimar modelo
coronary.logistic=glm(as.factor(group)~time, family = binomial, data = data)
sjPlot::tab_model(coronary.logistic, show.ci = FALSE)| as factor(group) | ||
| Predictors | Odds Ratios | p | 
| (Intercept) | 721399.97 | 0.022 | 
| time | 0.98 | 0.025 | 
| Observations | 20 | |
| R2 Tjur | 0.593 | |
#odds ratios
exp(coef(coronary.logistic))   (Intercept)           time 
721399.9685791      0.9836024 
Interpretación
- General: mientras menos tiempo camina, más probable que tenga enfermedad coronaria
 - Logit: Por cada segundo que camina, el log de los odds de enfermedad coronaria disminuye en 0.0165
 - Transformando el logit 
 =0.984: por cada segundo adicional antes de fatigarse, los odds estimados de tener un ataque al corazón se multiplican por 0.984 - Por cada minuto adicional antes de fatigarse, los odds de tener problemas cardiacos disminuyen en 
 =0.378 
Multinivel Logístico
- Datos: Bangladesh Demographic and Health Survey 2004 Dataset (LEMMA 2011)
- Variable dependiente: recepción de atención profesional prenatal (antemed), mujeres de 13 a 49 años
- mage: edad madre
- magec: edad madre centrada
- urban: territorio urbano
- Cluster: comunidad (comm)
Datos
mydata <- read.table("https://github.com/cursos-metodos-facso/multinivel-facso/raw/refs/heads/main/assignment/data/7.1.txt", header = TRUE, sep = ",")
names(mydata) [1] "comm"    "womid"   "antemed" "bord"    "mage"    "urban"   "meduc"  
 [8] "islam"   "wealth"  "magec"   "magecsq" "meduc2"  "meduc3"  "wealth2"
[15] "wealth3" "wealth4" "wealth5"
dim(mydata)[1] 5366   17
# Seleccionar variables a utilizar
mydata = mydata %>% select(comm, antemed,
                           magec, mage, urban)
names(mydata)[1] "comm"    "antemed" "magec"   "mage"    "urban"  
summary(mydata[2:3])    antemed          magec         
 Min.   :0.000   Min.   :-10.6344  
 1st Qu.:0.000   1st Qu.: -4.6344  
 Median :1.000   Median : -0.6344  
 Mean   :0.513   Mean   :  0.0000  
 3rd Qu.:1.000   3rd Qu.:  4.3656  
 Max.   :1.000   Max.   : 25.3656  
mydata_agg = mydata %>% group_by(comm) %>%
            summarise_all(funs(mean)) %>% as.data.frame()Warning: `funs()` was deprecated in dplyr 0.8.0.
ℹ Please use a list of either functions or lambdas:
# Simple named list: list(mean = mean, median = median)
# Auto named with `tibble::lst()`: tibble::lst(mean, median)
# Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
summary(mydata_agg)      comm          antemed           magec               mage      
 Min.   :  1.0   Min.   :0.0000   Min.   :-4.92008   Min.   :18.71  
 1st Qu.: 91.0   1st Qu.:0.3125   1st Qu.:-1.50393   1st Qu.:22.13  
 Median :181.0   Median :0.5333   Median :-0.13436   Median :23.50  
 Mean   :222.6   Mean   :0.5323   Mean   :-0.08881   Mean   :23.55  
 3rd Qu.:332.0   3rd Qu.:0.7500   3rd Qu.: 1.22278   3rd Qu.:24.86  
 Max.   :550.0   Max.   :1.0000   Max.   : 6.36564   Max.   :30.00  
     urban      
 Min.   :0.000  
 1st Qu.:0.000  
 Median :0.000  
 Mean   :0.338  
 3rd Qu.:1.000  
 Max.   :1.000  
stargazer(mydata_agg, type="text")
============================================
Statistic  N   Mean   St. Dev.  Min    Max  
--------------------------------------------
comm      361 222.593 163.308    1     550  
antemed   361  0.532   0.269   0.000  1.000 
magec     361 -0.089   1.988   -4.920 6.366 
mage      361 23.546   1.988   18.714 30.000
urban     361  0.338   0.474     0      1   
--------------------------------------------
str(mydata_agg)'data.frame':   361 obs. of  5 variables:
 $ comm   : int  1 2 3 4 5 6 7 8 9 10 ...
 $ antemed: num  0.5 0.368 0.381 0.667 0.318 ...
 $ magec  : num  1.366 -1.003 1.508 1.032 0.775 ...
 $ mage   : num  25 22.6 25.1 24.7 24.4 ...
 $ urban  : num  0 0 0 0 0 0 0 0 0 0 ...
Casos nivel 2
casesL2 = mydata %>% select(comm, antemed) %>% group_by(comm) %>% mutate(antemed.length=length(antemed)) # numero de casos por comunidad
summary(casesL2)      comm          antemed      antemed.length 
 Min.   :  1.0   Min.   :0.000   Min.   : 3.00  
 1st Qu.: 82.0   1st Qu.:0.000   1st Qu.:13.00  
 Median :176.0   Median :1.000   Median :16.00  
 Mean   :214.3   Mean   :0.513   Mean   :15.98  
 3rd Qu.:328.0   3rd Qu.:1.000   3rd Qu.:19.00  
 Max.   :550.0   Max.   :1.000   Max.   :25.00  
Especificar modelo de intercepto aleatorio
fit <- glmer(antemed ~ (1 | comm), family = binomial, data = mydata)
summary(fit)Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: antemed ~ (1 | comm)
   Data: mydata
     AIC      BIC   logLik deviance df.resid 
  6639.5   6652.7  -3317.8   6635.5     5364 
Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.7779 -0.7458  0.3423  0.7118  2.6784 
Random effects:
 Groups Name        Variance Std.Dev.
 comm   (Intercept) 1.464    1.21    
Number of obs: 5366, groups:  comm, 361
Fixed effects:
            Estimate Std. Error z value Pr(>|z|)  
(Intercept)  0.14809    0.07178   2.063   0.0391 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sjPlot::tab_model(fit, show.ci = FALSE)| antemed | ||
|---|---|---|
| Predictors | Odds Ratios | p | 
| (Intercept) | 1.16 | 0.039 | 
| Random Effects | ||
| σ2 | 3.29 | |
| τ00 comm | 1.46 | |
| ICC | 0.31 | |
| N comm | 361 | |
| Observations | 5366 | |
| Marginal R2 / Conditional R2 | 0.000 / 0.308 | |
Interpretación:
- Los log-odds de recibir cuidado prenatal en una comunidad “promedio” (con 
 ) se estima como - El intercepto para una comunidad 
 es 0.148+ - La varianza de 
 es =1.464 
Ajuste
Para evaluar el ajuste, una posibilidad es comparar con un modelo logístico sin efectos aleatorios
fita <- glm(antemed ~ 1, data = mydata, family = binomial("logit"))
anova(fit,fita)Data: mydata
Models:
fita: antemed ~ 1
fit: antemed ~ (1 | comm)
     npar    AIC    BIC  logLik deviance  Chisq Df            Pr(>Chisq)    
fita    1 7437.2 7443.8 -3717.6   7435.2                                    
fit     2 6639.5 6652.7 -3317.8   6635.5 799.68  1 < 0.00000000000000022 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Dado que el test es signifícativo, existe evidencia de que la varianza entre comunidades no es cero.
Correlación intra-clase
Para el cálculo de la correlación intra-clase hay que considerar el escalamiento de la varianza de los residuos a nivel individual. Para el caso de un modelo probit equivale a 1, mientras que para un logit=3.29
(icc=1.46/(3.29+1.46))[1] 0.3073684
Agregando variable nivel 2 e interacción
fit_l2 <- glmer(antemed ~ magec + urban + (1 | comm), 
          data = mydata, family = binomial("logit"))
fit_int <- glmer(antemed ~ magec + urban + magec*urban + (1 + magec | comm), data = mydata, family = binomial("logit"))boundary (singular) fit: see help('isSingular')
sjPlot::tab_model(fit_l2, fit_int,
                  show.ci = FALSE)boundary (singular) fit: see help('isSingular')
| antemed | antemed | |||
|---|---|---|---|---|
| Predictors | Odds Ratios | p | Odds Ratios | p | 
| (Intercept) | 0.71 | <0.001 | 0.71 | <0.001 | 
| magec | 0.97 | <0.001 | 0.96 | <0.001 | 
| urban | 4.46 | <0.001 | 4.46 | <0.001 | 
| magec × urban | 1.02 | 0.170 | ||
| Random Effects | ||||
| σ2 | 3.29 | 3.29 | ||
| τ00 | 0.97 comm | 0.97 comm | ||
| τ11 | 0.00 comm.magec | |||
| ρ01 | 1.00 comm | |||
| ICC | 0.23 | |||
| N | 361 comm | 361 comm | ||
| Observations | 5366 | 5366 | ||
| Marginal R2 / Conditional R2 | 0.110 / 0.313 | 0.138 / NA | ||