library('tidyverse') library(expand_grid) options(stringsAsFactors=FALSE) theme_set(theme_light()) ## Start by loading a pre-cleaned dataset: (load("d1.rda")) # And extract only information we need for the GAM: dg <- d1 %>% select(pou = gu_seges, farm_no, sample_no, solid_phase_prcnt, sex=SEX_FM) %>% mutate(sex = factor(sex, levels=c("Male","Female"))) str(dg) summary(dg) library('mgcv') library('oddsratio') ## NB: sex does not significantly improve fit (p=0.248): model1 <- gamm(pou ~ sex + s(solid_phase_prcnt), random=list(farm_no=~1), data=dg, family='binomial') model2 <- gamm(pou ~ s(solid_phase_prcnt), random=list(farm_no=~1), data=dg, family='binomial') anova(model1$gam, model2$gam) # but we can probably keep it anyway summary(model1) ##Final model ### model_final <- gam(pou ~ sex + s(solid_phase_prcnt) + s(farm_no, bs='re'), data=dg, family='binomial') summary(model_final) #Odds Ratio by 20 quantil% or_gam( data = dg, model = model_final, pred = "solid_phase_prcnt", percentage = 20, slice = TRUE ) ## Create a plot: fakedata <- expand_grid(solid_phase_prcnt=seq(20, 100, by=1), sex=c('Male','Female')) preds <- predict(model1$gam, newdata=fakedata, se.fit=TRUE) fakedata$Prediction <- plogis(preds$fit) fakedata$Lower <- plogis(preds$fit - 1.96*preds$se.fit) fakedata$Upper <- plogis(preds$fit + 1.96*preds$se.fit) ggplot(fakedata, aes(x=solid_phase_prcnt, y=Prediction, ymin=Lower, ymax=Upper)) + geom_ribbon(alpha=0.25) + geom_line() + scale_y_continuous(trans='logit', breaks=c(0.01, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 0.9, 0.95, 0.99)) + facet_wrap(~sex) + ylab("Predicted probability of ulceration (logit scale)") + xlab("Solid phase percent") ggsave("gam_figure.pdf", width=10, height=6) ## We need a different formulation of the same model to get confidence intervals for farm effects: model3 <- gam(pou ~ sex + s(solid_phase_prcnt) + s(farm_no, bs='re'), data=dg, family='binomial') preds <- predict(model3, newdata=tibble(farm_no=unique(dg$farm_no), solid_phase_prcnt=100, sex='Female'), type='terms', terms="s(farm_no)", se.fit=TRUE) or_farms <- tibble(Effect=str_c('Farm_', 1:10), coef=as.numeric(preds$fit), se=as.numeric(preds$se.fit)) %>% mutate(OR = exp(coef), LowerCI = exp(coef-1.96*se), UpperCI = exp(coef+1.96*se)) # We can also get sex OR from this: or_sex <- broom::tidy(model3, parametric=TRUE, conf.int=TRUE) %>% mutate(OR = exp(estimate), LowerCI = exp(conf.low), UpperCI = exp(conf.high)) # Export as a table: or_tab <- bind_rows( or_farms %>% select(Effect, OR, LowerCI, UpperCI), or_sex %>% filter(term=="sexFemale") %>% select(Effect=term, OR, LowerCI, UpperCI) ) %>% mutate_if(is.numeric, function(x) format(round(x,2))) write_excel_csv(or_tab, "or_tab.csv") ## Summary table for smoothing terms: write_excel_csv(broom::tidy(model3), "s_tab.csv")