1 Constants

##still do not know how to do this effectively.
path2data <- "C:/Users/huongvtl/OneDrive - Oxford University Clinical Research Unit/60HN CRE transmission manuscript/"

2 Packages

3 Functions

'%ni%' <- Negate('%in%')

readxl_allsheet <- function(filename, tibble = FALSE) {
    # I prefer straight data.frames
    # but if you like tidyverse tibbles (the default with read_excel)
    # then just pass tibble = TRUE
    sheets <- readxl::excel_sheets(filename)
    x <- lapply(sheets, function(X) readxl::read_excel(filename, sheet = X))
    if(!tibble) x <- lapply(x, as.data.frame)
    names(x) <- sheets
    x
}

4 Datasets and variables

4.1 Source data

survey <- readxl_allsheet("C:/Users/huongvtl/Dropbox/Manuscript_MicroBaseline_15DEC2025/Data Export - 60HN_PATIENT - 16Dec2025/16-12-2025-_60HN_PATIENT_P1_Data.xls")

## Warning: Expecting logical in P1142 / R1142C16: got 'công ty'

## Warning: Expecting logical in P1143 / R1143C16: got 'Công ty'

## Warning: Expecting logical in AS1278 / R1278C45: got '3'

## Warning: Expecting logical in AS1279 / R1279C45: got '3'

## Warning: Expecting logical in P1346 / R1346C16: got 'TNGT, Quốc lộ 30'

## Warning: Expecting logical in P1347 / R1347C16: got 'TNGT quốc lộ 30'

## Warning: Expecting logical in P1362 / R1362C16: got 'TNGT'

## Warning: Expecting logical in P1363 / R1363C16: got 'TNGT'

## Warning: Expecting logical in AS1463 / R1463C45: got '3'

## Warning: Expecting logical in P1640 / R1640C16: got 'Ngoài đường'

## Warning: Expecting logical in P1641 / R1641C16: got 'Ngoài đường'

## Warning: Expecting logical in P1644 / R1644C16: got 'tai nạn ngoài đường'

## Warning: Expecting logical in P1645 / R1645C16: got 'Tai nạn ngoài đường'

## Warning: Expecting logical in AS2020 / R2020C45: got '3'

## Warning: Expecting logical in AS2252 / R2252C45: got '3'

## Warning: Expecting logical in AS2262 / R2262C45: got '3'

## Warning: Expecting logical in AS2293 / R2293C45: got '3'

## Warning: Expecting logical in AS2484 / R2484C45: got '3'

## Warning: Expecting logical in AS2485 / R2485C45: got '3'

## Warning: Expecting logical in AS2526 / R2526C45: got '3'

## Warning: Expecting logical in AS2527 / R2527C45: got '3'

## Warning: Expecting logical in AS2556 / R2556C45: got '3'

## Warning: Expecting logical in AS2557 / R2557C45: got '3'

## Warning: Expecting logical in AS2568 / R2568C45: got '3'

## Warning: Expecting logical in AS2569 / R2569C45: got '3'

## Warning: Expecting logical in AS2580 / R2580C45: got '3'

## Warning: Expecting logical in AS2581 / R2581C45: got '3'

## Warning: Expecting logical in AS2588 / R2588C45: got '3'

## Warning: Expecting logical in AS2589 / R2589C45: got '3'

## Warning: Expecting logical in AS2598 / R2598C45: got '3'

## Warning: Expecting logical in AS2599 / R2599C45: got '3'

## Warning: Expecting logical in AS2600 / R2600C45: got '3'

## Warning: Expecting logical in AS2601 / R2601C45: got '3'

## Warning: Expecting logical in P2794 / R2794C16: got 'TNGT'

## Warning: Expecting logical in P2795 / R2795C16: got 'Tai nạn giao thông'

## Warning: Expecting logical in P2798 / R2798C16: got 'TNGT'

## Warning: Expecting logical in P2799 / R2799C16: got 'Tai nạn giao thông'

## Warning: Expecting logical in P2816 / R2816C16: got 'TNGT'

## Warning: Expecting logical in P2817 / R2817C16: got 'Tai nạn giao thông'

## Warning: Expecting logical in AS2820 / R2820C45: got '3'

## Warning: Expecting logical in AS2821 / R2821C45: got '3'

## Warning: Expecting logical in AS3412 / R3412C45: got '1'

## Warning: Expecting logical in AS3418 / R3418C45: got '1'

## Warning: Expecting logical in AS3445 / R3445C45: got '1'

## Warning: Expecting logical in AS3455 / R3455C45: got '1'

## Warning: Expecting logical in AS3542 / R3542C45: got '1'

## Warning: Expecting logical in P3634 / R3634C16: got 'Khoa CSSKSS'

## Warning: Expecting logical in P3635 / R3635C16: got 'Khoa CSSKSS'

## Warning: Expecting logical in P3788 / R3788C16: got 'TNGT'

## Warning: Expecting logical in P3789 / R3789C16: got 'TNGT'

## Warning: Expecting logical in P3806 / R3806C16: got 'TNGT'

## Warning: Expecting logical in P3807 / R3807C16: got 'TNGT'

## Warning: Expecting logical in P3808 / R3808C16: got 'TNGT'

## Warning: Expecting logical in P3809 / R3809C16: got 'TNGT'

## Warning: Expecting logical in P3816 / R3816C16: got 'TNGT'

## Warning: Expecting logical in P3817 / R3817C16: got 'TNGT'

## Warning: Expecting logical in P3826 / R3826C16: got 'TNGT'

## Warning: Expecting logical in P3827 / R3827C16: got 'TNGT'

## Warning: Expecting logical in P3846 / R3846C16: got 'TNGT'

## Warning: Expecting logical in P3847 / R3847C16: got 'Tai nạn giao thông'

## Warning: Expecting logical in P3852 / R3852C16: got 'tai nạn lao động'

## Warning: Expecting logical in P3853 / R3853C16: got 'Tai nạn lao động'

## Warning: Expecting logical in P3854 / R3854C16: got 'TNGT'

## Warning: Expecting logical in P3855 / R3855C16: got 'Tai nạn giao thông'

## Warning: Expecting logical in P3864 / R3864C16: got 'TNGT'

## Warning: Expecting logical in P3865 / R3865C16: got 'Tai nạn giao thông'

## Warning: Expecting logical in P3866 / R3866C16: got 'TNGT'

## Warning: Expecting logical in P3867 / R3867C16: got 'TNGT'

## Warning: Expecting logical in H1010 / R1010C8: got 'Học sinh'

## Warning: Expecting logical in H1011 / R1011C8: got 'Học sinh'

## Warning: Expecting logical in H1068 / R1068C8: got 'sống chung nhưng không kết
## hôn'

## Warning: Expecting logical in H1069 / R1069C8: got 'Sống chung nhưng không kết
## hôn'

## Warning: Expecting logical in H2840 / R2840C8: got '.'

## Warning: Expecting logical in O2204 / R2204C15: got a date

## Warning: Expecting logical in P2204 / R2204C16: got a date

## Warning: Expecting logical in O2918 / R2918C15: got a date

## Warning: Expecting logical in P2918 / R2918C16: got a date

## Warning: Expecting logical in O2919 / R2919C15: got a date

## Warning: Expecting logical in P2919 / R2919C16: got a date

## Warning: Expecting logical in O3064 / R3064C15: got a date

## Warning: Expecting logical in P3064 / R3064C16: got a date

## Warning: Expecting logical in O3065 / R3065C15: got a date

## Warning: Expecting logical in P3065 / R3065C16: got a date

## Warning: Expecting logical in O3180 / R3180C15: got a date

## Warning: Expecting logical in P3180 / R3180C16: got a date

## Warning: Expecting logical in O3181 / R3181C15: got a date

## Warning: Expecting logical in P3181 / R3181C16: got a date

cre <- read_excel("C:/Users/huongvtl/Dropbox/Manuscript_MicroBaseline_15DEC2025/60HN_ID_MALDITOF_confirmation_20251128.xlsx")

discharge_date <- read_excel("C:/Users/huongvtl/Dropbox/Manuscript_MicroBaseline_15DEC2025/60HN - DischargeDate_no_discharge_sample_27Nov25.xlsx")

4.2 CRE datasets

CRE_admission <- subset(cre, cre$SampleSchedule =='ADMISSION'); 
CRE_admission <- CRE_admission %>%
  filter(Identification_MALDITOF %ni% c('Aeromonas caviae', 'Aeromonas hydrophila','Aeromonas veronii', 'Enterococcus faecium'))

CRE_discharge <- subset(cre, cre$SampleSchedule =='DISCHARGE')
CRE_discharge <- CRE_discharge %>%
  filter(Identification_MALDITOF %ni% c('Acinetobacter baumannii', 'Aeromonas caviae',
                                        'Aeromonas hydrophila', 'Aeromonas veronii',
                                        "Cupriavidus gilardii", "Enterococcus faecium",
                                        "Pseudomonas putida"))


length(unique(CRE_admission$USUBJID)); length(unique(CRE_discharge$USUBJID))

## [1] 361

## [1] 677

CRE_admission$hospitaltype<- ifelse(CRE_admission$SiteID %in% c('010','159'), 1, 2)
CRE_discharge$hospitaltype<- ifelse(CRE_discharge$SiteID %in% c('010','159'), 1, 2)

## subset to specific CREC or CRKP
CREC_admission <- CRE_admission %>%
  filter(Identification_MALDITOF == "Escherichia coli") %>%
  select(USUBJID, SPEC_DATE)
CREC_admission <- CREC_admission[!duplicated(CREC_admission$USUBJID),]

CREC_discharge <- CRE_discharge %>%
  filter(Identification_MALDITOF == "Escherichia coli") %>%
  select(USUBJID, SPEC_DATE)
CREC_discharge <- CREC_discharge[!duplicated(CREC_discharge$USUBJID),]


CRKP_admission <- CRE_admission %>%
  filter(Identification_MALDITOF == "Klebsiella pneumoniae") %>%
  select(USUBJID, SPEC_DATE)
CRKP_admission <- CRKP_admission[!duplicated(CRKP_admission$USUBJID),]

CRKP_discharge <- CRE_discharge %>%
  filter(Identification_MALDITOF == "Klebsiella pneumoniae") %>%
  select(USUBJID, SPEC_DATE)
CRKP_discharge <- CRKP_discharge[!duplicated(CRKP_discharge$USUBJID),]

4.3 Admission dataset

survey[["ADM"]] <- survey[["ADM"]] %>% filter(entry ==2) %>%
  mutate(hospitaltype = ifelse(SITEID %in% c('010','159'), 1, 2),
         age = 2025-YEAR_BIRTH +1,
         Ab_history = ifelse(ANB_ADMISSION %in% c(1,4),0,1),
         wardtype = as.factor(ifelse(WARD_3==1, "surgical", "medical/ID")))

survey[["SCR"]] <- survey[["SCR"]]%>% filter(entry==2) %>%
  mutate(discharge_date = SPEC_DATE_DISCHARGE) 

discharge_date <-merge(discharge_date,survey[["SCR"]], by = 'USUBJID', all = T )

discharge_date <- discharge_date %>%
  mutate(DISCHARGE_DATE = ifelse(is.na(discharge_date), `Discharge date`,discharge_date)) %>%
  select(USUBJID, DISCHARGE_DATE)

admission <- merge(survey[["ADM"]],discharge_date, by = 'USUBJID', all = T )
admission <- admission %>%
  mutate(los = as.numeric((as.POSIXct(DISCHARGE_DATE) - as.POSIXct(ADMISSION_DATE))/86400+1))

4.4 General info dataset

survey[["DEMO_HIS"]] <- survey[["DEMO_HIS"]] %>% filter(entry ==2) %>%
  mutate(JOB_final= case_when(JOB_SPECIFY %in% c("Cán bộ công chức", "Cán bộ xã") ~ 1,
            JOB_SPECIFY %in% c("Bảo vệ", "Cơ khí","Hộ lý", "Lái tàu thủy", "Lao động tự do","Vệ sỹ") ~ 4,
            JOB_SPECIFY %in% c("già", "Già","Hết độ tuổi lao động") ~ 8,
            JOB_SPECIFY %in% c("Ở nhà", "Tự do") ~ 9,
            .default = as.numeric(JOB)),
         householdsize = ADULT+CHILDREN,
         ALCOHOL_CUTDOWN_score = ifelse(ALCOHOL_CUTDOWN == 1, 0, 1),
         ALCOHOL_ANNOYED_score = ifelse(ALCOHOL_ANNOYED == 1, 0, 1),
         ALCOHOL_GUILTY_score = ifelse(ALCOHOL_GUILTY == 1, 0, 1),
         ALCOHOL_DRUG_score = ifelse(ALCOHOL_DRUG == 1, 0, 1),
         DRUG_CUTDOWN_score = ifelse(DRUG_CUTDOWN == 1, 0, 1),
         DRUG_ANNOYED_score = ifelse(DRUG_ANNOYED == 1, 0, 1),
         DRUG_GUILTY_score = ifelse(DRUG_GUILTY == 1, 0, 1),
         DRUG_DRUG_score = ifelse(DRUG_DRUG == 1, 0, 1),
         smoking = as.factor(ifelse(SMOKING == 1, "current smoker", ifelse(SMOKING == 2, "former smoker", "all non-smokers"))),
         children = as.factor(ifelse(CHILDREN == 0, "no children in the household", "with children in the household")),
         married = as.factor(ifelse(MARITAL ==1, "married", "other")),
         ambulatory = as.factor(ifelse(AMBULATORY == 1, "independent", "not independent"))
         )

survey[["DEMO_HIS"]] <- survey[["DEMO_HIS"]] %>% filter(entry ==2) %>%
  mutate(
    AlcoholCAGE = ALCOHOL_CUTDOWN_score+ALCOHOL_ANNOYED_score+ALCOHOL_GUILTY_score+ALCOHOL_DRUG_score, 
    DrugCAGE= DRUG_CUTDOWN_score+DRUG_ANNOYED_score+DRUG_GUILTY_score+DRUG_DRUG_score,
    job = as.factor(ifelse(JOB_final== 6, "farmer",ifelse(JOB_final== 7,"housewife",ifelse(JOB_final == 8, "retired", "all other jobs")))),
    )

survey[["DEMO_HIS"]] <- survey[["DEMO_HIS"]] %>% filter(entry ==2) %>%
  mutate(
    alcohol = as.factor(ifelse(AlcoholCAGE == 0, "0 problem", "1-4 problem")),
    drug = as.factor(ifelse(DrugCAGE == 0, "0 problem", "1-4 problem"))
  )

4.5 Other history dataset

InvasiveHistory <- survey[["DEMO_HIS_HISTORY_GRID"]] %>% filter(entry ==2) %>%
  filter(HIS_CONDITION ==1) %>%
  mutate(InvasiveHistory=as.factor(ifelse(HIS_YN == 2, 'Yes', 'No')))
         
MedicationHistory <- survey[["DEMO_HIS_HISTORY_GRID"]] %>% filter(entry ==2) %>%
  filter(HIS_CONDITION ==2)%>%
  mutate(MedicationHistory=as.factor(ifelse(HIS_YN == 2, 'Yes', 'No')))

TravelHistory <- survey[["DEMO_HIS_HISTORY_GRID"]] %>% filter(entry ==2) %>%
  filter(HIS_CONDITION ==3)%>%
  mutate(TravelHistory=as.factor(ifelse(HIS_YN == 2, 'Yes', 'No')))

HealthcareHistory <- survey[["DEMO_HIS_HISTORY_GRID"]] %>% filter(entry ==2) %>%
  filter(HIS_CONDITION ==4)%>%
  mutate(HealthcareHistory=as.factor(ifelse(HIS_YN == 2, 'Yes', 'No')))

4.6 Discharge dataset

discharge <- survey[["DIS"]] %>% filter(entry ==2) %>%
  mutate(
    ICDGroup = as.factor(case_when(
      FINAL_DIAG_ICD10 %in% c('A04','A04.8','A04.9','A05.9','A06.4','A08.0', 'A08.5','A09','A09.0', 'A09.9')~'ID (Intestinal)',
    FINAL_DIAG_ICD10 %in% c('A15.0', 'A15.3', 'A16.2', 'A41', 'A41.5', 'A41.8', 'A41.9','A48','A49.9', 'A79.8','A79.9', 'A93', 'A94', 'A97','B00.1', 'B01','B01.2', 'B01.8', 'B02', 'B02.7', 'B02.8','B05', 'B05.2', 'B05.8', 'B05.9', 'B17', 'B18.1', 'B18.2', 'B34.9', 'B83.8') ~ 'ID (Other)',
    FINAL_DIAG_ICD10 %in% c('C18', 'C18.9; K56.6', 'C20','C22','C22.9', 'C34.9', 'C41', 'C61', 'C67.9', 'C70.0','C91.9', 'C92.2', 'C95.0', 'D12.0', 'D17', 'D21', 'D21.9', 'D23', 'D23.9', 'D27','D37.4', 'D38.1', 'D40.0', 'D44.0', 'D48.1')~'Neoplasms',
    FINAL_DIAG_ICD10 %in% c('I10', 'I10-E11-E25-I69', 'I10; H81', 'I11.9', 'I12.5', 'I18', 'I20', 'I20.0','I20.8','I21', 'I21.4', 'I21.9', 'I24.0', 'I25', 'I25.2', 'I25.5', 'I48', 'I48.0','I48.2','I50','I50.9','I61', 'I63','I63.8', 'I64','I67','I67-I10', 'I67 - I10','I69.4', 'I70.2', 'I80', 'I87.2','I88.8', 'I92.9', 'I95', 'I99') ~ 'Circulatory diseases',
    FINAL_DIAG_ICD10 %in% c('J01','J01.9','J02','J02.9','J04.0','J06','J06.9','J09', 'J10.1','J10.8','J11','J11.1','J11.8', 'J15', 'J15.9', 'J16', 'J17.1', 'J18', 'J18-I20.2', 'J18.0', 'J18.1', 'J18.8', 'J18.9', 'J20', 'J20.9', 'J40', 'J42', 'J44', 'J44.0', 'J44.1', 'J44.9', 'J45', 'J90','J96.0', 'J96.9','J96/18') ~ 'Respiratory diseases',
   FINAL_DIAG_ICD10 %in% c('K11.2','K12.3', 'K21', 'K21.0', 'K21.9', 'K22.6', 'K25', 'K25.0', 'K25.3', 'K25.9','K26.0', 'K27', 'K27-J20', 'K27.0', 'K27.3', 'K27.9', 'K29', 'K29.0', 'K29.1','K29.2','K29.6', 'K29.9','K30', 'K35', 'K35.1', 'K35.3','K35.5','K35.8', 'K35.9','K36', 'K40', 'K40.9', 'K42', 'K50', 'K52.3', 'K52.9', 'K56', 'K56.4', 'K56.6', 'K57.0', ' K57.3','K57.9','K58','K58.0', 'K59.1', 'K60.3', 'K61.0', 'K61.2', 'K62.1', 'K62.5', 'K63', 'K63.1','K63.5','K64', 'K64.2', 'K64.3', 'K64.8', 'K65', 'K71.2', 'K71.6', 'K72.0', 'K72.2', 'K73','K74', 'K74.0','K74.1','K74.2', 'K74.6', 'K75.8','K77','K78.6', 'K80', 'K80.0','K80.1','K80.2','K80.3','K80.4','K80.8', 'K81', 'K81.0','K83', 'K83.0','K85', 'K85.0', 'K85.8','K85.9', 'K86.1','K92','K92.2','K92.8','K92.9') ~ 'Digestive diseases',
   FINAL_DIAG_ICD10 %in%  c('N00','N13.2','N13.3','N13.6','N17','N17.9','N18','N18.4','N18.5','N18.9','N19','N20','N20.0','N20.1','N20.1, N23, N20.0, K21, M47','N20.2','N21.0', 'N21.1','N23','N28','N30.0', 'N30.8','N32.0', 'N34.1', 'N39.0', 'N40', 'N43.2','N45', 'N47','N48.1','N70') ~ 'Genitourinary diseases',

   TRUE ~ 'Other')),
   ICUduration = difftime(ICU_TO, ICU_FROM, units = 'days')+1,
   CVCduration = difftime(CVC_TO, CVC_FROM, units = 'days')+1,
   PVCduration = difftime(PVC_TO, PVC_FROM, units = 'days')+1,
   UCduration = difftime(UC_TO, UC_FROM, units = 'days')+1,
   IIDduration = difftime(IID_TO, IID_FROM, units = 'days')+1
   )

4.7 Antibiotic use during hospitalization

# Antibiotic use during hospitalization at patient level
antibiotic_hospital <- survey[["ANT"]] %>% filter(entry ==2) %>%
  mutate(antibiotic_hospital = ifelse(ANB_HOS == 3, "yes", "no"))

#antibiotic agents used in hospital, each patient can have more than one antibiotic agents
antibiotic_hospital_agents <- survey[["ANT_ANB_HOSP_GRID"]] %>% filter(entry ==2) %>%
  mutate(agent = ifelse(ANB_OTHER_SPEC %in% c('Cefoperazol','Cefoperazole', 'Cefoperazon', 'Cefoperazone'), 'Cefoperazone',
                        ifelse(ANB_OTHER_SPEC %in% c('Senitram'), 'Ampicillin_sulbactam',
                               ifelse(ANB_OTHER_SPEC %in% c('Cefamandol','Cefemandol'), 'Cefamandol',
                                      ifelse(ANB_OTHER_SPEC %in% c('Vitabactam'), 'Cefoperazone_sulbactam',
                                             ifelse(ANB_OTHER_SPEC %in% c('Cefazolin'), 'Cefazolin',
                                             ifelse(ANB_OTHER_SPEC %in% c('Cefprozil'), 'Cefprozil',
                                                    ifelse(ANB_OTHER_SPEC %in% c('Cefradin'), 'Cefradin',
                                                           ifelse(ANB_OTHER_SPEC %in% c('Ceftizoxim', 'Ceftizoxime', 'Ceftizoxom', 'Ceftrizoxim'), 'Ceftizoxime',
                                                                  ifelse(ANB_OTHER_SPEC %in% c('Cefprozil'), 'Cefprozil',
                                                                  ifelse(ANB_OTHER_SPEC %in% c('Cefmetazol'), 'Cefmetazol', 
                                                                         ifelse(ANB_OTHER_SPEC %in% c('Ticarcillin + Acid clavulanic', 'Ticarcillin + Clavulanic', 'Ticarcillin/acid clavulanic'), 'Ticarcillin/acid clavulanic',
                                 ANB_HOSP_NAME))))))))))),
         number = 1,
         DOT = as.numeric((ANB_HOSP_END_DATE-ANB_HOSP_START_DATE)/86400 +1))



# calculate DOT of antibiotics used in hospital for each patient, at patient level
antibiotic_hospital_DOT <- antibiotic_hospital_agents %>%
  group_by(USUBJID) %>%
  summarise(number = sum(number), DOT=sum(DOT))

4.8 merge all data in the CRF

#select the variables that are relevant for further modeling

all_data <- merge(admission, survey[["DEMO_HIS"]][,c("USUBJID","smoking","children","married","ambulatory","job","alcohol","drug")], by = "USUBJID", all=T )
all_data<- merge(all_data, InvasiveHistory[,c("USUBJID","InvasiveHistory")], by = "USUBJID", all=T )
all_data<- merge(all_data, MedicationHistory[,c("USUBJID","MedicationHistory")], by = "USUBJID", all=T )
all_data<- merge(all_data, TravelHistory[,c("USUBJID","TravelHistory")], by = "USUBJID", all=T )
all_data<- merge(all_data, HealthcareHistory[,c("USUBJID","HealthcareHistory")], by = "USUBJID", all=T )
all_data<- merge(all_data, discharge[,c("USUBJID","ICDGroup","ICUduration","CVCduration","PVCduration","UCduration", "IIDduration",
                                        "DISCHARGE","ICU","CVC","PVC","UC","IID")], by = "USUBJID", all=T )
all_data<- merge(all_data, antibiotic_hospital[, c("USUBJID","antibiotic_hospital")], by = "USUBJID", all=T )
all_data<- merge(all_data, antibiotic_hospital_DOT, by = "USUBJID", all=T )


# to add CRE data depending on type of carriage examined for further analysis

5 Prepare risk datasets for Lasso modeling

riskdata <- merge(all_data, CREC_admission, by ="USUBJID", all=T )
riskdata <- riskdata %>%
  mutate(CRE_carriage = ifelse(is.na(SPEC_DATE), 0, 1),
         smoking2 = as.factor(ifelse(smoking == "all non-smokers", "all non-smokers", "smokers")),
         children2 = as.factor(ifelse(is.na(children), 1, children)), 
         # NA to be classified as 1= having no children living in the house; 2 = having children living in the house
         job_farmer = as.factor(ifelse(job == "farmer", "farmers", "allothers")), #1=others, 3=housewife 4= retired 2 = farmers
         job_housewife = as.factor(ifelse(job == "housewife", "housewives", "allothers")), #1=others, 3=housewife 4= retired 2 = farmers
         job_retired = as.factor(ifelse(job == "retired", "retired", "allothers")), #1=others, 3=housewife 4= retired 2 = farmers
         heart_diseases = as.factor(ifelse(as.factor(MI_HIS+CHF)==0, 0, 1)),
         vascular_diseases = as.factor(ifelse(as.factor(CVD+PVD)==0, 0,1)),
         cancer = as.factor(TUMOR_NOMETA + LEUKEMIA + LYMPHOMA + META_TUMOR),
         ICD_Circulatory = as.factor(ifelse(ICDGroup == "Circulatory diseases", "Circulatory diseases", "allothers")), 
         ICD_Genitourinary = as.factor(ifelse(ICDGroup == "Genitourinary diseases", "Genitourinary diseases", "allothers")),
         ICD_IDIntestinal = as.factor(ifelse(ICDGroup == "ID (Intestinal)", "ID (Intestinal)", "allothers")),
         ICD_IDOther = as.factor(ifelse(ICDGroup == "ID (Other)", "ID (Other)", "allothers")),
         ICD_Neoplasms = as.factor(ifelse(ICDGroup == "Neoplasms", "Neoplasms", "allothers")),
         ICD_Digestive = as.factor(ifelse(ICDGroup == "Digestive diseases", "Digestive diseases", "allothers")),
         ICD_Respiratory = as.factor(ifelse(ICDGroup == "Respiratory diseases", "Respiratory diseases", "allothers")),
         )

6 Prepare variables for Lasso modeling

###standardize predictor variables before Lasso regression because the penalty is applied to the magnitude of coefficients; standardizing (to mean 0, std dev 1) ensures all variables are treated equally, allowing for meaningful coefficient comparison and selection based on true predictive power, not scale.
##Check Defaults: Confirm glmmLassoControl settings; standardize = TRUE is the standard for 2026 to ensure the L1 penalty is consistent.
##Factor Recognition: Ensure categorical variables are recognized as factors so the algorithm can apply group-wise updates. 

#for continuous variables - check if re-scaled back when interpreting

riskdata_scaled <- 
  riskdata %>% 
  mutate(
    age_s = scale(age), 
    gender = as.factor(GENDER),
    hospital = as.factor(SITEID),
    cormobidity = as.factor(CORMOBIDITY),
    dementia = as.factor(DEMENTIA),
    cpd = as.factor(CPD),
    ctd = as.factor(CTD),
    pud = as.factor(PUD),
    mild_liver = as.factor(MILD_LIVER),
    sev_liver = as.factor(SEV_LIVER),
    hemiplegia = as.factor(HEMIPLEGIA),
    sev_renal = as.factor(SEV_RENAL),
    diabetes_noeod = as.factor(DIAB_NOEOD),
    diabetes_eod = as.factor(DIAB_EOD),
  )

#no patient has AIDS, also not include pvd as no cases have pvd

## to fix NAs and check data
vars_to_keep <- c("CRE_carriage", "age_s", "gender", "smoking2","hospital", "SITEID", "children2", "married", "ambulatory", "job_farmer", "job_housewife", "job_retired", "cormobidity", "heart_diseases", "vascular_diseases", "dementia", "cpd", "ctd","pud","mild_liver", "sev_liver", "hemiplegia", "sev_renal", "diabetes_noeod","diabetes_eod", "cancer","alcohol", "drug","wardtype","Ab_history", "InvasiveHistory", "MedicationHistory","HealthcareHistory", "hospitaltype", "ICD_Circulatory", "ICD_Genitourinary", "ICD_IDIntestinal", "ICD_IDOther", "ICD_Neoplasms", "ICD_Respiratory", "ICD_Digestive")

data_clean <- riskdata_scaled[complete.cases(riskdata_scaled[,vars_to_keep]),]

print(nrow(riskdata_scaled) - nrow(data_clean)) # Check how many rows you lost

## [1] 27

7 Fit a simple glmm model

mixed_formula <- CRE_carriage ~ age_s +gender+smoking2+children2+married+ambulatory+
  job_farmer+job_housewife+job_retired+cormobidity+
  vascular_diseases+cpd+ctd+pud+mild_liver+sev_liver+hemiplegia+sev_renal+diabetes_noeod+diabetes_eod+cancer+
  alcohol +drug+wardtype+InvasiveHistory +MedicationHistory+HealthcareHistory + hospitaltype+ Ab_history+
  ICD_Circulatory + ICD_Genitourinary + ICD_IDIntestinal + ICD_IDOther +
  ICD_Neoplasms+ICD_Respiratory+ICD_Digestive +( 1| SITEID) #not include heart_diseases and dementia yet -


simple_mixed_model <- glmer(mixed_formula, data = data_clean, family = binomial(link = "logit"))
summary(simple_mixed_model)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: CRE_carriage ~ age_s + gender + smoking2 + children2 + married +  
##     ambulatory + job_farmer + job_housewife + job_retired + cormobidity +  
##     vascular_diseases + cpd + ctd + pud + mild_liver + sev_liver +  
##     hemiplegia + sev_renal + diabetes_noeod + diabetes_eod +  
##     cancer + alcohol + drug + wardtype + InvasiveHistory + MedicationHistory +  
##     HealthcareHistory + hospitaltype + Ab_history + ICD_Circulatory +  
##     ICD_Genitourinary + ICD_IDIntestinal + ICD_IDOther + ICD_Neoplasms +  
##     ICD_Respiratory + ICD_Digestive + (1 | SITEID)
##    Data: data_clean
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##    1670.3    1882.6    -797.1    1594.3      1935 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.0742 -0.4458 -0.3612 -0.2824  4.7425 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  SITEID (Intercept) 0.05936  0.2436  
## Number of obs: 1973, groups:  SITEID, 12
## 
## Fixed effects:
##                                         Estimate Std. Error z value Pr(>|z|)
## (Intercept)                             -3.04863    0.55648  -5.478 4.29e-08
## age_s                                   -0.08125    0.08753  -0.928 0.353234
## gender2                                  0.09374    0.20118   0.466 0.641259
## smoking2smokers                          0.19723    0.20733   0.951 0.341450
## children22                               0.29312    0.13522   2.168 0.030185
## marriedother                             0.03261    0.18521   0.176 0.860219
## ambulatorynot independent                0.26976    0.31857   0.847 0.397116
## job_farmerfarmers                        0.34261    0.17785   1.926 0.054056
## job_housewifehousewives                  0.80347    0.24365   3.298 0.000975
## job_retiredretired                       0.29326    0.22513   1.303 0.192706
## cormobidity2                            -0.05679    0.18022  -0.315 0.752692
## vascular_diseases1                      -0.56210    0.46530  -1.208 0.227029
## cpdTRUE                                  1.13343    0.35865   3.160 0.001576
## ctdTRUE                                  0.06556    0.79168   0.083 0.934002
## pudTRUE                                 -0.75411    0.54187  -1.392 0.164019
## mild_liverTRUE                          -0.32935    0.55631  -0.592 0.553830
## sev_liverTRUE                            0.24569    0.58149   0.423 0.672649
## hemiplegiaTRUE                           0.87307    0.65513   1.333 0.182640
## sev_renalTRUE                            0.53548    0.40039   1.337 0.181095
## diabetes_noeodTRUE                       0.36766    0.20074   1.832 0.067016
## diabetes_eodTRUE                         1.01396    0.37583   2.698 0.006977
## cancer1                                 -1.01326    0.77143  -1.313 0.189016
## alcohol1-4 problem                       0.27727    0.22047   1.258 0.208521
## drug1-4 problem                         -0.31395    0.44966  -0.698 0.485054
## wardtypesurgical                         0.54441    0.17389   3.131 0.001743
## InvasiveHistoryYes                       0.45695    0.25613   1.784 0.074412
## MedicationHistoryYes                    -0.03693    0.17176  -0.215 0.829776
## HealthcareHistoryYes                     0.24147    0.19403   1.245 0.213315
## hospitaltype                             0.07805    0.25385   0.307 0.758502
## Ab_history                               0.20267    0.18323   1.106 0.268673
## ICD_CirculatoryCirculatory diseases      0.35174    0.24942   1.410 0.158464
## ICD_GenitourinaryGenitourinary diseases  0.23871    0.24904   0.959 0.337803
## ICD_IDIntestinalID (Intestinal)          0.81339    0.27390   2.970 0.002982
## ICD_IDOtherID (Other)                    0.07753    0.39353   0.197 0.843822
## ICD_NeoplasmsNeoplasms                  -0.12308    0.41420  -0.297 0.766344
## ICD_RespiratoryRespiratory diseases      0.41134    0.23511   1.750 0.080196
## ICD_DigestiveDigestive diseases          0.11174    0.20634   0.542 0.588136
##                                            
## (Intercept)                             ***
## age_s                                      
## gender2                                    
## smoking2smokers                            
## children22                              *  
## marriedother                               
## ambulatorynot independent                  
## job_farmerfarmers                       .  
## job_housewifehousewives                 ***
## job_retiredretired                         
## cormobidity2                               
## vascular_diseases1                         
## cpdTRUE                                 ** 
## ctdTRUE                                    
## pudTRUE                                    
## mild_liverTRUE                             
## sev_liverTRUE                              
## hemiplegiaTRUE                             
## sev_renalTRUE                              
## diabetes_noeodTRUE                      .  
## diabetes_eodTRUE                        ** 
## cancer1                                    
## alcohol1-4 problem                         
## drug1-4 problem                            
## wardtypesurgical                        ** 
## InvasiveHistoryYes                      .  
## MedicationHistoryYes                       
## HealthcareHistoryYes                       
## hospitaltype                               
## Ab_history                                 
## ICD_CirculatoryCirculatory diseases        
## ICD_GenitourinaryGenitourinary diseases    
## ICD_IDIntestinalID (Intestinal)         ** 
## ICD_IDOtherID (Other)                      
## ICD_NeoplasmsNeoplasms                     
## ICD_RespiratoryRespiratory diseases     .  
## ICD_DigestiveDigestive diseases            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 
## Correlation matrix not shown by default, as p = 37 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

initial_fixed_estimates <- fixef(simple_mixed_model)
initial_random_estimates <- ranef(simple_mixed_model)
Delta.start<-c(as.numeric(initial_fixed_estimates),rep(0,100),
               as.numeric(initial_random_estimates))
Q.start<-as.numeric(VarCorr(simple_mixed_model))

8 Find the best lambda for glmmLasso

# Define a sequence of lambda values (from high penalty to low)
lambda_seq <- seq(10, 0, by = -0.1)

#fixed formula
fixed_formula <- CRE_carriage ~ age_s +gender+smoking2+children2+married+ambulatory+
  job_farmer+job_housewife+job_retired+cormobidity+
  vascular_diseases+cpd+ctd+pud+mild_liver+sev_liver+hemiplegia+sev_renal+diabetes_noeod+diabetes_eod+cancer+
  alcohol +drug+wardtype+InvasiveHistory +MedicationHistory+HealthcareHistory + hospitaltype+ Ab_history+
  ICD_Circulatory + ICD_Genitourinary + ICD_IDIntestinal + ICD_IDOther +
  ICD_Neoplasms+ICD_Respiratory+ICD_Digestive

# Initialize a vector to store BIC or AIC values
baic_vec <- rep(Inf, length(lambda_seq))

# Create a loop to test each lambda
for(j in 1:length(lambda_seq)){
  print(paste("Testing Lambda:", lambda_seq[j]))
  #Use try() to prevent the loop from breaking if one model fails to converge
  glmm_temp <- try(glmmLasso(
    fix = fixed_formula,
    rnd = list(hospital=~1),
    data = data_clean,
    lambda = lambda_seq[j],
    family = binomial(),
    switch.NR = TRUE,
    final.re = FALSE,
    control=list(start=Delta.start,q_start=Q.start)),
    silent = TRUE)
  #If the model fit successfully, save the BIC
  if(!inherits(glmm_temp, "try-error")){
    baic_vec[j] <- glmm_temp$bic
  }
}

## [1] "Testing Lambda: 10"
## [1] "Testing Lambda: 9.9"
## [1] "Testing Lambda: 9.8"
## [1] "Testing Lambda: 9.7"
## [1] "Testing Lambda: 9.6"
## [1] "Testing Lambda: 9.5"
## [1] "Testing Lambda: 9.4"
## [1] "Testing Lambda: 9.3"
## [1] "Testing Lambda: 9.2"
## [1] "Testing Lambda: 9.1"
## [1] "Testing Lambda: 9"
## [1] "Testing Lambda: 8.9"
## [1] "Testing Lambda: 8.8"
## [1] "Testing Lambda: 8.7"
## [1] "Testing Lambda: 8.6"
## [1] "Testing Lambda: 8.5"
## [1] "Testing Lambda: 8.4"
## [1] "Testing Lambda: 8.3"
## [1] "Testing Lambda: 8.2"
## [1] "Testing Lambda: 8.1"
## [1] "Testing Lambda: 8"
## [1] "Testing Lambda: 7.9"
## [1] "Testing Lambda: 7.8"
## [1] "Testing Lambda: 7.7"
## [1] "Testing Lambda: 7.6"
## [1] "Testing Lambda: 7.5"
## [1] "Testing Lambda: 7.4"
## [1] "Testing Lambda: 7.3"
## [1] "Testing Lambda: 7.2"
## [1] "Testing Lambda: 7.1"
## [1] "Testing Lambda: 7"
## [1] "Testing Lambda: 6.9"
## [1] "Testing Lambda: 6.8"
## [1] "Testing Lambda: 6.7"
## [1] "Testing Lambda: 6.6"
## [1] "Testing Lambda: 6.5"
## [1] "Testing Lambda: 6.4"
## [1] "Testing Lambda: 6.3"
## [1] "Testing Lambda: 6.2"
## [1] "Testing Lambda: 6.1"
## [1] "Testing Lambda: 6"
## [1] "Testing Lambda: 5.9"
## [1] "Testing Lambda: 5.8"
## [1] "Testing Lambda: 5.7"
## [1] "Testing Lambda: 5.6"
## [1] "Testing Lambda: 5.5"
## [1] "Testing Lambda: 5.4"
## [1] "Testing Lambda: 5.3"
## [1] "Testing Lambda: 5.2"
## [1] "Testing Lambda: 5.1"
## [1] "Testing Lambda: 5"
## [1] "Testing Lambda: 4.9"
## [1] "Testing Lambda: 4.8"
## [1] "Testing Lambda: 4.7"
## [1] "Testing Lambda: 4.6"
## [1] "Testing Lambda: 4.5"
## [1] "Testing Lambda: 4.4"
## [1] "Testing Lambda: 4.3"
## [1] "Testing Lambda: 4.2"
## [1] "Testing Lambda: 4.1"
## [1] "Testing Lambda: 4"
## [1] "Testing Lambda: 3.9"
## [1] "Testing Lambda: 3.8"
## [1] "Testing Lambda: 3.7"
## [1] "Testing Lambda: 3.6"
## [1] "Testing Lambda: 3.5"
## [1] "Testing Lambda: 3.4"
## [1] "Testing Lambda: 3.3"
## [1] "Testing Lambda: 3.2"
## [1] "Testing Lambda: 3.1"
## [1] "Testing Lambda: 3"
## [1] "Testing Lambda: 2.9"
## [1] "Testing Lambda: 2.8"
## [1] "Testing Lambda: 2.7"
## [1] "Testing Lambda: 2.6"
## [1] "Testing Lambda: 2.5"
## [1] "Testing Lambda: 2.4"
## [1] "Testing Lambda: 2.3"
## [1] "Testing Lambda: 2.2"
## [1] "Testing Lambda: 2.1"
## [1] "Testing Lambda: 2"
## [1] "Testing Lambda: 1.9"
## [1] "Testing Lambda: 1.8"
## [1] "Testing Lambda: 1.7"
## [1] "Testing Lambda: 1.6"
## [1] "Testing Lambda: 1.5"
## [1] "Testing Lambda: 1.4"
## [1] "Testing Lambda: 1.3"
## [1] "Testing Lambda: 1.2"
## [1] "Testing Lambda: 1.1"
## [1] "Testing Lambda: 1"
## [1] "Testing Lambda: 0.9"
## [1] "Testing Lambda: 0.799999999999999"
## [1] "Testing Lambda: 0.699999999999999"
## [1] "Testing Lambda: 0.6"
## [1] "Testing Lambda: 0.5"
## [1] "Testing Lambda: 0.399999999999999"
## [1] "Testing Lambda: 0.299999999999999"
## [1] "Testing Lambda: 0.199999999999999"
## [1] "Testing Lambda: 0.0999999999999996"
## [1] "Testing Lambda: 0"

# Find the lambda that produced the lowest BIC
opt_lambda <- lambda_seq[which.min(baic_vec)]
print(paste("Optimal Lambda is:", opt_lambda))

## [1] "Optimal Lambda is: 9.7"

baic_vec

##   [1] 1822.034 1822.102 1822.113 1822.027 1825.502 1825.397 1825.293 1825.185
##   [9] 1857.073 1824.937 1824.791 1824.627 1824.448 1824.260 1824.069 1823.880
##  [17] 1823.729 1829.624 1861.217 1823.064 1829.180 1835.579 1829.174 1829.177
##  [25] 1829.181 1822.098 1822.103 1822.107 1835.439 1828.889 1835.450 1865.352
##  [33] 1829.198 1829.201 1829.194 1836.818 1835.431 1836.801 1843.034 1843.052
##  [41] 1842.992 1842.996 1843.054 2036.173 1836.791 1857.665 1849.305 1850.560
##  [49] 1871.829 1850.508 1855.952 1870.854 1863.542 1850.596 1850.605 1842.989
##  [57] 1850.608 1850.600 1850.609 1850.624 1850.633 1866.030 1865.548 2167.958
##  [65] 1850.559 1850.027 1850.603 1847.965 1946.747 1857.606 1862.996 1870.074
##  [73] 1866.586 1870.024 1877.561 1877.524 1877.589 1885.203 1877.593 1892.686
##  [81] 1885.046 1885.106 1885.051 1885.097 1885.038 1885.100 1892.668 1896.996
##  [89] 1892.722 1892.695 1892.670 1900.354 1900.356 1907.956 1900.306 1909.492
##  [97] 1908.015 1912.937 1907.215 1907.326 1907.835

failed_indices <- which(is.infinite(baic_vec))
print(lambda_seq[failed_indices])

## numeric(0)

9 Fit the final Lasso model

CRE_admission_model <- glmmLasso(
  fix=fixed_formula,
  rnd=list(hospital = ~1),
  data = data_clean,
  lambda = opt_lambda, # larger -> stronger regularization and more coefficients shrunk to zero; smaller reduces penalty
  family = binomial(link = logit),
  switch.NR=FALSE, # switch to the faster Newton-Raphson method to speed up model fitting process
  final.re=TRUE,
  control=list(start=Delta.start,q_start=Q.start)
  )

CRE_admission_model %>% summary()

## Call:
## glmmLasso(fix = fixed_formula, rnd = list(hospital = ~1), data = data_clean, 
##     lambda = opt_lambda, family = binomial(link = logit), switch.NR = FALSE, 
##     final.re = TRUE, control = list(start = Delta.start, q_start = Q.start))
## 
## 
## Fixed Effects:
## 
## Coefficients:
##                                          Estimate    StdErr  z.value   p.value
## (Intercept)                             -2.896614  0.105325 -27.5016 < 2.2e-16
## age_s                                   -0.080755  0.087546  -0.9224  0.356305
## gender2                                  0.096457  0.200970   0.4800  0.631256
## smoking2smokers                          0.200728  0.206691   0.9712  0.331473
## children22                               0.294096  0.135142   2.1762  0.029541
## marriedother                             0.032647  0.185451   0.1760  0.860263
## ambulatorynot independent                0.271756  0.318637   0.8529  0.393731
## job_farmerfarmers                        0.344491  0.177792   1.9376  0.052671
## job_housewifehousewives                  0.799224  0.243337   3.2844  0.001022
## job_retiredretired                       0.298668  0.224700   1.3292  0.183787
## cormobidity2                            -0.057584  0.180324  -0.3193  0.749471
## vascular_diseases1                      -0.562984  0.465321  -1.2099  0.226324
## cpdTRUE                                  1.129586  0.357920   3.1560  0.001600
## ctdTRUE                                  0.076895  0.791161   0.0972  0.922574
## pudTRUE                                 -0.743173  0.542339  -1.3703  0.170590
## mild_liverTRUE                          -0.317834  0.556119  -0.5715  0.567647
## sev_liverTRUE                            0.247695  0.580272   0.4269  0.669481
## hemiplegiaTRUE                           0.866535  0.654034   1.3249  0.185201
## sev_renalTRUE                            0.503581  0.394461   1.2766  0.201733
## diabetes_noeodTRUE                       0.363475  0.200314   1.8145  0.069597
## diabetes_eodTRUE                         1.014803  0.375207   2.7046  0.006838
## cancer1                                 -1.016400  0.772013  -1.3166  0.187987
## alcohol1-4 problem                       0.275713  0.220487   1.2505  0.211128
## drug1-4 problem                         -0.310287  0.450224  -0.6892  0.490708
## wardtypesurgical                         0.547903  0.173818   3.1522  0.001621
## InvasiveHistoryYes                       0.450874  0.256014   1.7611  0.078217
## MedicationHistoryYes                    -0.041735  0.171448  -0.2434  0.807675
## HealthcareHistoryYes                     0.244828  0.192925   1.2690  0.204429
## hospitaltype                             0.000000        NA       NA        NA
## Ab_history                               0.206719  0.182411   1.1333  0.257105
## ICD_CirculatoryCirculatory diseases      0.342531  0.247108   1.3862  0.165698
## ICD_GenitourinaryGenitourinary diseases  0.229770  0.248203   0.9257  0.354583
## ICD_IDIntestinalID (Intestinal)          0.794326  0.271597   2.9246  0.003448
## ICD_IDOtherID (Other)                    0.076582  0.391350   0.1957  0.844855
## ICD_NeoplasmsNeoplasms                  -0.131703  0.413808  -0.3183  0.750280
## ICD_RespiratoryRespiratory diseases      0.409848  0.235195   1.7426  0.081406
## ICD_DigestiveDigestive diseases          0.096540  0.203067   0.4754  0.634495
##                                            
## (Intercept)                             ***
## age_s                                      
## gender2                                    
## smoking2smokers                            
## children22                              *  
## marriedother                               
## ambulatorynot independent                  
## job_farmerfarmers                       .  
## job_housewifehousewives                 ** 
## job_retiredretired                         
## cormobidity2                               
## vascular_diseases1                         
## cpdTRUE                                 ** 
## ctdTRUE                                    
## pudTRUE                                    
## mild_liverTRUE                             
## sev_liverTRUE                              
## hemiplegiaTRUE                             
## sev_renalTRUE                              
## diabetes_noeodTRUE                      .  
## diabetes_eodTRUE                        ** 
## cancer1                                    
## alcohol1-4 problem                         
## drug1-4 problem                            
## wardtypesurgical                        ** 
## InvasiveHistoryYes                      .  
## MedicationHistoryYes                       
## HealthcareHistoryYes                       
## hospitaltype                               
## Ab_history                                 
## ICD_CirculatoryCirculatory diseases        
## ICD_GenitourinaryGenitourinary diseases    
## ICD_IDIntestinalID (Intestinal)         ** 
## ICD_IDOtherID (Other)                      
## ICD_NeoplasmsNeoplasms                     
## ICD_RespiratoryRespiratory diseases     .  
## ICD_DigestiveDigestive diseases            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Random Effects:
## 
## StdDev:
##           hospital
## hospital 0.2737507

model_summary <- summary(CRE_admission_model)

10 Final estimates

std_errors <- model_summary$coefficients[, "StdErr"]; 
tab <- cbind(Est = coef(CRE_admission_model), LL = coef(CRE_admission_model) - 1.96 * std_errors, UL = coef(CRE_admission_model) + 1.96 *std_errors); 

round(cbind(exp(tab),p=coef(summary(CRE_admission_model))[,4]),digits = 3) #p-value

##                                           Est    LL    UL     p
## (Intercept)                             0.055 0.045 0.068 0.000
## age_s                                   0.922 0.777 1.095 0.356
## gender2                                 1.101 0.743 1.633 0.631
## smoking2smokers                         1.222 0.815 1.833 0.331
## children22                              1.342 1.030 1.749 0.030
## marriedother                            1.033 0.718 1.486 0.860
## ambulatorynot independent               1.312 0.703 2.450 0.394
## job_farmerfarmers                       1.411 0.996 2.000 0.053
## job_housewifehousewives                 2.224 1.380 3.583 0.001
## job_retiredretired                      1.348 0.868 2.094 0.184
## cormobidity2                            0.944 0.663 1.344 0.749
## vascular_diseases1                      0.570 0.229 1.418 0.226
## cpdTRUE                                 3.094 1.534 6.241 0.002
## ctdTRUE                                 1.080 0.229 5.091 0.923
## pudTRUE                                 0.476 0.164 1.377 0.171
## mild_liverTRUE                          0.728 0.245 2.164 0.568
## sev_liverTRUE                           1.281 0.411 3.995 0.669
## hemiplegiaTRUE                          2.379 0.660 8.571 0.185
## sev_renalTRUE                           1.655 0.764 3.585 0.202
## diabetes_noeodTRUE                      1.438 0.971 2.130 0.070
## diabetes_eodTRUE                        2.759 1.322 5.756 0.007
## cancer1                                 0.362 0.080 1.643 0.188
## alcohol1-4 problem                      1.317 0.855 2.030 0.211
## drug1-4 problem                         0.733 0.303 1.772 0.491
## wardtypesurgical                        1.730 1.230 2.432 0.002
## InvasiveHistoryYes                      1.570 0.950 2.593 0.078
## MedicationHistoryYes                    0.959 0.685 1.342 0.808
## HealthcareHistoryYes                    1.277 0.875 1.864 0.204
## hospitaltype                            1.000    NA    NA    NA
## Ab_history                              1.230 0.860 1.758 0.257
## ICD_CirculatoryCirculatory diseases     1.409 0.868 2.286 0.166
## ICD_GenitourinaryGenitourinary diseases 1.258 0.774 2.047 0.355
## ICD_IDIntestinalID (Intestinal)         2.213 1.300 3.768 0.003
## ICD_IDOtherID (Other)                   1.080 0.501 2.325 0.845
## ICD_NeoplasmsNeoplasms                  0.877 0.390 1.973 0.750
## ICD_RespiratoryRespiratory diseases     1.507 0.950 2.389 0.081
## ICD_DigestiveDigestive diseases         1.101 0.740 1.640 0.634

60HN - ASPARNet study - CRE Admission Risk: Datasets, Variables and Models

Vu Thi Lan Huong

2026-01-20

1 Constants

2 Packages

3 Functions

4 Datasets and variables

4.1 Source data

4.2 CRE datasets

4.3 Admission dataset

4.4 General info dataset

4.5 Other history dataset

4.6 Discharge dataset

4.7 Antibiotic use during hospitalization

4.8 merge all data in the CRF

5 Prepare risk datasets for Lasso modeling

6 Prepare variables for Lasso modeling

7 Fit a simple glmm model

8 Find the best lambda for glmmLasso

9 Fit the final Lasso model

10 Final estimates