1 Constants

##still do not know how to do this effectively.
path2data <- "C:/Users/huongvtl/OneDrive - Oxford University Clinical Research Unit/60HN CRE transmission manuscript/"

2 Packages

3 Functions

'%ni%' <- Negate('%in%')

readxl_allsheet <- function(filename, tibble = FALSE) {
    # I prefer straight data.frames
    # but if you like tidyverse tibbles (the default with read_excel)
    # then just pass tibble = TRUE
    sheets <- readxl::excel_sheets(filename)
    x <- lapply(sheets, function(X) readxl::read_excel(filename, sheet = X))
    if(!tibble) x <- lapply(x, as.data.frame)
    names(x) <- sheets
    x
}

4 Datasets and variables

4.1 Source data

survey <- readxl_allsheet("C:/Users/huongvtl/Dropbox/Manuscript_MicroBaseline_15DEC2025/Data Export - 60HN_PATIENT - 16Dec2025/16-12-2025-_60HN_PATIENT_P1_Data.xls")
## Warning: Expecting logical in P1142 / R1142C16: got 'công ty'
## Warning: Expecting logical in P1143 / R1143C16: got 'Công ty'
## Warning: Expecting logical in AS1278 / R1278C45: got '3'
## Warning: Expecting logical in AS1279 / R1279C45: got '3'
## Warning: Expecting logical in P1346 / R1346C16: got 'TNGT, Quốc lộ 30'
## Warning: Expecting logical in P1347 / R1347C16: got 'TNGT quốc lộ 30'
## Warning: Expecting logical in P1362 / R1362C16: got 'TNGT'
## Warning: Expecting logical in P1363 / R1363C16: got 'TNGT'
## Warning: Expecting logical in AS1463 / R1463C45: got '3'
## Warning: Expecting logical in P1640 / R1640C16: got 'Ngoài đường'
## Warning: Expecting logical in P1641 / R1641C16: got 'Ngoài đường'
## Warning: Expecting logical in P1644 / R1644C16: got 'tai nạn ngoài đường'
## Warning: Expecting logical in P1645 / R1645C16: got 'Tai nạn ngoài đường'
## Warning: Expecting logical in AS2020 / R2020C45: got '3'
## Warning: Expecting logical in AS2252 / R2252C45: got '3'
## Warning: Expecting logical in AS2262 / R2262C45: got '3'
## Warning: Expecting logical in AS2293 / R2293C45: got '3'
## Warning: Expecting logical in AS2484 / R2484C45: got '3'
## Warning: Expecting logical in AS2485 / R2485C45: got '3'
## Warning: Expecting logical in AS2526 / R2526C45: got '3'
## Warning: Expecting logical in AS2527 / R2527C45: got '3'
## Warning: Expecting logical in AS2556 / R2556C45: got '3'
## Warning: Expecting logical in AS2557 / R2557C45: got '3'
## Warning: Expecting logical in AS2568 / R2568C45: got '3'
## Warning: Expecting logical in AS2569 / R2569C45: got '3'
## Warning: Expecting logical in AS2580 / R2580C45: got '3'
## Warning: Expecting logical in AS2581 / R2581C45: got '3'
## Warning: Expecting logical in AS2588 / R2588C45: got '3'
## Warning: Expecting logical in AS2589 / R2589C45: got '3'
## Warning: Expecting logical in AS2598 / R2598C45: got '3'
## Warning: Expecting logical in AS2599 / R2599C45: got '3'
## Warning: Expecting logical in AS2600 / R2600C45: got '3'
## Warning: Expecting logical in AS2601 / R2601C45: got '3'
## Warning: Expecting logical in P2794 / R2794C16: got 'TNGT'
## Warning: Expecting logical in P2795 / R2795C16: got 'Tai nạn giao thông'
## Warning: Expecting logical in P2798 / R2798C16: got 'TNGT'
## Warning: Expecting logical in P2799 / R2799C16: got 'Tai nạn giao thông'
## Warning: Expecting logical in P2816 / R2816C16: got 'TNGT'
## Warning: Expecting logical in P2817 / R2817C16: got 'Tai nạn giao thông'
## Warning: Expecting logical in AS2820 / R2820C45: got '3'
## Warning: Expecting logical in AS2821 / R2821C45: got '3'
## Warning: Expecting logical in AS3412 / R3412C45: got '1'
## Warning: Expecting logical in AS3418 / R3418C45: got '1'
## Warning: Expecting logical in AS3445 / R3445C45: got '1'
## Warning: Expecting logical in AS3455 / R3455C45: got '1'
## Warning: Expecting logical in AS3542 / R3542C45: got '1'
## Warning: Expecting logical in P3634 / R3634C16: got 'Khoa CSSKSS'
## Warning: Expecting logical in P3635 / R3635C16: got 'Khoa CSSKSS'
## Warning: Expecting logical in P3788 / R3788C16: got 'TNGT'
## Warning: Expecting logical in P3789 / R3789C16: got 'TNGT'
## Warning: Expecting logical in P3806 / R3806C16: got 'TNGT'
## Warning: Expecting logical in P3807 / R3807C16: got 'TNGT'
## Warning: Expecting logical in P3808 / R3808C16: got 'TNGT'
## Warning: Expecting logical in P3809 / R3809C16: got 'TNGT'
## Warning: Expecting logical in P3816 / R3816C16: got 'TNGT'
## Warning: Expecting logical in P3817 / R3817C16: got 'TNGT'
## Warning: Expecting logical in P3826 / R3826C16: got 'TNGT'
## Warning: Expecting logical in P3827 / R3827C16: got 'TNGT'
## Warning: Expecting logical in P3846 / R3846C16: got 'TNGT'
## Warning: Expecting logical in P3847 / R3847C16: got 'Tai nạn giao thông'
## Warning: Expecting logical in P3852 / R3852C16: got 'tai nạn lao động'
## Warning: Expecting logical in P3853 / R3853C16: got 'Tai nạn lao động'
## Warning: Expecting logical in P3854 / R3854C16: got 'TNGT'
## Warning: Expecting logical in P3855 / R3855C16: got 'Tai nạn giao thông'
## Warning: Expecting logical in P3864 / R3864C16: got 'TNGT'
## Warning: Expecting logical in P3865 / R3865C16: got 'Tai nạn giao thông'
## Warning: Expecting logical in P3866 / R3866C16: got 'TNGT'
## Warning: Expecting logical in P3867 / R3867C16: got 'TNGT'
## Warning: Expecting logical in H1010 / R1010C8: got 'Học sinh'
## Warning: Expecting logical in H1011 / R1011C8: got 'Học sinh'
## Warning: Expecting logical in H1068 / R1068C8: got 'sống chung nhưng không kết
## hôn'
## Warning: Expecting logical in H1069 / R1069C8: got 'Sống chung nhưng không kết
## hôn'
## Warning: Expecting logical in H2840 / R2840C8: got '.'
## Warning: Expecting logical in O2204 / R2204C15: got a date
## Warning: Expecting logical in P2204 / R2204C16: got a date
## Warning: Expecting logical in O2918 / R2918C15: got a date
## Warning: Expecting logical in P2918 / R2918C16: got a date
## Warning: Expecting logical in O2919 / R2919C15: got a date
## Warning: Expecting logical in P2919 / R2919C16: got a date
## Warning: Expecting logical in O3064 / R3064C15: got a date
## Warning: Expecting logical in P3064 / R3064C16: got a date
## Warning: Expecting logical in O3065 / R3065C15: got a date
## Warning: Expecting logical in P3065 / R3065C16: got a date
## Warning: Expecting logical in O3180 / R3180C15: got a date
## Warning: Expecting logical in P3180 / R3180C16: got a date
## Warning: Expecting logical in O3181 / R3181C15: got a date
## Warning: Expecting logical in P3181 / R3181C16: got a date
cre <- read_excel("C:/Users/huongvtl/Dropbox/Manuscript_MicroBaseline_15DEC2025/60HN_ID_MALDITOF_confirmation_20251128.xlsx")

discharge_date <- read_excel("C:/Users/huongvtl/Dropbox/Manuscript_MicroBaseline_15DEC2025/60HN - DischargeDate_no_discharge_sample_27Nov25.xlsx")

4.2 CRE datasets

CRE_admission <- subset(cre, cre$SampleSchedule =='ADMISSION'); 
CRE_admission <- CRE_admission %>%
  filter(Identification_MALDITOF %ni% c('Aeromonas caviae', 'Aeromonas hydrophila','Aeromonas veronii', 'Enterococcus faecium'))

CRE_discharge <- subset(cre, cre$SampleSchedule =='DISCHARGE')
CRE_discharge <- CRE_discharge %>%
  filter(Identification_MALDITOF %ni% c('Acinetobacter baumannii', 'Aeromonas caviae',
                                        'Aeromonas hydrophila', 'Aeromonas veronii',
                                        "Cupriavidus gilardii", "Enterococcus faecium",
                                        "Pseudomonas putida"))


length(unique(CRE_admission$USUBJID)); length(unique(CRE_discharge$USUBJID))
## [1] 361
## [1] 677
CRE_admission$hospitaltype<- ifelse(CRE_admission$SiteID %in% c('010','159'), 1, 2)
CRE_discharge$hospitaltype<- ifelse(CRE_discharge$SiteID %in% c('010','159'), 1, 2)

## subset to specific CREC or CRKP
CREC_admission <- CRE_admission %>%
  filter(Identification_MALDITOF == "Escherichia coli") %>%
  select(USUBJID, SPEC_DATE)
CREC_admission <- CREC_admission[!duplicated(CREC_admission$USUBJID),]

CREC_discharge <- CRE_discharge %>%
  filter(Identification_MALDITOF == "Escherichia coli") %>%
  select(USUBJID, SPEC_DATE)
CREC_discharge <- CREC_discharge[!duplicated(CREC_discharge$USUBJID),]


CRKP_admission <- CRE_admission %>%
  filter(Identification_MALDITOF == "Klebsiella pneumoniae") %>%
  select(USUBJID, SPEC_DATE)
CRKP_admission <- CRKP_admission[!duplicated(CRKP_admission$USUBJID),]

CRKP_discharge <- CRE_discharge %>%
  filter(Identification_MALDITOF == "Klebsiella pneumoniae") %>%
  select(USUBJID, SPEC_DATE)
CRKP_discharge <- CRKP_discharge[!duplicated(CRKP_discharge$USUBJID),]

4.3 Admission dataset

survey[["ADM"]] <- survey[["ADM"]] %>% filter(entry ==2) %>%
  mutate(hospitaltype = ifelse(SITEID %in% c('010','159'), 1, 2),
         age = 2025-YEAR_BIRTH +1,
         Ab_history = ifelse(ANB_ADMISSION %in% c(1,4),0,1),
         wardtype = as.factor(ifelse(WARD_3==1, "surgical", "medical/ID")))

survey[["SCR"]] <- survey[["SCR"]]%>% filter(entry==2) %>%
  mutate(discharge_date = SPEC_DATE_DISCHARGE) 

discharge_date <-merge(discharge_date,survey[["SCR"]], by = 'USUBJID', all = T )

discharge_date <- discharge_date %>%
  mutate(DISCHARGE_DATE = ifelse(is.na(discharge_date), `Discharge date`,discharge_date)) %>%
  select(USUBJID, DISCHARGE_DATE)

admission <- merge(survey[["ADM"]],discharge_date, by = 'USUBJID', all = T )
admission <- admission %>%
  mutate(los = as.numeric((as.POSIXct(DISCHARGE_DATE) - as.POSIXct(ADMISSION_DATE))/86400+1))

4.4 General info dataset

survey[["DEMO_HIS"]] <- survey[["DEMO_HIS"]] %>% filter(entry ==2) %>%
  mutate(JOB_final= case_when(JOB_SPECIFY %in% c("Cán bộ công chức", "Cán bộ xã") ~ 1,
            JOB_SPECIFY %in% c("Bảo vệ", "Cơ khí","Hộ lý", "Lái tàu thủy", "Lao động tự do","Vệ sỹ") ~ 4,
            JOB_SPECIFY %in% c("già", "Già","Hết độ tuổi lao động") ~ 8,
            JOB_SPECIFY %in% c("Ở nhà", "Tự do") ~ 9,
            .default = as.numeric(JOB)),
         householdsize = ADULT+CHILDREN,
         ALCOHOL_CUTDOWN_score = ifelse(ALCOHOL_CUTDOWN == 1, 0, 1),
         ALCOHOL_ANNOYED_score = ifelse(ALCOHOL_ANNOYED == 1, 0, 1),
         ALCOHOL_GUILTY_score = ifelse(ALCOHOL_GUILTY == 1, 0, 1),
         ALCOHOL_DRUG_score = ifelse(ALCOHOL_DRUG == 1, 0, 1),
         DRUG_CUTDOWN_score = ifelse(DRUG_CUTDOWN == 1, 0, 1),
         DRUG_ANNOYED_score = ifelse(DRUG_ANNOYED == 1, 0, 1),
         DRUG_GUILTY_score = ifelse(DRUG_GUILTY == 1, 0, 1),
         DRUG_DRUG_score = ifelse(DRUG_DRUG == 1, 0, 1),
         smoking = as.factor(ifelse(SMOKING == 1, "current smoker", ifelse(SMOKING == 2, "former smoker", "all non-smokers"))),
         children = as.factor(ifelse(CHILDREN == 0, "no children in the household", "with children in the household")),
         married = as.factor(ifelse(MARITAL ==1, "married", "other")),
         ambulatory = as.factor(ifelse(AMBULATORY == 1, "independent", "not independent"))
         )

survey[["DEMO_HIS"]] <- survey[["DEMO_HIS"]] %>% filter(entry ==2) %>%
  mutate(
    AlcoholCAGE = ALCOHOL_CUTDOWN_score+ALCOHOL_ANNOYED_score+ALCOHOL_GUILTY_score+ALCOHOL_DRUG_score, 
    DrugCAGE= DRUG_CUTDOWN_score+DRUG_ANNOYED_score+DRUG_GUILTY_score+DRUG_DRUG_score,
    job = as.factor(ifelse(JOB_final== 6, "farmer",ifelse(JOB_final== 7,"housewife",ifelse(JOB_final == 8, "retired", "all other jobs")))),
    )

survey[["DEMO_HIS"]] <- survey[["DEMO_HIS"]] %>% filter(entry ==2) %>%
  mutate(
    alcohol = as.factor(ifelse(AlcoholCAGE == 0, "0 problem", "1-4 problem")),
    drug = as.factor(ifelse(DrugCAGE == 0, "0 problem", "1-4 problem"))
  )

4.5 Other history dataset

InvasiveHistory <- survey[["DEMO_HIS_HISTORY_GRID"]] %>% filter(entry ==2) %>%
  filter(HIS_CONDITION ==1) %>%
  mutate(InvasiveHistory=as.factor(ifelse(HIS_YN == 2, 'Yes', 'No')))
         
MedicationHistory <- survey[["DEMO_HIS_HISTORY_GRID"]] %>% filter(entry ==2) %>%
  filter(HIS_CONDITION ==2)%>%
  mutate(MedicationHistory=as.factor(ifelse(HIS_YN == 2, 'Yes', 'No')))

TravelHistory <- survey[["DEMO_HIS_HISTORY_GRID"]] %>% filter(entry ==2) %>%
  filter(HIS_CONDITION ==3)%>%
  mutate(TravelHistory=as.factor(ifelse(HIS_YN == 2, 'Yes', 'No')))

HealthcareHistory <- survey[["DEMO_HIS_HISTORY_GRID"]] %>% filter(entry ==2) %>%
  filter(HIS_CONDITION ==4)%>%
  mutate(HealthcareHistory=as.factor(ifelse(HIS_YN == 2, 'Yes', 'No')))

4.6 Discharge dataset

discharge <- survey[["DIS"]] %>% filter(entry ==2) %>%
  mutate(
    ICDGroup = as.factor(case_when(
      FINAL_DIAG_ICD10 %in% c('A04','A04.8','A04.9','A05.9','A06.4','A08.0', 'A08.5','A09','A09.0', 'A09.9')~'ID (Intestinal)',
    FINAL_DIAG_ICD10 %in% c('A15.0', 'A15.3', 'A16.2', 'A41', 'A41.5', 'A41.8', 'A41.9','A48','A49.9', 'A79.8','A79.9', 'A93', 'A94', 'A97','B00.1', 'B01','B01.2', 'B01.8', 'B02', 'B02.7', 'B02.8','B05', 'B05.2', 'B05.8', 'B05.9', 'B17', 'B18.1', 'B18.2', 'B34.9', 'B83.8') ~ 'ID (Other)',
    FINAL_DIAG_ICD10 %in% c('C18', 'C18.9; K56.6', 'C20','C22','C22.9', 'C34.9', 'C41', 'C61', 'C67.9', 'C70.0','C91.9', 'C92.2', 'C95.0', 'D12.0', 'D17', 'D21', 'D21.9', 'D23', 'D23.9', 'D27','D37.4', 'D38.1', 'D40.0', 'D44.0', 'D48.1')~'Neoplasms',
    FINAL_DIAG_ICD10 %in% c('I10', 'I10-E11-E25-I69', 'I10; H81', 'I11.9', 'I12.5', 'I18', 'I20', 'I20.0','I20.8','I21', 'I21.4', 'I21.9', 'I24.0', 'I25', 'I25.2', 'I25.5', 'I48', 'I48.0','I48.2','I50','I50.9','I61', 'I63','I63.8', 'I64','I67','I67-I10', 'I67 - I10','I69.4', 'I70.2', 'I80', 'I87.2','I88.8', 'I92.9', 'I95', 'I99') ~ 'Circulatory diseases',
    FINAL_DIAG_ICD10 %in% c('J01','J01.9','J02','J02.9','J04.0','J06','J06.9','J09', 'J10.1','J10.8','J11','J11.1','J11.8', 'J15', 'J15.9', 'J16', 'J17.1', 'J18', 'J18-I20.2', 'J18.0', 'J18.1', 'J18.8', 'J18.9', 'J20', 'J20.9', 'J40', 'J42', 'J44', 'J44.0', 'J44.1', 'J44.9', 'J45', 'J90','J96.0', 'J96.9','J96/18') ~ 'Respiratory diseases',
   FINAL_DIAG_ICD10 %in% c('K11.2','K12.3', 'K21', 'K21.0', 'K21.9', 'K22.6', 'K25', 'K25.0', 'K25.3', 'K25.9','K26.0', 'K27', 'K27-J20', 'K27.0', 'K27.3', 'K27.9', 'K29', 'K29.0', 'K29.1','K29.2','K29.6', 'K29.9','K30', 'K35', 'K35.1', 'K35.3','K35.5','K35.8', 'K35.9','K36', 'K40', 'K40.9', 'K42', 'K50', 'K52.3', 'K52.9', 'K56', 'K56.4', 'K56.6', 'K57.0', ' K57.3','K57.9','K58','K58.0', 'K59.1', 'K60.3', 'K61.0', 'K61.2', 'K62.1', 'K62.5', 'K63', 'K63.1','K63.5','K64', 'K64.2', 'K64.3', 'K64.8', 'K65', 'K71.2', 'K71.6', 'K72.0', 'K72.2', 'K73','K74', 'K74.0','K74.1','K74.2', 'K74.6', 'K75.8','K77','K78.6', 'K80', 'K80.0','K80.1','K80.2','K80.3','K80.4','K80.8', 'K81', 'K81.0','K83', 'K83.0','K85', 'K85.0', 'K85.8','K85.9', 'K86.1','K92','K92.2','K92.8','K92.9') ~ 'Digestive diseases',
   FINAL_DIAG_ICD10 %in%  c('N00','N13.2','N13.3','N13.6','N17','N17.9','N18','N18.4','N18.5','N18.9','N19','N20','N20.0','N20.1','N20.1, N23, N20.0, K21, M47','N20.2','N21.0', 'N21.1','N23','N28','N30.0', 'N30.8','N32.0', 'N34.1', 'N39.0', 'N40', 'N43.2','N45', 'N47','N48.1','N70') ~ 'Genitourinary diseases',

   TRUE ~ 'Other')),
   ICUduration = difftime(ICU_TO, ICU_FROM, units = 'days')+1,
   CVCduration = difftime(CVC_TO, CVC_FROM, units = 'days')+1,
   PVCduration = difftime(PVC_TO, PVC_FROM, units = 'days')+1,
   UCduration = difftime(UC_TO, UC_FROM, units = 'days')+1,
   IIDduration = difftime(IID_TO, IID_FROM, units = 'days')+1
   )

4.7 Antibiotic use during hospitalization

# Antibiotic use during hospitalization at patient level
antibiotic_hospital <- survey[["ANT"]] %>% filter(entry ==2) %>%
  mutate(antibiotic_hospital = ifelse(ANB_HOS == 3, "yes", "no"))

#antibiotic agents used in hospital, each patient can have more than one antibiotic agents
antibiotic_hospital_agents <- survey[["ANT_ANB_HOSP_GRID"]] %>% filter(entry ==2) %>%
  mutate(agent = ifelse(ANB_OTHER_SPEC %in% c('Cefoperazol','Cefoperazole', 'Cefoperazon', 'Cefoperazone'), 'Cefoperazone',
                        ifelse(ANB_OTHER_SPEC %in% c('Senitram'), 'Ampicillin_sulbactam',
                               ifelse(ANB_OTHER_SPEC %in% c('Cefamandol','Cefemandol'), 'Cefamandol',
                                      ifelse(ANB_OTHER_SPEC %in% c('Vitabactam'), 'Cefoperazone_sulbactam',
                                             ifelse(ANB_OTHER_SPEC %in% c('Cefazolin'), 'Cefazolin',
                                             ifelse(ANB_OTHER_SPEC %in% c('Cefprozil'), 'Cefprozil',
                                                    ifelse(ANB_OTHER_SPEC %in% c('Cefradin'), 'Cefradin',
                                                           ifelse(ANB_OTHER_SPEC %in% c('Ceftizoxim', 'Ceftizoxime', 'Ceftizoxom', 'Ceftrizoxim'), 'Ceftizoxime',
                                                                  ifelse(ANB_OTHER_SPEC %in% c('Cefprozil'), 'Cefprozil',
                                                                  ifelse(ANB_OTHER_SPEC %in% c('Cefmetazol'), 'Cefmetazol', 
                                                                         ifelse(ANB_OTHER_SPEC %in% c('Ticarcillin + Acid clavulanic', 'Ticarcillin + Clavulanic', 'Ticarcillin/acid clavulanic'), 'Ticarcillin/acid clavulanic',
                                 ANB_HOSP_NAME))))))))))),
         number = 1,
         DOT = as.numeric((ANB_HOSP_END_DATE-ANB_HOSP_START_DATE)/86400 +1))



# calculate DOT of antibiotics used in hospital for each patient, at patient level
antibiotic_hospital_DOT <- antibiotic_hospital_agents %>%
  group_by(USUBJID) %>%
  summarise(number = sum(number), DOT=sum(DOT))

4.8 merge all data in the CRF

#select the variables that are relevant for further modeling

all_data <- merge(admission, survey[["DEMO_HIS"]][,c("USUBJID","smoking","children","married","ambulatory","job","alcohol","drug")], by = "USUBJID", all=T )
all_data<- merge(all_data, InvasiveHistory[,c("USUBJID","InvasiveHistory")], by = "USUBJID", all=T )
all_data<- merge(all_data, MedicationHistory[,c("USUBJID","MedicationHistory")], by = "USUBJID", all=T )
all_data<- merge(all_data, TravelHistory[,c("USUBJID","TravelHistory")], by = "USUBJID", all=T )
all_data<- merge(all_data, HealthcareHistory[,c("USUBJID","HealthcareHistory")], by = "USUBJID", all=T )
all_data<- merge(all_data, discharge[,c("USUBJID","ICDGroup","ICUduration","CVCduration","PVCduration","UCduration", "IIDduration",
                                        "DISCHARGE","ICU","CVC","PVC","UC","IID")], by = "USUBJID", all=T )
all_data<- merge(all_data, antibiotic_hospital[, c("USUBJID","antibiotic_hospital")], by = "USUBJID", all=T )
all_data<- merge(all_data, antibiotic_hospital_DOT, by = "USUBJID", all=T )


# to add CRE data depending on type of carriage examined for further analysis

5 Prepare risk datasets for Lasso modeling

riskdata <- merge(all_data, CREC_admission, by ="USUBJID", all=T )
riskdata <- riskdata %>%
  mutate(CRE_carriage = ifelse(is.na(SPEC_DATE), 0, 1),
         smoking2 = as.factor(ifelse(smoking == "all non-smokers", "all non-smokers", "smokers")),
         children2 = as.factor(ifelse(is.na(children), 1, children)), 
         # NA to be classified as 1= having no children living in the house; 2 = having children living in the house
         job_farmer = as.factor(ifelse(job == "farmer", "farmers", "allothers")), #1=others, 3=housewife 4= retired 2 = farmers
         job_housewife = as.factor(ifelse(job == "housewife", "housewives", "allothers")), #1=others, 3=housewife 4= retired 2 = farmers
         job_retired = as.factor(ifelse(job == "retired", "retired", "allothers")), #1=others, 3=housewife 4= retired 2 = farmers
         heart_diseases = as.factor(ifelse(as.factor(MI_HIS+CHF)==0, 0, 1)),
         vascular_diseases = as.factor(ifelse(as.factor(CVD+PVD)==0, 0,1)),
         cancer = as.factor(TUMOR_NOMETA + LEUKEMIA + LYMPHOMA + META_TUMOR),
         ICD_Circulatory = as.factor(ifelse(ICDGroup == "Circulatory diseases", "Circulatory diseases", "allothers")), 
         ICD_Genitourinary = as.factor(ifelse(ICDGroup == "Genitourinary diseases", "Genitourinary diseases", "allothers")),
         ICD_IDIntestinal = as.factor(ifelse(ICDGroup == "ID (Intestinal)", "ID (Intestinal)", "allothers")),
         ICD_IDOther = as.factor(ifelse(ICDGroup == "ID (Other)", "ID (Other)", "allothers")),
         ICD_Neoplasms = as.factor(ifelse(ICDGroup == "Neoplasms", "Neoplasms", "allothers")),
         ICD_Digestive = as.factor(ifelse(ICDGroup == "Digestive diseases", "Digestive diseases", "allothers")),
         ICD_Respiratory = as.factor(ifelse(ICDGroup == "Respiratory diseases", "Respiratory diseases", "allothers")),
         )

6 Prepare variables for Lasso modeling

###standardize predictor variables before Lasso regression because the penalty is applied to the magnitude of coefficients; standardizing (to mean 0, std dev 1) ensures all variables are treated equally, allowing for meaningful coefficient comparison and selection based on true predictive power, not scale.
##Check Defaults: Confirm glmmLassoControl settings; standardize = TRUE is the standard for 2026 to ensure the L1 penalty is consistent.
##Factor Recognition: Ensure categorical variables are recognized as factors so the algorithm can apply group-wise updates. 

#for continuous variables - check if re-scaled back when interpreting

riskdata_scaled <- 
  riskdata %>% 
  mutate(
    age_s = scale(age), 
    gender = as.factor(GENDER),
    hospital = as.factor(SITEID),
    cormobidity = as.factor(CORMOBIDITY),
    dementia = as.factor(DEMENTIA),
    cpd = as.factor(CPD),
    ctd = as.factor(CTD),
    pud = as.factor(PUD),
    mild_liver = as.factor(MILD_LIVER),
    sev_liver = as.factor(SEV_LIVER),
    hemiplegia = as.factor(HEMIPLEGIA),
    sev_renal = as.factor(SEV_RENAL),
    diabetes_noeod = as.factor(DIAB_NOEOD),
    diabetes_eod = as.factor(DIAB_EOD),
  )

#no patient has AIDS, also not include pvd as no cases have pvd

## to fix NAs and check data
vars_to_keep <- c("CRE_carriage", "age_s", "gender", "smoking2","hospital", "SITEID", "children2", "married", "ambulatory", "job_farmer", "job_housewife", "job_retired", "cormobidity", "heart_diseases", "vascular_diseases", "dementia", "cpd", "ctd","pud","mild_liver", "sev_liver", "hemiplegia", "sev_renal", "diabetes_noeod","diabetes_eod", "cancer","alcohol", "drug","wardtype","Ab_history", "InvasiveHistory", "MedicationHistory","HealthcareHistory", "hospitaltype", "ICD_Circulatory", "ICD_Genitourinary", "ICD_IDIntestinal", "ICD_IDOther", "ICD_Neoplasms", "ICD_Respiratory", "ICD_Digestive")

data_clean <- riskdata_scaled[complete.cases(riskdata_scaled[,vars_to_keep]),]

print(nrow(riskdata_scaled) - nrow(data_clean)) # Check how many rows you lost
## [1] 27

7 Fit a simple glmm model

mixed_formula <- CRE_carriage ~ age_s +gender+smoking2+children2+married+ambulatory+
  job_farmer+job_housewife+job_retired+cormobidity+
  vascular_diseases+cpd+ctd+pud+mild_liver+sev_liver+hemiplegia+sev_renal+diabetes_noeod+diabetes_eod+cancer+
  alcohol +drug+wardtype+InvasiveHistory +MedicationHistory+HealthcareHistory + hospitaltype+ Ab_history+
  ICD_Circulatory + ICD_Genitourinary + ICD_IDIntestinal + ICD_IDOther +
  ICD_Neoplasms+ICD_Respiratory+ICD_Digestive +( 1| SITEID) #not include heart_diseases and dementia yet -


simple_mixed_model <- glmer(mixed_formula, data = data_clean, family = binomial(link = "logit"))
summary(simple_mixed_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: CRE_carriage ~ age_s + gender + smoking2 + children2 + married +  
##     ambulatory + job_farmer + job_housewife + job_retired + cormobidity +  
##     vascular_diseases + cpd + ctd + pud + mild_liver + sev_liver +  
##     hemiplegia + sev_renal + diabetes_noeod + diabetes_eod +  
##     cancer + alcohol + drug + wardtype + InvasiveHistory + MedicationHistory +  
##     HealthcareHistory + hospitaltype + Ab_history + ICD_Circulatory +  
##     ICD_Genitourinary + ICD_IDIntestinal + ICD_IDOther + ICD_Neoplasms +  
##     ICD_Respiratory + ICD_Digestive + (1 | SITEID)
##    Data: data_clean
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##    1670.3    1882.6    -797.1    1594.3      1935 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.0742 -0.4458 -0.3612 -0.2824  4.7425 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  SITEID (Intercept) 0.05936  0.2436  
## Number of obs: 1973, groups:  SITEID, 12
## 
## Fixed effects:
##                                         Estimate Std. Error z value Pr(>|z|)
## (Intercept)                             -3.04863    0.55648  -5.478 4.29e-08
## age_s                                   -0.08125    0.08753  -0.928 0.353234
## gender2                                  0.09374    0.20118   0.466 0.641259
## smoking2smokers                          0.19723    0.20733   0.951 0.341450
## children22                               0.29312    0.13522   2.168 0.030185
## marriedother                             0.03261    0.18521   0.176 0.860219
## ambulatorynot independent                0.26976    0.31857   0.847 0.397116
## job_farmerfarmers                        0.34261    0.17785   1.926 0.054056
## job_housewifehousewives                  0.80347    0.24365   3.298 0.000975
## job_retiredretired                       0.29326    0.22513   1.303 0.192706
## cormobidity2                            -0.05679    0.18022  -0.315 0.752692
## vascular_diseases1                      -0.56210    0.46530  -1.208 0.227029
## cpdTRUE                                  1.13343    0.35865   3.160 0.001576
## ctdTRUE                                  0.06556    0.79168   0.083 0.934002
## pudTRUE                                 -0.75411    0.54187  -1.392 0.164019
## mild_liverTRUE                          -0.32935    0.55631  -0.592 0.553830
## sev_liverTRUE                            0.24569    0.58149   0.423 0.672649
## hemiplegiaTRUE                           0.87307    0.65513   1.333 0.182640
## sev_renalTRUE                            0.53548    0.40039   1.337 0.181095
## diabetes_noeodTRUE                       0.36766    0.20074   1.832 0.067016
## diabetes_eodTRUE                         1.01396    0.37583   2.698 0.006977
## cancer1                                 -1.01326    0.77143  -1.313 0.189016
## alcohol1-4 problem                       0.27727    0.22047   1.258 0.208521
## drug1-4 problem                         -0.31395    0.44966  -0.698 0.485054
## wardtypesurgical                         0.54441    0.17389   3.131 0.001743
## InvasiveHistoryYes                       0.45695    0.25613   1.784 0.074412
## MedicationHistoryYes                    -0.03693    0.17176  -0.215 0.829776
## HealthcareHistoryYes                     0.24147    0.19403   1.245 0.213315
## hospitaltype                             0.07805    0.25385   0.307 0.758502
## Ab_history                               0.20267    0.18323   1.106 0.268673
## ICD_CirculatoryCirculatory diseases      0.35174    0.24942   1.410 0.158464
## ICD_GenitourinaryGenitourinary diseases  0.23871    0.24904   0.959 0.337803
## ICD_IDIntestinalID (Intestinal)          0.81339    0.27390   2.970 0.002982
## ICD_IDOtherID (Other)                    0.07753    0.39353   0.197 0.843822
## ICD_NeoplasmsNeoplasms                  -0.12308    0.41420  -0.297 0.766344
## ICD_RespiratoryRespiratory diseases      0.41134    0.23511   1.750 0.080196
## ICD_DigestiveDigestive diseases          0.11174    0.20634   0.542 0.588136
##                                            
## (Intercept)                             ***
## age_s                                      
## gender2                                    
## smoking2smokers                            
## children22                              *  
## marriedother                               
## ambulatorynot independent                  
## job_farmerfarmers                       .  
## job_housewifehousewives                 ***
## job_retiredretired                         
## cormobidity2                               
## vascular_diseases1                         
## cpdTRUE                                 ** 
## ctdTRUE                                    
## pudTRUE                                    
## mild_liverTRUE                             
## sev_liverTRUE                              
## hemiplegiaTRUE                             
## sev_renalTRUE                              
## diabetes_noeodTRUE                      .  
## diabetes_eodTRUE                        ** 
## cancer1                                    
## alcohol1-4 problem                         
## drug1-4 problem                            
## wardtypesurgical                        ** 
## InvasiveHistoryYes                      .  
## MedicationHistoryYes                       
## HealthcareHistoryYes                       
## hospitaltype                               
## Ab_history                                 
## ICD_CirculatoryCirculatory diseases        
## ICD_GenitourinaryGenitourinary diseases    
## ICD_IDIntestinalID (Intestinal)         ** 
## ICD_IDOtherID (Other)                      
## ICD_NeoplasmsNeoplasms                     
## ICD_RespiratoryRespiratory diseases     .  
## ICD_DigestiveDigestive diseases            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 37 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
initial_fixed_estimates <- fixef(simple_mixed_model)
initial_random_estimates <- ranef(simple_mixed_model)
Delta.start<-c(as.numeric(initial_fixed_estimates),rep(0,100),
               as.numeric(initial_random_estimates))
Q.start<-as.numeric(VarCorr(simple_mixed_model))

8 Find the best lambda for glmmLasso

# Define a sequence of lambda values (from high penalty to low)
lambda_seq <- seq(10, 0, by = -0.1)

#fixed formula
fixed_formula <- CRE_carriage ~ age_s +gender+smoking2+children2+married+ambulatory+
  job_farmer+job_housewife+job_retired+cormobidity+
  vascular_diseases+cpd+ctd+pud+mild_liver+sev_liver+hemiplegia+sev_renal+diabetes_noeod+diabetes_eod+cancer+
  alcohol +drug+wardtype+InvasiveHistory +MedicationHistory+HealthcareHistory + hospitaltype+ Ab_history+
  ICD_Circulatory + ICD_Genitourinary + ICD_IDIntestinal + ICD_IDOther +
  ICD_Neoplasms+ICD_Respiratory+ICD_Digestive

# Initialize a vector to store BIC or AIC values
baic_vec <- rep(Inf, length(lambda_seq))

# Create a loop to test each lambda
for(j in 1:length(lambda_seq)){
  print(paste("Testing Lambda:", lambda_seq[j]))
  #Use try() to prevent the loop from breaking if one model fails to converge
  glmm_temp <- try(glmmLasso(
    fix = fixed_formula,
    rnd = list(hospital=~1),
    data = data_clean,
    lambda = lambda_seq[j],
    family = binomial(),
    switch.NR = TRUE,
    final.re = FALSE,
    control=list(start=Delta.start,q_start=Q.start)),
    silent = TRUE)
  #If the model fit successfully, save the BIC
  if(!inherits(glmm_temp, "try-error")){
    baic_vec[j] <- glmm_temp$bic
  }
}
## [1] "Testing Lambda: 10"
## [1] "Testing Lambda: 9.9"
## [1] "Testing Lambda: 9.8"
## [1] "Testing Lambda: 9.7"
## [1] "Testing Lambda: 9.6"
## [1] "Testing Lambda: 9.5"
## [1] "Testing Lambda: 9.4"
## [1] "Testing Lambda: 9.3"
## [1] "Testing Lambda: 9.2"
## [1] "Testing Lambda: 9.1"
## [1] "Testing Lambda: 9"
## [1] "Testing Lambda: 8.9"
## [1] "Testing Lambda: 8.8"
## [1] "Testing Lambda: 8.7"
## [1] "Testing Lambda: 8.6"
## [1] "Testing Lambda: 8.5"
## [1] "Testing Lambda: 8.4"
## [1] "Testing Lambda: 8.3"
## [1] "Testing Lambda: 8.2"
## [1] "Testing Lambda: 8.1"
## [1] "Testing Lambda: 8"
## [1] "Testing Lambda: 7.9"
## [1] "Testing Lambda: 7.8"
## [1] "Testing Lambda: 7.7"
## [1] "Testing Lambda: 7.6"
## [1] "Testing Lambda: 7.5"
## [1] "Testing Lambda: 7.4"
## [1] "Testing Lambda: 7.3"
## [1] "Testing Lambda: 7.2"
## [1] "Testing Lambda: 7.1"
## [1] "Testing Lambda: 7"
## [1] "Testing Lambda: 6.9"
## [1] "Testing Lambda: 6.8"
## [1] "Testing Lambda: 6.7"
## [1] "Testing Lambda: 6.6"
## [1] "Testing Lambda: 6.5"
## [1] "Testing Lambda: 6.4"
## [1] "Testing Lambda: 6.3"
## [1] "Testing Lambda: 6.2"
## [1] "Testing Lambda: 6.1"
## [1] "Testing Lambda: 6"
## [1] "Testing Lambda: 5.9"
## [1] "Testing Lambda: 5.8"
## [1] "Testing Lambda: 5.7"
## [1] "Testing Lambda: 5.6"
## [1] "Testing Lambda: 5.5"
## [1] "Testing Lambda: 5.4"
## [1] "Testing Lambda: 5.3"
## [1] "Testing Lambda: 5.2"
## [1] "Testing Lambda: 5.1"
## [1] "Testing Lambda: 5"
## [1] "Testing Lambda: 4.9"
## [1] "Testing Lambda: 4.8"
## [1] "Testing Lambda: 4.7"
## [1] "Testing Lambda: 4.6"
## [1] "Testing Lambda: 4.5"
## [1] "Testing Lambda: 4.4"
## [1] "Testing Lambda: 4.3"
## [1] "Testing Lambda: 4.2"
## [1] "Testing Lambda: 4.1"
## [1] "Testing Lambda: 4"
## [1] "Testing Lambda: 3.9"
## [1] "Testing Lambda: 3.8"
## [1] "Testing Lambda: 3.7"
## [1] "Testing Lambda: 3.6"
## [1] "Testing Lambda: 3.5"
## [1] "Testing Lambda: 3.4"
## [1] "Testing Lambda: 3.3"
## [1] "Testing Lambda: 3.2"
## [1] "Testing Lambda: 3.1"
## [1] "Testing Lambda: 3"
## [1] "Testing Lambda: 2.9"
## [1] "Testing Lambda: 2.8"
## [1] "Testing Lambda: 2.7"
## [1] "Testing Lambda: 2.6"
## [1] "Testing Lambda: 2.5"
## [1] "Testing Lambda: 2.4"
## [1] "Testing Lambda: 2.3"
## [1] "Testing Lambda: 2.2"
## [1] "Testing Lambda: 2.1"
## [1] "Testing Lambda: 2"
## [1] "Testing Lambda: 1.9"
## [1] "Testing Lambda: 1.8"
## [1] "Testing Lambda: 1.7"
## [1] "Testing Lambda: 1.6"
## [1] "Testing Lambda: 1.5"
## [1] "Testing Lambda: 1.4"
## [1] "Testing Lambda: 1.3"
## [1] "Testing Lambda: 1.2"
## [1] "Testing Lambda: 1.1"
## [1] "Testing Lambda: 1"
## [1] "Testing Lambda: 0.9"
## [1] "Testing Lambda: 0.799999999999999"
## [1] "Testing Lambda: 0.699999999999999"
## [1] "Testing Lambda: 0.6"
## [1] "Testing Lambda: 0.5"
## [1] "Testing Lambda: 0.399999999999999"
## [1] "Testing Lambda: 0.299999999999999"
## [1] "Testing Lambda: 0.199999999999999"
## [1] "Testing Lambda: 0.0999999999999996"
## [1] "Testing Lambda: 0"
# Find the lambda that produced the lowest BIC
opt_lambda <- lambda_seq[which.min(baic_vec)]
print(paste("Optimal Lambda is:", opt_lambda))
## [1] "Optimal Lambda is: 9.7"
baic_vec
##   [1] 1822.034 1822.102 1822.113 1822.027 1825.502 1825.397 1825.293 1825.185
##   [9] 1857.073 1824.937 1824.791 1824.627 1824.448 1824.260 1824.069 1823.880
##  [17] 1823.729 1829.624 1861.217 1823.064 1829.180 1835.579 1829.174 1829.177
##  [25] 1829.181 1822.098 1822.103 1822.107 1835.439 1828.889 1835.450 1865.352
##  [33] 1829.198 1829.201 1829.194 1836.818 1835.431 1836.801 1843.034 1843.052
##  [41] 1842.992 1842.996 1843.054 2036.173 1836.791 1857.665 1849.305 1850.560
##  [49] 1871.829 1850.508 1855.952 1870.854 1863.542 1850.596 1850.605 1842.989
##  [57] 1850.608 1850.600 1850.609 1850.624 1850.633 1866.030 1865.548 2167.958
##  [65] 1850.559 1850.027 1850.603 1847.965 1946.747 1857.606 1862.996 1870.074
##  [73] 1866.586 1870.024 1877.561 1877.524 1877.589 1885.203 1877.593 1892.686
##  [81] 1885.046 1885.106 1885.051 1885.097 1885.038 1885.100 1892.668 1896.996
##  [89] 1892.722 1892.695 1892.670 1900.354 1900.356 1907.956 1900.306 1909.492
##  [97] 1908.015 1912.937 1907.215 1907.326 1907.835
failed_indices <- which(is.infinite(baic_vec))
print(lambda_seq[failed_indices])
## numeric(0)

9 Fit the final Lasso model

CRE_admission_model <- glmmLasso(
  fix=fixed_formula,
  rnd=list(hospital = ~1),
  data = data_clean,
  lambda = opt_lambda, # larger -> stronger regularization and more coefficients shrunk to zero; smaller reduces penalty
  family = binomial(link = logit),
  switch.NR=FALSE, # switch to the faster Newton-Raphson method to speed up model fitting process
  final.re=TRUE,
  control=list(start=Delta.start,q_start=Q.start)
  )

CRE_admission_model %>% summary()
## Call:
## glmmLasso(fix = fixed_formula, rnd = list(hospital = ~1), data = data_clean, 
##     lambda = opt_lambda, family = binomial(link = logit), switch.NR = FALSE, 
##     final.re = TRUE, control = list(start = Delta.start, q_start = Q.start))
## 
## 
## Fixed Effects:
## 
## Coefficients:
##                                          Estimate    StdErr  z.value   p.value
## (Intercept)                             -2.896614  0.105325 -27.5016 < 2.2e-16
## age_s                                   -0.080755  0.087546  -0.9224  0.356305
## gender2                                  0.096457  0.200970   0.4800  0.631256
## smoking2smokers                          0.200728  0.206691   0.9712  0.331473
## children22                               0.294096  0.135142   2.1762  0.029541
## marriedother                             0.032647  0.185451   0.1760  0.860263
## ambulatorynot independent                0.271756  0.318637   0.8529  0.393731
## job_farmerfarmers                        0.344491  0.177792   1.9376  0.052671
## job_housewifehousewives                  0.799224  0.243337   3.2844  0.001022
## job_retiredretired                       0.298668  0.224700   1.3292  0.183787
## cormobidity2                            -0.057584  0.180324  -0.3193  0.749471
## vascular_diseases1                      -0.562984  0.465321  -1.2099  0.226324
## cpdTRUE                                  1.129586  0.357920   3.1560  0.001600
## ctdTRUE                                  0.076895  0.791161   0.0972  0.922574
## pudTRUE                                 -0.743173  0.542339  -1.3703  0.170590
## mild_liverTRUE                          -0.317834  0.556119  -0.5715  0.567647
## sev_liverTRUE                            0.247695  0.580272   0.4269  0.669481
## hemiplegiaTRUE                           0.866535  0.654034   1.3249  0.185201
## sev_renalTRUE                            0.503581  0.394461   1.2766  0.201733
## diabetes_noeodTRUE                       0.363475  0.200314   1.8145  0.069597
## diabetes_eodTRUE                         1.014803  0.375207   2.7046  0.006838
## cancer1                                 -1.016400  0.772013  -1.3166  0.187987
## alcohol1-4 problem                       0.275713  0.220487   1.2505  0.211128
## drug1-4 problem                         -0.310287  0.450224  -0.6892  0.490708
## wardtypesurgical                         0.547903  0.173818   3.1522  0.001621
## InvasiveHistoryYes                       0.450874  0.256014   1.7611  0.078217
## MedicationHistoryYes                    -0.041735  0.171448  -0.2434  0.807675
## HealthcareHistoryYes                     0.244828  0.192925   1.2690  0.204429
## hospitaltype                             0.000000        NA       NA        NA
## Ab_history                               0.206719  0.182411   1.1333  0.257105
## ICD_CirculatoryCirculatory diseases      0.342531  0.247108   1.3862  0.165698
## ICD_GenitourinaryGenitourinary diseases  0.229770  0.248203   0.9257  0.354583
## ICD_IDIntestinalID (Intestinal)          0.794326  0.271597   2.9246  0.003448
## ICD_IDOtherID (Other)                    0.076582  0.391350   0.1957  0.844855
## ICD_NeoplasmsNeoplasms                  -0.131703  0.413808  -0.3183  0.750280
## ICD_RespiratoryRespiratory diseases      0.409848  0.235195   1.7426  0.081406
## ICD_DigestiveDigestive diseases          0.096540  0.203067   0.4754  0.634495
##                                            
## (Intercept)                             ***
## age_s                                      
## gender2                                    
## smoking2smokers                            
## children22                              *  
## marriedother                               
## ambulatorynot independent                  
## job_farmerfarmers                       .  
## job_housewifehousewives                 ** 
## job_retiredretired                         
## cormobidity2                               
## vascular_diseases1                         
## cpdTRUE                                 ** 
## ctdTRUE                                    
## pudTRUE                                    
## mild_liverTRUE                             
## sev_liverTRUE                              
## hemiplegiaTRUE                             
## sev_renalTRUE                              
## diabetes_noeodTRUE                      .  
## diabetes_eodTRUE                        ** 
## cancer1                                    
## alcohol1-4 problem                         
## drug1-4 problem                            
## wardtypesurgical                        ** 
## InvasiveHistoryYes                      .  
## MedicationHistoryYes                       
## HealthcareHistoryYes                       
## hospitaltype                               
## Ab_history                                 
## ICD_CirculatoryCirculatory diseases        
## ICD_GenitourinaryGenitourinary diseases    
## ICD_IDIntestinalID (Intestinal)         ** 
## ICD_IDOtherID (Other)                      
## ICD_NeoplasmsNeoplasms                     
## ICD_RespiratoryRespiratory diseases     .  
## ICD_DigestiveDigestive diseases            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Random Effects:
## 
## StdDev:
##           hospital
## hospital 0.2737507
model_summary <- summary(CRE_admission_model)

10 Final estimates

std_errors <- model_summary$coefficients[, "StdErr"]; 
tab <- cbind(Est = coef(CRE_admission_model), LL = coef(CRE_admission_model) - 1.96 * std_errors, UL = coef(CRE_admission_model) + 1.96 *std_errors); 

round(cbind(exp(tab),p=coef(summary(CRE_admission_model))[,4]),digits = 3) #p-value
##                                           Est    LL    UL     p
## (Intercept)                             0.055 0.045 0.068 0.000
## age_s                                   0.922 0.777 1.095 0.356
## gender2                                 1.101 0.743 1.633 0.631
## smoking2smokers                         1.222 0.815 1.833 0.331
## children22                              1.342 1.030 1.749 0.030
## marriedother                            1.033 0.718 1.486 0.860
## ambulatorynot independent               1.312 0.703 2.450 0.394
## job_farmerfarmers                       1.411 0.996 2.000 0.053
## job_housewifehousewives                 2.224 1.380 3.583 0.001
## job_retiredretired                      1.348 0.868 2.094 0.184
## cormobidity2                            0.944 0.663 1.344 0.749
## vascular_diseases1                      0.570 0.229 1.418 0.226
## cpdTRUE                                 3.094 1.534 6.241 0.002
## ctdTRUE                                 1.080 0.229 5.091 0.923
## pudTRUE                                 0.476 0.164 1.377 0.171
## mild_liverTRUE                          0.728 0.245 2.164 0.568
## sev_liverTRUE                           1.281 0.411 3.995 0.669
## hemiplegiaTRUE                          2.379 0.660 8.571 0.185
## sev_renalTRUE                           1.655 0.764 3.585 0.202
## diabetes_noeodTRUE                      1.438 0.971 2.130 0.070
## diabetes_eodTRUE                        2.759 1.322 5.756 0.007
## cancer1                                 0.362 0.080 1.643 0.188
## alcohol1-4 problem                      1.317 0.855 2.030 0.211
## drug1-4 problem                         0.733 0.303 1.772 0.491
## wardtypesurgical                        1.730 1.230 2.432 0.002
## InvasiveHistoryYes                      1.570 0.950 2.593 0.078
## MedicationHistoryYes                    0.959 0.685 1.342 0.808
## HealthcareHistoryYes                    1.277 0.875 1.864 0.204
## hospitaltype                            1.000    NA    NA    NA
## Ab_history                              1.230 0.860 1.758 0.257
## ICD_CirculatoryCirculatory diseases     1.409 0.868 2.286 0.166
## ICD_GenitourinaryGenitourinary diseases 1.258 0.774 2.047 0.355
## ICD_IDIntestinalID (Intestinal)         2.213 1.300 3.768 0.003
## ICD_IDOtherID (Other)                   1.080 0.501 2.325 0.845
## ICD_NeoplasmsNeoplasms                  0.877 0.390 1.973 0.750
## ICD_RespiratoryRespiratory diseases     1.507 0.950 2.389 0.081
## ICD_DigestiveDigestive diseases         1.101 0.740 1.640 0.634