# get px location codes and standardize

proc_location <- load_codeset("proc_locations_revised", indexes = "procedure_concept_id") %>% # may be updated
  mutate(location = case_when(
    location == "chest" ~ "rib_thorax",
    location == "patella" ~ "knee",
    location == "vertebra" ~ "spine",
    str_detect(location,"site_unspec")~"site_unspec",
    TRUE ~ location
  )) %>%
  compute_new()

# get dx location codes and standardize

dx_fracture <-
  load_codeset("dx_fracture_lu") %>%
  mutate(location = case_when(
    location == "chest" ~ "rib_thorax",
    location == "patella" ~ "knee",
    location == "vertebra" ~ "spine",
    location == "site_unspecified" ~ "site_unspec",
    TRUE ~ location
  )) %>%
  compute_new()


# Method 1

# get fracture diagnoses

has_fracture_dx <- results_tbl("cdm_condition_occurrence") %>%
  inner_join(dx_fracture, by = c("condition_concept_id" = "concept_id")) %>%
  mutate() %>%
  #  dplyr::select(person_id, condition_concept_id, condition_concept_name, condition_start_date, condition_start_datetime, dx_location=location) %>%
  dplyr::select(person_id, condition_start_date, condition_start_datetime, dx_location = location) %>%
  compute_new()

# check each procedure type individually.  Save dates and locations for each type

has_imaging_px_1 <-
  results_tbl("cdm_procedure_occurrence") %>%
  inner_join(load_codeset("px_radiologic"), by = c("procedure_concept_id" = "concept_id")) %>%
  mutate(location = "site_unspec") %>%  # V3 - no longer trusting location from imaging
  dplyr::select(person_id, imaging_datetime = procedure_datetime, imaging_date = procedure_date, imaging_location = location) %>%
  compute_new()

has_imaging_px_2 <-
  results_tbl("cdm_procedure_occurrence") %>%
  inner_join(load_codeset("px_radiologic"), by = c("procedure_source_concept_id" = "concept_id")) %>%
  mutate(location = "site_unspec") %>%  # V3 - no longer trusting location from imaging
  dplyr::select(person_id, imaging_datetime = procedure_datetime, imaging_date = procedure_date, imaging_location = location) %>%
  compute_new()

has_imaging_px <-
  dplyr::union(has_imaging_px_1, has_imaging_px_2) %>%
  compute_new()

has_casting_px_1 <-
  results_tbl("cdm_procedure_occurrence") %>%
  inner_join(load_codeset("px_casting"), by = c("procedure_concept_id" = "concept_id")) %>%
  left_join(proc_location) %>%
  #  dplyr::select(person_id, procedure_datetime, procedure_date, casting_location=location, procedure_concept_id, procedure_concept_name) %>%
  dplyr::select(person_id, casting_datetime = procedure_datetime, casting_date = procedure_date, casting_location = location) %>%
  compute_new()

has_casting_px_2 <-
  results_tbl("cdm_procedure_occurrence") %>%
  inner_join(load_codeset("px_casting"), by = c("procedure_source_concept_id" = "concept_id")) %>%
  left_join(proc_location) %>%
  #  dplyr::select(person_id, procedure_datetime, procedure_date, casting_location=location, procedure_concept_id, procedure_concept_name) %>%
  dplyr::select(person_id, casting_datetime = procedure_datetime, casting_date = procedure_date, casting_location = location) %>%
  compute_new()

has_casting_px <-
  dplyr::union(has_casting_px_1, has_casting_px_2) %>%
  compute_new()

has_ortho_px_1 <-
  results_tbl("cdm_procedure_occurrence") %>%
  inner_join(load_codeset("px_ortho_lu"), by = c("procedure_concept_id" = "concept_id")) %>%
  left_join(proc_location) %>%
  # dplyr::select(person_id, procedure_datetime, procedure_date, ortho_location=location, procedure_concept_id, procedure_concept_name) %>%
  dplyr::select(person_id, ortho_datetime = procedure_datetime, ortho_date = procedure_date, ortho_location = location, procedure_concept_id, procedure_concept_name) %>%
  compute_new()

has_ortho_px_2 <-
  results_tbl("cdm_procedure_occurrence") %>%
  inner_join(load_codeset("px_ortho_lu"), by = c("procedure_source_concept_id" = "concept_id")) %>%
  left_join(proc_location) %>%
  # dplyr::select(person_id, procedure_datetime, procedure_date, ortho_location=location, procedure_concept_id, procedure_concept_name) %>%
  dplyr::select(person_id, ortho_datetime = procedure_datetime, ortho_date = procedure_date, ortho_location = location, procedure_concept_id, procedure_concept_name) %>%
  compute_new()

has_ortho_px <-
  dplyr::union(has_ortho_px_1, has_ortho_px_2) %>%
  compute_new()
# now apply 120 hrs rule, but for each type of procedure

dx_with_imaging_120_hrs <-
  has_fracture_dx %>%
  inner_join(has_imaging_px) %>%
  mutate(diff_time = sql("EXTRACT (EPOCH FROM imaging_datetime-condition_start_datetime)")) %>%
  mutate(diff_hrs = abs(diff_time) / 3600) %>%
  filter(diff_hrs <= 120) %>%
  distinct(person_id, condition_start_date, imaging_date, dx_location, imaging_location) %>%
  mutate(imaging_location = case_when(is.na(imaging_location) ~ "site_unspec", TRUE ~ imaging_location)) %>%
  compute_new()

dx_with_casting_120_hrs <-
  has_fracture_dx %>%
  inner_join(has_casting_px) %>%
  mutate(diff_time = sql("EXTRACT (EPOCH FROM casting_datetime-condition_start_datetime)")) %>%
  mutate(diff_hrs = abs(diff_time) / 3600) %>%
  filter(diff_hrs <= 120) %>%
  distinct(person_id, condition_start_date, casting_date, dx_location, casting_location) %>%
  mutate(casting_location = case_when(is.na(casting_location) ~ "site_unspec", TRUE ~ casting_location)) %>%
  compute_new()

dx_with_ortho_120_hrs <-
  has_fracture_dx %>%
  inner_join(has_ortho_px) %>%
  mutate(diff_time = sql("EXTRACT (EPOCH FROM ortho_datetime-condition_start_datetime)")) %>%
  mutate(diff_hrs = abs(diff_time) / 3600) %>%
  filter(diff_hrs <= 120) %>%
  distinct(person_id, condition_start_date, ortho_date, dx_location, ortho_location) %>%
  mutate(ortho_location = case_when(is.na(ortho_location) ~ "site_unspec", TRUE ~ ortho_location)) %>%
  compute_new()



# combine them all

# logic on combining dates: use earliest (lowest) among the 4 dates.
# To avoid NA issues, set up temporary dates set way into future.

# logic on combining locations: set temporary locations where completely unspecified replaced with NA
# then use coalesce to get first non-NA location based on ordering

#Method 1 is now orthopedic only -- per LU (V4) check whether it doesn't require an additional dx


method1 <-
  results_tbl("cdm_procedure_occurrence") %>%
  inner_join(load_codeset("px_ortho_lu") %>% filter(sufficient=="Y"), by = c("procedure_concept_id" = "concept_id")) %>%
  left_join(proc_location %>% dplyr::select(procedure_concept_id, location)) %>%
  mutate(location = case_when(is.na(location) ~ "site_unspec", TRUE ~ location)) %>%
  distinct(person_id, event_date = procedure_date, location) %>%
  mutate(method = 1, location_source="orthopedic") %>%
  compute_new()


# Method 2 - pulling from dx and a procedure
# If same ortho procedure used in Method 1 and Method 2, then 
# rollup based on whether dx is earlier than procedure

method2_detailed <-
  dx_with_imaging_120_hrs %>%
  full_join(dx_with_casting_120_hrs) %>%
  full_join(dx_with_ortho_120_hrs) %>%
  mutate(
    dx2 = case_when(!str_detect(dx_location, "site_unspec") & !str_detect(dx_location, "multi_site") ~ dx_location),
    ortho2 = case_when(!str_detect(ortho_location, "site_unspec") & !str_detect(ortho_location, "multi_site") ~ ortho_location),
    casting2 = case_when(!str_detect(casting_location, "site_unspec") & !str_detect(casting_location, "multi_site") ~ casting_location)
  ) %>%
  mutate(location = coalesce(ortho2, dx2, casting2)) %>%
  mutate(location_source = case_when(location==ortho2~"orthopedic", 
                                     location==dx2~"dx", 
                                     location==casting2~"casting")) %>%
  mutate(
    casting_date2 = case_when(is.na(casting_date) ~ as.Date("3000-01-01"), TRUE ~ casting_date),
    imaging_date2 = case_when(is.na(imaging_date) ~ as.Date("3000-01-01"), TRUE ~ imaging_date),
    ortho_date2 = case_when(is.na(ortho_date) ~ as.Date("3000-01-01"), TRUE ~ ortho_date)
  ) %>%
  mutate(event_date = case_when(
    condition_start_date < casting_date2 & condition_start_date < imaging_date2 & condition_start_date < ortho_date2 ~ condition_start_date,
    casting_date2 < imaging_date2 & casting_date2 < ortho_date2 ~ casting_date2,
    imaging_date2 < ortho_date2 ~ imaging_date2,
    TRUE ~ ortho_date2
  )) %>%
  distinct(person_id, event_date, location, location_source, condition_start_date, casting_date, imaging_date, ortho_date, dx_location, casting_location, imaging_location, ortho_location) %>%
  compute_new()

method2 <-
  method2_detailed %>%
  distinct(person_id, event_date, location, location_source) %>%
  mutate(location = case_when(is.na(location) ~ "site_unspec", TRUE ~ location)) %>%
  mutate(method = 2) %>%
  compute_new()





# save results

method2_detailed %>%
  output_tbl("method2_detailed_v4")

method1 %>%
  union_all(method2) %>%
  output_tbl("revised_fracture_def")

### roll-up

# Get first fracture per person, then get locations
# Also get days for 0-6 and 7-179 day windows
# As.Date used because it returns datetime


# initialize  the set you're searching from and the loop counter
# use min() so that if there are somehow same method 1 and method 2 on same day,
# method 1 kept and method 2 dropped

search_set <- results_tbl("revised_fracture_def")  %>%
  group_by(person_id, location, event_date ) %>%
  summarise(method=min(method)) %>%
  inner_join(results_tbl("revised_fracture_def")) %>%
  ungroup() %>%
  compute_new()
  
tally_set <- search_set %>%
  filter(!is.na(location)) %>%
  summarise(n = n()) %>%
  pull()

loop_counter <- 1


#set up initial fractures 
# -- first fracture for person with a fracture in the search set
# Also set up 7 day and 180 day window markers

init_fx <-
  search_set %>%
  group_by(person_id) %>%
  summarise(event_date = min(event_date)) %>%
  inner_join(search_set) %>%
  distinct(person_id, event_date, location, location_source, method) %>%
  mutate(
    day7 = as.Date(event_date + days(7)),
    day180 = as.Date(event_date + days(180)),
    loop = loop_counter
  ) %>%
  compute_new()

save_fx <- init_fx %>% dplyr:: filter(person_id == -1)  %>%
  compute_new() #this step set up blank init_fx


#Use loop to keep looking ahead.

tally_step_3 <- 0

while (tally_set > 0) {
  
  #init_fx defined prior to entry of loop or at end of loop iteration
  
  # Loop step 1 = 
  # identify 7-day sets to roll up based on date and/or location
  rollup_7 <-
    init_fx %>%
    distinct(person_id, location, check_date = event_date, day7, loop) %>%
    inner_join(search_set) %>%
    filter(event_date >= check_date, event_date < day7) %>%
    compute_new()
  
  add_rollups_from_7 <-
    rollup_7 %>%
    filter(location != "site_unspec") %>%
    dplyr::select(-c(event_date)) %>%
    rename(event_date = check_date) %>%
    mutate(
      day7 = as.Date(event_date + days(7)),
      day180 = as.Date(event_date + days(180)),
      loop = loop_counter
    ) %>%  compute_new()
  
  # Doing rollup this way should mean that new locations in the 0-7 window 
  # are added to the day 0 locations
  
   init_fx <-
     dplyr::union(init_fx ,add_rollups_from_7) %>%
     compute_new()

  

  # Loop Step 2 - look ahead to match unspecified sites.
  unspec_init <- init_fx %>%
    filter(location == "site_unspec") %>%
    compute_new()

  # Use >= check_date so that if there's an unspecified _and_ a specified fracture on same day, the specified fracture absorbs it
  lookahead_location <-
    unspec_init %>%
    distinct(person_id, check_date = event_date, day7, day180, loop) %>%
    inner_join(search_set) %>%
    filter(event_date >= check_date, event_date < day180, location != "site_unspec") %>%
    group_by(person_id, check_date, day7, day180, loop) %>%
    summarise(event_date = min(event_date)) %>%
    inner_join(search_set) %>%
    filter(location != "site_unspec") %>%
    ungroup() %>%
    mutate(event_date = check_date) %>%
    dplyr::select(-check_date) %>%
    distinct() %>%
    compute_new()

  identified_location <-
    unspec_init %>%
    inner_join(lookahead_location %>% distinct(person_id, event_date)) %>%
    compute_new()

  init_fx <-
    init_fx %>%
    anti_join(identified_location) %>%
    dplyr::union(lookahead_location) %>%
    compute_new()
  
  # Loop Step 3 - rollup in the 7-180 day window.

  #Loop 3A find all events 7-179 days with the _same_ location as init_fx
  rollup_180 <-
    init_fx %>%
    distinct(person_id, location, day7, day180) %>%
    inner_join(search_set) %>%
    filter(event_date >= day7, event_date < day180) %>%
    compute_new()
  
  #Loop 3B find all events 7-179 days without specific location
  
  rollup_180_unspec <-
    init_fx %>%
    distinct(person_id, day7, day180) %>%
    inner_join(search_set) %>%
    filter(
      event_date >= day7, event_date < day180,
      is.na(location) | location == "multi_site" | location == "site_unspec"
    ) %>%
    compute_new()
  
  tally_step_3 <- tally_step_3 + rollup_180_unspec %>% summarise(n=n()) %>% pull()
  
  # Loop Step 4 reduce search set by anything rolled up
  
  search_set <-
    search_set %>%
    anti_join(rollup_7 %>% dplyr::select(person_id, event_date)) %>%
    anti_join(rollup_180) %>%
    anti_join(rollup_180_unspec) %>%
    compute_new()
  
  #Loop Step 5  check set count to see whether to continue loop
  tally_set <- search_set %>%
    filter(!is.na(location)) %>%
    summarise(n = n()) %>%
    pull()
  
  
  # Loop Step 6 save as we go along. Save_fx will contain all results
  save_fx <- dplyr::union_all(save_fx, init_fx)
  
  print(tally_set) # printing so I can see progresss and spot if things hang up
  print(loop_counter)
  
  #Loop Step 7 - iterate loop counter for next pass through loop
  loop_counter <- loop_counter + 1
  
  #Loop Step 8 - get new initial fx from new search set
  init_fx <-
    search_set %>%
    group_by(person_id) %>%
    summarise(event_date = min(event_date)) %>%
    inner_join(search_set) %>%
    distinct(person_id, event_date, location, location_source, method) %>%
    mutate(
      day7 = as.Date(event_date + days(7)),
      day180 = as.Date(event_date + days(180)),
      loop = loop_counter
    ) %>%
    compute_new()
  
  # End of loop. If no new specified locations, loop terminates.
}


save_fx %>% output_tbl("rollup_new_def")
