-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathacute_cost.R
182 lines (161 loc) · 7.64 KB
/
acute_cost.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# ---- Session setup ----
# Clear every object from the current workspace and trigger garbage collection
# before the analysis starts.
rm(list = ls())
gc()

# On the listed machines (matched on the node name reported by Sys.info()),
# switch to the project folder so the relative paths used below resolve.
if (Sys.info()[["nodename"]] %in% c("LAZ-ID1",
                                    "LAZ-ID2",
                                    "LAZ-DEID1",
                                    "LAZ-DEID2")) {
  setwd("E:/CT_APCD/Sai/suicide_cost")
}

library(methods)

# Project helpers; need.packages() is presumably defined in utils.R and
# presumably installs/attaches the packages it is given - confirm there.
source("utils.R")
need_pkgs <- c("data.table", "bit64", "tools", "touch", "haven", "xlsx",
               "dplyr", "lubridate", "tidyverse", "matrixStats",
               "berryFunctions")
need.packages(need_pkgs)

source("get_suicide_attempts.R")
# ---- Study cohort ----
# One row per distinct (member id, birth_dt, gender) combination, with every
# column read as character.
pt <- unique(fread("output/sc_15_no_sc_13_14.csv",
                   colClasses = "character",
                   select = c("INTERNAL_MEMBER_ID", "birth_dt",
                              "GENDER_CODE")))

# Age in 2015. birth_dt is converted with as.integer(), so it is presumably a
# birth year - confirm against the input file.
pt[, age := 2015 - as.integer(birth_dt)]

# Keep members aged 10 to 64 inclusive (rows with a missing age are dropped).
pt <- pt[age >= 10 & age <= 64]

# Strata used for the summary tables: the full range first, then sub-groups.
age_group <- list(10:64, 10:24, 25:44, 45:64)
gender_group <- list(c("M", "F"), "M", "F")
# ---- Suicide-flagged service lines ----
# Read the ICD-9 and ICD-10 extracts, keeping only lines that belong to a
# cohort member and carry sc_flag == "1", then stack and de-duplicate them.
sc <- rbindlist(lapply(9:10, function(icd_ver) {
  icd_lines <- fread(paste0("output/dgx_mc_2015_sc_icd", icd_ver, ".csv"),
                     colClasses = "character",
                     select = c("MEDICAL_CLAIM_SERVICE_LINE_ID",
                                "INTERNAL_MEMBER_ID",
                                "sc_flag"))
  icd_lines[INTERNAL_MEMBER_ID %in% pt$INTERNAL_MEMBER_ID &
              sc_flag == "1"]
}))
sc <- unique(sc, use.key = FALSE)

# Service line -> claim header / first service date lookup, limited to rows
# flagged sc_15_no_sc_13_14 == "1"; the flag column is dropped once applied.
sc2 <- fread("output/all_claims_SA_non_SA_20220131.csv",
             colClasses = "character",
             select = c("MEDICAL_CLAIM_SERVICE_LINE_ID",
                        "MEDICAL_CLAIM_HEADER_ID", "first_service_dt",
                        "sc_15_no_sc_13_14"))
sc2 <- sc2[sc_15_no_sc_13_14 == "1"]
sc2 <- sc2[, !"sc_15_no_sc_13_14"]
sc2 <- unique(sc2, use.key = FALSE)

# Attach first_service_dt to every row of sc (rows without a match in sc2 get
# NA) and parse it as a month/day/year date.
sc <- sc2[, !"MEDICAL_CLAIM_HEADER_ID"][sc,
                                        on = "MEDICAL_CLAIM_SERVICE_LINE_ID"]
sc[, first_service_dt := mdy(first_service_dt)]
# ---- Extra claim headers for the "overlap" variant ----
# Service line / header / first service date rows from the header-cluster
# file, read as character.
hd_ol <- fread(
  "E:/CT_APCD/Abby/suicide/costs/explore_header_ids/fsc_header_cluster.csv",
  colClasses = "character",
  select = c("MEDICAL_CLAIM_SERVICE_LINE_ID", "MEDICAL_CLAIM_HEADER_ID",
             "first_service_dt")
)

# sc2 extended with the header-cluster rows, duplicates removed.
sc2o <- unique(rbind(sc2, hd_ol), use.key = FALSE)
# ---- Cost files ----
# Cost measure to load; it is the prefix of the per-year file names.
i <- "total"
# Folder holding the per-year cost files.
indir <- paste0("E:/CT_APCD/Sai/intermediate_data/",
                "cost_measure_intermediate_data/",
                "cost_files_by_year/")

## no overlap
# Read the 2016, 2017 and 2015 files (in that order) with every column as
# character, stack them matching columns by name, and keep only the rows
# that belong to a cohort member.
mccost <- rbindlist(
  lapply(c(2016, 2017, 2015), function(yr) {
    fread(paste0(indir, i, "_", yr, "_all_ages.csv"),
          colClasses = "character")
  }),
  use.names = TRUE
)
mccost <- mccost[INTERNAL_MEMBER_ID %in% pt$INTERNAL_MEMBER_ID]
# ---- No-overlap cost per member ----
# Keep the four columns needed below. Names are lower-cased before selecting
# (so the selection does not depend on the case used in the files) and
# upper-cased afterwards.
names(mccost) <- tolower(names(mccost))
mccost <- mccost[, c("internal_member_id",
                     "first_service_dt",
                     "medical_claim_header_id", "allowed_amt")]
names(mccost) <- toupper(names(mccost))

# Amounts were read as text: convert them, drop negative or non-numeric
# (NA) amounts, and remove exact duplicate rows.
mccost[, ALLOWED_AMT := as.numeric(ALLOWED_AMT)]
mccost <- mccost[ALLOWED_AMT >= 0]
mccost <- unique(mccost, use.key = FALSE)

# Attach service-line ids from sc2 by claim header. X[Y, on = ] returns every
# row of Y, so all cost rows are kept; headers absent from sc2 get an NA
# service-line id.
# NOTE(review): because unmatched rows are kept, the minimum date below is
# taken over all of a member's cost rows, not only those found in sc2 -
# confirm this is intended (an inner join would need nomatch = NULL).
mccost <- sc2[, -"first_service_dt"][mccost, on = "MEDICAL_CLAIM_HEADER_ID"]

# Smallest FIRST_SERVICE_DT per member.
# NOTE(review): FIRST_SERVICE_DT is still character here, so min() compares
# strings; that is the earliest date only if the stored format sorts
# chronologically (e.g. YYYY-MM-DD) - confirm against the cost files.
sc3 <- mccost[, .(FIRST_SERVICE_DT = min(FIRST_SERVICE_DT)),
              by = "INTERNAL_MEMBER_ID"]

# Keep only the cost rows dated on that per-member minimum, then collapse the
# rows that differed only by service-line id.
mccost <- mccost[sc3, on = c("INTERNAL_MEMBER_ID", "FIRST_SERVICE_DT")]
mccost <- mccost[, -"MEDICAL_CLAIM_SERVICE_LINE_ID"]
mccost <- unique(mccost, use.key = FALSE)

# Number of distinct members with at least one retained cost row (shown when
# the script is run interactively).
uniqueN(mccost$INTERNAL_MEMBER_ID)

# Total allowed amount per member, joined onto the cohort so that every row
# of pt appears in the result.
smy <- mccost[, .(ALLOWED_AMT = sum(ALLOWED_AMT)), by = "INTERNAL_MEMBER_ID"]
smy <- smy[pt, on = "INTERNAL_MEMBER_ID"]

# Cohort members without any retained cost row count as zero cost. Only the
# amount column is filled, so missing values in the character columns are
# not overwritten with 0.
smy[is.na(ALLOWED_AMT), ALLOWED_AMT := 0]
# ---- No-overlap summary table ----
# One row of statistics on per-member ALLOWED_AMT for every gender group x
# age group combination; gender is the outer (slowest-varying) dimension.
# Each row holds: age label, gender label, mean, sd, min, max, 1st quartile,
# median, 3rd quartile, 95th and 99th percentiles (all stored as text).
summary_rows <- lapply(gender_group, function(gender_set) {
  lapply(age_group, function(age_set) {
    amounts <- smy[age %in% age_set & GENDER_CODE %in% gender_set]$ALLOWED_AMT
    six_num <- summary(amounts)
    row_values <- c(age = paste0(min(age_set), "-", max(age_set)),
                    gender = paste0(gender_set[1], "-",
                                    gender_set[length(gender_set)]),
                    six_num[4],
                    sd = sd(amounts),
                    six_num[1],
                    six_num[6],
                    six_num[2],
                    six_num[3],
                    six_num[5],
                    quantile(amounts, 0.95),
                    quantile(amounts, 0.99))
    data.table(t(row_values))
  })
})
out <- rbindlist(unlist(summary_rows, recursive = FALSE))

# Reorder so the two single-gender "all ages" rows (5 and 9) follow the
# combined-gender rows, then insert NA rows at positions 2, 6, 9 and 13 of
# the result.
out <- out[c(1:5, 9, 6:8, 10:12), ]
out <- insertRows(out, c(2, 6, 9, 13), NA)

fwrite(out, file = paste0("sc_current_header_id_cost", ".csv"))
## overlap
# Re-read the 2016, 2017 and 2015 cost files (every column as character),
# stack them, and keep only the rows that belong to a cohort member.
mccost_o <- rbind(fread(paste0(indir, i, "_", 2016, "_all_ages.csv"),
                        colClasses = "character"),
                  fread(paste0(indir, i, "_", 2017, "_all_ages.csv"),
                        colClasses = "character"),
                  fread(paste0(indir, i, "_", 2015, "_all_ages.csv"),
                        colClasses = "character"))
mccost_o <- mccost_o[INTERNAL_MEMBER_ID %in% pt$INTERNAL_MEMBER_ID]

# First/last service dates per claim header for cohort members, from the
# 2015-2017 fiscal-year files, de-duplicated within each year.
# The header-id column name must be a quoted string inside `select`; as a
# bare symbol it is looked up as an R object and the read fails.
# Columns are read as character, like every other read in this script, so
# that member ids compare against the character ids in pt.
lsd <- rbindlist(lapply(2015:2017, function(yr) {
  yr_claims <- fread(
    paste0("E:/CT_APCD/Sai/intermediate_data/cost_measure_intermediate_data/medical_fiscalyear_", yr, ".csv"),
    colClasses = "character",
    select = c("INTERNAL_MEMBER_ID", "first_service_dt", "last_service_dt",
               "MEDICAL_CLAIM_HEADER_ID")
  )
  unique(yr_claims[INTERNAL_MEMBER_ID %in% pt$INTERNAL_MEMBER_ID])
}))

# An unassigned third copy of the three-file cost read followed here; its
# result was discarded, so the redundant read has been dropped.
# ---- Overlap cost per member ----
# Keep the four columns needed below. Names are lower-cased before selecting
# (so the selection does not depend on the case used in the files) and
# upper-cased afterwards. Note that this replaces the no-overlap `mccost`.
names(mccost_o) <- tolower(names(mccost_o))
mccost <- mccost_o[, c("internal_member_id",
                       "first_service_dt",
                       "medical_claim_header_id", "allowed_amt")]
names(mccost) <- toupper(names(mccost))

# Amounts were read as text: convert them, drop negative or non-numeric
# (NA) amounts, and remove exact duplicate rows.
mccost[, ALLOWED_AMT := as.numeric(ALLOWED_AMT)]
mccost <- mccost[ALLOWED_AMT >= 0]
mccost <- unique(mccost, use.key = FALSE)

# Attach service-line ids from sc2o by claim header. X[Y, on = ] returns
# every row of Y, so all cost rows are kept; headers absent from sc2o get an
# NA service-line id.
# NOTE(review): because unmatched rows are kept, the minimum date below is
# taken over all of a member's cost rows, not only those found in sc2o -
# confirm this is intended (an inner join would need nomatch = NULL).
mccost <- sc2o[, -"first_service_dt"][mccost, on = "MEDICAL_CLAIM_HEADER_ID"]

# Smallest FIRST_SERVICE_DT per member.
# NOTE(review): FIRST_SERVICE_DT is still character here, so min() compares
# strings; that is the earliest date only if the stored format sorts
# chronologically (e.g. YYYY-MM-DD) - confirm against the cost files.
sc3o1 <- mccost[, .(FIRST_SERVICE_DT = min(FIRST_SERVICE_DT)),
                by = "INTERNAL_MEMBER_ID"]

# Cost rows dated on the per-member minimum ...
mccost2 <- mccost[sc3o1, on = c("INTERNAL_MEMBER_ID", "FIRST_SERVICE_DT")]
# ... plus every cost row whose claim header appears in hd_ol.
mccost3 <- mccost[MEDICAL_CLAIM_HEADER_ID %in% hd_ol$MEDICAL_CLAIM_HEADER_ID]

# Union of the two sets; duplicates are removed both before and after the
# service-line id is dropped, so rows that differed only by that id collapse.
mccost4 <- unique(rbind(mccost2, mccost3), use.key = FALSE)
mccost4 <- mccost4[, -"MEDICAL_CLAIM_SERVICE_LINE_ID"]
mccost4 <- unique(mccost4, use.key = FALSE)

# Number of distinct members with at least one retained cost row (shown when
# the script is run interactively).
uniqueN(mccost4$INTERNAL_MEMBER_ID)

# Total allowed amount per member, joined onto the cohort so that every row
# of pt appears in the result.
smy <- mccost4[, .(ALLOWED_AMT = sum(ALLOWED_AMT)), by = "INTERNAL_MEMBER_ID"]
smy <- smy[pt, on = "INTERNAL_MEMBER_ID"]

# Cohort members without any retained cost row count as zero cost. Only the
# amount column is filled, so missing values in the character columns are
# not overwritten with 0.
smy[is.na(ALLOWED_AMT), ALLOWED_AMT := 0]
# ---- Overlap summary table ----
# One row of statistics on per-member ALLOWED_AMT for every gender group x
# age group combination; gender is the outer (slowest-varying) dimension.
# Each row holds: age label, gender label, mean, sd, min, max, 1st quartile,
# median, 3rd quartile, 95th and 99th percentiles (all stored as text).
summary_rows <- lapply(gender_group, function(gender_set) {
  lapply(age_group, function(age_set) {
    amounts <- smy[age %in% age_set & GENDER_CODE %in% gender_set]$ALLOWED_AMT
    six_num <- summary(amounts)
    row_values <- c(age = paste0(min(age_set), "-", max(age_set)),
                    gender = paste0(gender_set[1], "-",
                                    gender_set[length(gender_set)]),
                    six_num[4],
                    sd = sd(amounts),
                    six_num[1],
                    six_num[6],
                    six_num[2],
                    six_num[3],
                    six_num[5],
                    quantile(amounts, 0.95),
                    quantile(amounts, 0.99))
    data.table(t(row_values))
  })
})
out <- rbindlist(unlist(summary_rows, recursive = FALSE))

# Reorder so the two single-gender "all ages" rows (5 and 9) follow the
# combined-gender rows, then insert NA rows at positions 2, 6, 9 and 13 of
# the result.
out <- out[c(1:5, 9, 6:8, 10:12), ]
out <- insertRows(out, c(2, 6, 9, 13), NA)

fwrite(out, file = paste0("sc_current_header_id_cost_overlap", ".csv"))