-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathFraud.py
371 lines (308 loc) · 13.3 KB
/
Fraud.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
import csv
import os
import pprint
import time
from collections import Counter
from datetime import datetime

import holidays
import xlsxwriter as xlsxwriter

import Claim
class Result():
    """Scored snapshot of a single claim.

    Copies the fields of a claim object (read through its ``get_*``
    accessors) and stores the scoring outcome next to them: the claim
    frequency, the risk score and the text listing the score factors.
    """

    # Instances are mutable and define __eq__, so they are unhashable.
    # Python already sets this implicitly; it is spelled out for clarity.
    __hash__ = None

    # Fields compared by __eq__. The claim ID (c_id) is intentionally not
    # part of the comparison, exactly as in the field-by-field original.
    _EQ_FIELDS = (
        "username", "p_id", "d_id", "b_id", "d_policy", "location", "issue",
        "d_incident", "d_report", "amount", "asset", "age", "frequency",
        "score", "reason",
    )

    def __init__(self, a, frequency, score, reason):
        """Copy the fields of claim ``a`` and attach its scoring outcome.

        Args:
            a: Claim object exposing ``get_p_id``, ``get_username``,
                ``get_d_id``, ``get_b_id``, ``get_d_incident``,
                ``get_d_policy``, ``get_d_report``, ``get_issue``,
                ``get_amount``, ``get_location``, ``get_asset``,
                ``get_c_id`` and ``get_age``.
            frequency: Claim frequency value to store.
            score: Risk score to store.
            reason: Text describing the factors behind the score.
        """
        self.p_id = a.get_p_id()
        self.username = a.get_username()
        self.d_id = a.get_d_id()
        self.b_id = a.get_b_id()
        self.d_incident = a.get_d_incident()
        self.d_policy = a.get_d_policy()
        self.d_report = a.get_d_report()
        self.issue = a.get_issue()
        self.amount = a.get_amount()
        self.location = a.get_location()
        self.asset = a.get_asset()
        self.c_id = a.get_c_id()
        self.age = a.get_age()
        self.frequency = frequency
        self.score = score
        self.reason = reason

    def set_frequency(self, value):
        """Replace the stored claim frequency."""
        self.frequency = value

    def set_score(self, value):
        """Replace the stored risk score."""
        self.score = value

    def add_reason(self, value):
        """Append ``value`` (converted to text) as a new line of the reason."""
        self.reason += str(value) + "\n"

    def toList(self):
        """Return the fields as a new list in export column order."""
        return [
            self.username,
            self.d_id,
            self.p_id,
            self.d_policy,
            self.b_id,
            self.location,
            self.issue,
            self.d_incident,
            self.d_report,
            self.amount,
            self.asset,
            self.c_id,
            self.age,
            self.frequency,
            self.score,
            self.reason,
        ]

    def toDictionary(self):
        """Return the fields as a new dict keyed by human-readable labels."""
        return {
            "Username": self.username,
            "Driver ID": self.d_id,
            "Policy ID": self.p_id,
            "Date of Policy": self.d_policy,
            "Block ID": self.b_id,
            "Location": self.location,
            "Reason for Claim": self.issue,
            "Date of Incident": self.d_incident,
            "Date of Report": self.d_report,
            # The key spelling is kept as-is: consumers may look it up.
            "Claimed mount": self.amount,
            "Insured Asset Value": self.asset,
            "Claim ID": self.c_id,
            "Age": self.age,
            "Frequency": self.frequency,
            "Risk Score": self.score,
            "Risk Score Factors": self.reason,
        }

    def days_between(self):
        """Return the absolute number of days between policy and incident.

        Both dates are parsed as month-day-year; "/" separators are
        accepted and treated like "-".

        Raises:
            ValueError: If either date does not match that format.
        """
        d1 = datetime.strptime(str(self.d_policy).replace("/", "-"), "%m-%d-%Y")
        d2 = datetime.strptime(str(self.d_incident).replace("/", "-"), "%m-%d-%Y")
        return abs((d2 - d1).days)

    def __eq__(self, other):
        """Return True when ``other`` is a Result with identical compared fields.

        Returns False (not NotImplemented) for foreign types because the
        method is also called directly and its result used as a boolean.
        """
        if not isinstance(other, Result):
            return False
        return all(
            getattr(self, field) == getattr(other, field)
            for field in self._EQ_FIELDS
        )

    def __sybil__(self, other):
        """Return True when the two claims disagree on identity.

        A conflict exists when username, policy ID and driver ID do not all
        match or all differ together, e.g. the same policy filed under two
        usernames. Always returns a bool; anything that is not a Result
        yields False.
        """
        if not isinstance(other, Result):
            return False
        same_user = self.username == other.username
        same_policy = self.p_id == other.p_id
        same_driver = self.d_id == other.d_id
        # Any pair of flags that disagrees is one of the six conflict cases.
        return not (same_user == same_policy == same_driver)
def read_csv(dir: str) -> tuple:
    """Load claims from a CSV file.

    The first line is treated as the header. Every following non-blank
    line becomes one ``Claim.Claim_block``.

    Args:
        dir: Path of the CSV file to read.

    Returns:
        A ``(claims, policy_counts)`` tuple: ``claims`` maps a 1-based
        running number to the claim built from that row, and
        ``policy_counts`` maps each policy ID (third column) to the number
        of rows carrying it. Both are empty for an empty file.

    Raises:
        IndexError: If a non-blank row has fewer than 14 columns.
    """
    # newline='' lets the csv module handle line endings itself, which is
    # required for quoted fields that contain embedded newlines.
    with open(dir, 'r', newline='') as csvfile:
        csvreader = csv.reader(csvfile)
        # An empty file yields no header instead of raising StopIteration.
        fields = next(csvreader, [])
        # Blank lines come back as empty lists; skip them.
        rows = [row for row in csvreader if row]
        line_count = csvreader.line_num

    print("Total no. of rows: %d" % (line_count))
    print('Field names are:' + ', '.join(fields))
    # NOTE(review): this banner is printed, but no rows are printed after it.
    print('\nFirst %d rows are:\n' % (line_count))

    claims = {}
    policy_ids = []
    for number, row in enumerate(rows, start=1):
        policy_ids.append(row[2])
        # NOTE(review): column index 11 is not passed to Claim_block --
        # confirm against the CSV layout that this is intended.
        claims[number] = Claim.Claim_block(*row[:11], row[12], row[13])

    return claims, lis_duplicate_dictionary(policy_ids)
def lis_duplicate_dictionary(lis):
    """Count how often each item occurs in ``lis``.

    Args:
        lis: Iterable of hashable items.

    Returns:
        A plain dict mapping each distinct item to its number of
        occurrences, with keys in order of first appearance.
    """
    # A single counting pass instead of calling lis.count() per element.
    return dict(Counter(lis))
def inspect(dir, dictionary=True):
    """Score every claim in the CSV file ``dir`` for fraud risk.

    Each claim collects points from independent checks: claim frequency,
    delay between incident and report, proximity of the incident to a
    holiday, time since the policy started, claimed amount relative to the
    insured asset, and claimant age. Afterwards the claims are compared
    pairwise to flag identity conflicts and exact duplicates.

    Args:
        dir: Path of the claims CSV, passed on to ``read_csv``.
        dictionary: Unused; kept so existing callers keep working.

    Returns:
        dict mapping each claim ID to the ``Result.toList()`` row of that
        claim. The score entry is the string "Duplicate" for a claim that
        equals another claim with a lower claim ID.
    """
    claims, policy_counts = read_csv(dir)
    results = {}

    for claim in claims.values():
        # Days from policy start to incident, divided by the number of
        # claims filed against the same policy.
        frequency = int(days_between(claim) / policy_counts[claim.get_p_id()])
        report_delay = days_between(a=claim.get_d_incident(), b=claim.get_d_report())
        holiday_gap = int(days_berween_holidays(date_of_incident=str(claim.d_incident)))
        policy_age = days_between(a=claim.get_d_policy(), b=claim.get_d_incident())
        liability_ratio = int(claim.get_amount()) / int(claim.get_asset())
        age = int(claim.get_age())

        score = 0
        reason = ""

        # Claim frequency.
        if 50 <= frequency <= 100:
            points = 50
        elif 30 <= frequency < 50:
            points = 200
        elif 10 < frequency < 30:
            points = 400
        else:
            points = 0
        if points:
            score += points
            reason += "[Claiming Frequency = %d days]- %d\n" % (frequency, points)

        # Delay between incident and report. This is its own chain so it is
        # evaluated even when a frequency band above has matched.
        if 7 < report_delay <= 30:
            points = 150
        elif 30 < report_delay <= 120:
            points = 350
        elif 120 < report_delay <= 365:
            points = 500
        else:
            points = 0
        if points:
            score += points
            reason += "[Report - Incident = %d days]- %d\n" % (report_delay, points)

        # Incident close to a holiday.
        if holiday_gap <= 7:
            points = 300
        elif holiday_gap <= 14:
            points = 150
        else:
            points = 0
        if points:
            score += points
            reason += "[Holidays - Incident = %d days]- %d\n" % (holiday_gap, points)

        # Incident within the first 10 days of holding the policy.
        if policy_age <= 10:
            score += 700
            reason += "[Policy date - incident = %d days]- %d\n" % (policy_age, 700)

        # Claimed amount relative to the insured asset value. Every band is
        # upper-inclusive so a ratio of exactly 1.5 is scored as well.
        if 1 < liability_ratio <= 1.5:
            points = 200
        elif 1.5 < liability_ratio <= 2:
            points = 250
        elif 2 < liability_ratio <= 2.5:
            points = 300
        elif 2.5 < liability_ratio <= 3:
            points = 350
        elif liability_ratio > 3:
            points = 500
        else:
            points = 0
        if points:
            score += points
            # NOTE(review): %d truncates the ratio (1.4 is shown as 1).
            reason += "[Claiming %d times more than insured asset]- %d\n" % (liability_ratio, points)

        # Claimant age.
        if 16 <= age < 18:
            score += 250
            reason += "[Claimant age is under 18]- %d\n" % (250)
        elif 18 <= age < 21:
            score += 200
            reason += "[Claimant age is under 21]- %d\n" % (200)
        elif 21 <= age <= 29:
            score += 150
            reason += "[Claimant age is under between 21 to 29]- %d\n" % (150)
        elif 30 <= age <= 45:
            score += 100
            reason += "[Claimant age is under between 30 to 45]- %d\n" % (100)
        elif 45 < age <= 60:
            score += 50
            reason += "[Claimant age is under between 46 to 60]- %d\n" % (50)

        results[claim.get_c_id()] = Result(claim, frequency, score, reason)

    # Pairwise pass. Each unordered pair is handled once, from the side of
    # the claim with the numerically lower claim ID.
    for claim_id, result in results.items():
        for other_id, other in results.items():
            if int(claim_id) >= int(other_id):
                continue
            if result.__sybil__(other):
                result.add_reason("Sybil Attack - Conflicts with claim %d" % (int(other_id)))
                other.add_reason("Sybil Attack - Conflicts with claim %d" % (int(claim_id)))
            elif result == other:
                other.set_score("Duplicate")

    # toList() already builds a fresh list for every call.
    return {claim_id: result.toList() for claim_id, result in results.items()}
def _parse_date_text(text):
    """Parse slash-separated month/day/year text, or ``YYYY-MM-DD`` text."""
    if "/" in text:
        return datetime.strptime(text.replace("/", "-"), "%m-%d-%Y")
    return datetime.strptime(text, "%Y-%m-%d")


def days_between(cliam_object=None, a=None, b=None):
    """Return the absolute number of days between two dates.

    Two call forms are supported:

    * ``days_between(claim)`` -- days between the policy date and the
      incident date of a ``Claim.Claim_block``; both are parsed as
      month-day-year with "/" or "-" separators.
    * ``days_between(a=..., b=...)`` -- days between two date strings,
      each either month/day/year with "/" or ``YYYY-MM-DD``.

    Args:
        cliam_object: Claim to read both dates from, or None.
        a: First date string, or None.
        b: Second date string, or None.

    Returns:
        The non-negative day difference, or None when the arguments match
        neither call form.

    Raises:
        ValueError: If a date string does not match the expected format.
    """
    if a is None and b is None and isinstance(cliam_object, Claim.Claim_block):
        d1 = datetime.strptime(str(cliam_object.get_d_policy()).replace("/", "-"), "%m-%d-%Y")
        d2 = datetime.strptime(str(cliam_object.get_d_incident()).replace("/", "-"), "%m-%d-%Y")
        return abs((d2 - d1).days)
    if isinstance(a, str) and isinstance(b, str) and cliam_object is None:
        d1 = _parse_date_text(a)
        d2 = _parse_date_text(b)
        return abs((d2 - d1).days)
    return None
def days_berween_holidays(date_of_incident, country="UnitedStates"):
    """Return the number of days from an incident to the nearest US holiday.

    Holidays of the incident year and of both neighbouring years are
    considered, so an incident in late December is measured against the
    following New Year's Day and one in early January against the previous
    December holidays.

    Args:
        date_of_incident: Date string, either ``YYYY-MM-DD`` or
            slash-separated with the year last.
        country: Unused; the United States calendar is always used.

    Returns:
        The smallest day difference to any holiday, capped at 365.
    """
    # The year is the first "-" field or the third "/" field; fall back to
    # the current year (as an int, which is what ``years=`` expects).
    if "-" in date_of_incident:
        year = int(date_of_incident.split("-")[0])
    elif "/" in date_of_incident:
        year = int(date_of_incident.split("/")[2])
    else:
        year = datetime.today().year

    # Include the adjacent years, but never go below year 1.
    years = list(range(max(year - 1, 1), year + 2))
    us_holidays = holidays.UnitedStates(years=years)

    shortest_difference = 365
    for holiday_date, _name in us_holidays.items():
        diff = int(days_between(a=str(date_of_incident), b=str(holiday_date)))
        if diff < shortest_difference:
            shortest_difference = diff
    return shortest_difference
def create_cvs(dir: str):
    """Score the claims in ``dir`` and export them to an Excel workbook.

    The workbook is written to ``Result/<name>.xlsx`` below the current
    working directory, where ``<name>`` is the input file name without its
    ".csv" suffix and without any remaining dots. Despite the function
    name, the output is an .xlsx file.

    Args:
        dir: Path of the claims CSV, passed on to ``inspect``.

    Returns:
        None.
    """
    scored = inspect(dir)

    final_directory = os.path.join(os.getcwd(), r'Result')
    os.makedirs(final_directory, exist_ok=True)

    # Use only the file name so inputs from any directory map into Result/.
    name = os.path.basename(dir).replace(".csv", "").replace(".", "")

    headers = [
        'Username', 'Driver Id', 'Policy Number', 'D.O.P', 'Block Id',
        'Location', 'Issue', 'D.O.I', 'D.O.R', 'Amount', 'Asset',
        'Claim Id', 'Age', 'Frequency', 'Risk Score',
    ]

    # The context manager closes (and thereby saves) the workbook even when
    # writing a row raises.
    with xlsxwriter.Workbook(os.path.join(final_directory, name + '.xlsx')) as workbook:
        worksheet = workbook.add_worksheet()
        worksheet.write_row(0, 0, headers)
        for row_number, record in enumerate(scored.values(), start=1):
            # NOTE(review): the trailing reason text of each record is not
            # exported; only the 15 header columns are written.
            worksheet.write_row(row_number, 0, record[:len(headers)])
# Script driver: score one claims file and export the result workbook.
# Previously used inputs: Excel_data/1625246842.4975214.csv and
# 1625571009.4605668.csv (the former via pprint.pprint(inspect(...))).
# NOTE(review): this call sits at module level, so it also runs when the
# module is imported; create_cvs() has no return value, so "None" is printed.
print(create_cvs("Excel_data/1626487638.3813422.csv"))