-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathclassification_HRC.py
43 lines (30 loc) · 1.05 KB
/
classification_HRC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
from lda import ClassificationModel, Viewer, Info
def classification_HRC():
    """Run the classification experiment on the HRC document features.

    Builds a ``ClassificationModel`` from
    ``html/<identifier>/DocumentFeatures.csv`` with ``targetCategory1`` as
    the target feature, then drives it through the split / build / train /
    predict / evaluate steps and passes the model to
    ``Viewer(info).classificationResults``.

    Takes no arguments and returns ``None``; every setting is hard-coded in
    the PARAMETERS section below.
    """
    ##### PARAMETERS #####
    identifier = 'HRC_LDA_T15P985I1200_word2vec'
    path = 'html/%s/DocumentFeatures.csv' % identifier

    info = Info()
    info.data = 'HRC'
    info.identifier = 'classification'
    info.classifierType = 'DecisionTree'

    targetFeature = 'targetCategory1'
    # Columns handed to the model as its drop list.
    droplist = ['File', 'Unnamed: 0']

    ### PREPROCESSING ###
    model = ClassificationModel(path, targetFeature, droplist, binary=False)

    ### SELECT TEST AND TRAINING DATA ###
    model.createTarget()
    model.dropFeatures()
    # Floor division keeps the argument an int on Python 3 as well as
    # Python 2 (plain `/` would pass a float such as 50.0 on Python 3).
    # Presumably this is the number of rows used for one side of the
    # split -- verify against ClassificationModel.splitDataset.
    model.splitDataset(len(model.data) // 2)

    ### CLASSIFICATION ###
    model.buildClassifier(info.classifierType)
    model.trainClassifier()
    model.predict()

    ### EVALUATION ###
    model.evaluate()
    model.evaluation.confusionMatrix()

    # Feature importance is skipped for the 'NeuralNet' classifier type.
    # NOTE(review): the original indentation was lost; this assumes only
    # computeFeatureImportance() belongs to the conditional -- confirm.
    if info.classifierType != 'NeuralNet':
        model.computeFeatureImportance()
    model.getTaggedDocs()

    Viewer(info).classificationResults(model)
# Run the experiment only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    classification_HRC()