forked from mengsiwei/data_process_shanghai_covid_19
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmatrix.py
89 lines (67 loc) · 2.35 KB
/
matrix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 15 May 2022 22:31
@author: Siwei Meng
"""
import os.path
import shutil
import xlwt
import datetime
from mpmath import *
import mpmath as mp
def main():
    """Build a sentence-by-keyword sheet of 1-marks for every period.

    For each period name returned by ``get_file_name`` this:

    1. reads ``./data/day_7/<period>_50高频词.txt`` and splits it on single
       spaces into the keyword list;
    2. reads ``./data/day_7/<period>_清洗结果.txt`` line by line and splits
       each line on single spaces into tokens;
    3. writes the keywords as row 0 of a fresh ``xlwt`` sheet named after
       the period;
    4. for line ``i`` writes ``1`` into cell ``(i + 1, k)`` whenever the
       line's k-th token equals the k-th keyword (``k < 50``); other cells
       are left empty;
    5. saves the workbook as ``<period>_matrix.xls`` in the current working
       directory and moves it into ``data/matrix``.

    The worksheet object and the keyword list are printed for each period.
    """
    max_words = 50  # only the first 50 keyword columns are ever compared
    data_dir = "./data/day_7/"
    target_path = r"data/matrix"

    date_start = datetime.date(2022, 3, 1)
    period_names = get_file_name(date_start, 6)

    # If the target directory were missing, shutil.move would rename the
    # workbook to a plain file called "data/matrix" instead of moving it
    # into a folder, so make sure the folder exists first.
    os.makedirs(target_path, exist_ok=True)

    for file in period_names:
        filename = data_dir + file + "_清洗结果.txt"
        wordsfile = data_dir + file + "_50高频词.txt"

        workbook = xlwt.Workbook(encoding='utf-8')
        worksheet = workbook.add_sheet(file)
        print(worksheet)

        # Context managers close both input files even if reading fails.
        with open(wordsfile, encoding='utf-8', errors='ignore') as words_fh:
            wordslist = words_fh.read().strip().split(' ')
        print(wordslist)

        with open(filename, "r", encoding='utf-8', errors='ignore') as lines_fh:
            sentence_list = lines_fh.readlines()

        # Header row: one column per keyword.
        for col, word in enumerate(wordslist):
            worksheet.write(0, col, word)

        # zip() stops at the shorter sequence, so a keyword file with fewer
        # than 50 entries can no longer cause an IndexError.
        compared_words = wordslist[:max_words]
        for row, line in enumerate(sentence_list, start=1):
            tokens = line.strip().split(' ')
            # NOTE(review): the comparison is positional (k-th token against
            # k-th keyword), exactly as before. If the goal is "keyword
            # occurs anywhere in the sentence", this should become a
            # membership test -- confirm with the author before changing,
            # because it alters the generated matrices.
            for col, (token, word) in enumerate(zip(tokens, compared_words)):
                if token == word:
                    worksheet.write(row, col, 1)

        matrixname = file + "_matrix.xls"
        workbook.save(matrixname)

        # Move the saved workbook into the matrix folder.
        matrix_dir = os.path.join(os.getcwd(), matrixname)
        shutil.move(matrix_dir, target_path)
def get_file_name(date_start, cycle, count=10):
    """Return the names of consecutive, non-overlapping date periods.

    Every name has the form ``"<start>-<end>"`` with both dates rendered by
    ``str()`` (ISO ``YYYY-MM-DD``), where ``end = start + cycle`` days. The
    next period starts on the day after the previous period ends.

    Args:
        date_start: ``datetime.date`` on which the first period starts.
        cycle: Number of days added to a period's start to obtain its end.
        count: Number of period names to generate. Defaults to 10, the
            amount that used to be hard-coded.

    Returns:
        A list of ``count`` strings, e.g. ``"2022-03-01-2022-03-07"`` for
        ``date_start=datetime.date(2022, 3, 1)`` and ``cycle=6``.
    """
    # The arguments are used as given; previously they were overwritten
    # with fixed values, so callers could not choose another start or cycle.
    span = datetime.timedelta(days=cycle)
    one_day = datetime.timedelta(days=1)

    names = []
    start = date_start
    for _ in range(count):
        end = start + span
        names.append(str(start) + '-' + str(end))
        start = end + one_day
    return names
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()