# Calculate_Readability_Score.py
# Forked from mongodb/docs-realm.
# Import OS utility library for file manipulation.
import os
# Import Path utility library for directory crawling.
from pathlib import Path
# Import readability scoring library.
import textstat
# Directory that is searched recursively for '*.txt' files to score.
# A relative path is resolved against the current working directory.
text_files_dir = './output'
# Directory the scores file is written to.
scores_dir = './scores'
# File name of the Markdown scores report created inside scores_dir.
scores_file_name = 'scores.md'
# Start a fresh scores file containing only the report heading.
# The target directory is created first so a clean checkout (where
# scores_dir does not exist yet) does not fail with FileNotFoundError.
# Mode 'w' truncates any report left over from a previous run.
os.makedirs(scores_dir, exist_ok=True)
with open(os.path.join(scores_dir, scores_file_name), 'w', encoding='utf-8') as scores_file:
    scores_file.write('Readability scores for changed documents: \n')
# Score every '*.txt' file found under text_files_dir and append one
# Markdown bullet per document to the scores file.
# The scores file is opened once for the whole loop instead of once per
# document, and the paths are sorted so the report order is deterministic
# rather than depending on filesystem enumeration order.
with open(os.path.join(scores_dir, scores_file_name), 'a', encoding='utf-8') as scores_file:
    for text_file_path in sorted(Path(text_files_dir).rglob('*.txt')):
        # Read the text to be scored from the text file.
        with open(text_file_path, 'r', encoding='utf-8') as text_file:
            text = text_file.read()
        # Score the text.
        flesch_reading_ease_score = textstat.flesch_reading_ease(text)
        flesch_grade_level = textstat.flesch_kincaid_grade(text)
        # Label the document by its path relative to text_files_dir, without
        # the trailing '.txt'. Working on path components (rather than
        # str.replace) strips only the leading directory and only the final
        # suffix, and as_posix() keeps '/' separators on every platform.
        document_name = text_file_path.relative_to(text_files_dir).with_suffix('').as_posix()
        # Write the score line for this document.
        scores_file.write(
            '- **{}**: Grade Level: {}, Reading Ease: {}\n'.format(
                document_name, flesch_grade_level, flesch_reading_ease_score
            )
        )
# Append the interpretation guide to the end of the scores file: the target
# Grade Level, a Markdown table mapping Flesch Reading Ease score ranges to
# difficulty, and a pointer to a tool for improving readability.
# The adjacent string literals below are joined by the parser into a single
# string, so the whole guide is emitted with one write() call.
with open(os.path.join(scores_dir, scores_file_name), 'a') as scores_file:
    scores_file.write(
        '\n'
        'For Grade Level, aim for 8 or below.\n'
        '\n'
        'For Reading Ease scores, aim for 60 or above:\n'
        '\n'
        '| Score | Difficulty |\n'
        '|-------|-------------------|\n'
        '|90-100 | Very Easy |\n'
        '| 80-89 | Easy |\n'
        '| 70-79 | Fairly Easy |\n'
        '| 60-69 | Medium |\n'
        '| 50-59 | Fairly Hard |\n'
        '| 30-49 | Hard |\n'
        '| 0-29 | Very Hard |\n'
        '\n'
        'For help improving readability, try [Hemingway App](https://hemingwayapp.com/).\n'
    )