def set_new_path(self, new_idf_path):
    """Load the IDF table stored at ``new_idf_path`` and make it current.

    The file is read as UTF-8 text holding one ``word freq`` pair per line,
    the two fields separated by a single space. Blank lines are ignored.
    Nothing happens when ``new_idf_path`` equals ``self.path``.

    On success ``self.path``, ``self.idf_freq`` (word -> float) and
    ``self.median_idf`` are replaced together. ``median_idf`` is the element
    at index ``len // 2`` of the sorted values, or ``0.0`` when the file
    holds no entries.

    If the file cannot be opened or read, a warning is logged and all three
    attributes keep their previous values, so a later call with the same
    path tries again instead of being treated as already loaded.

    Args:
        new_idf_path: Path of the IDF file to load.

    Raises:
        ValueError: A non-blank line is not exactly ``word freq`` with a
            float-parsable ``freq``, or the file is not valid UTF-8
            (``UnicodeDecodeError`` is a ``ValueError`` subclass). The
            previously loaded state is left untouched in that case.
    """
    if self.path == new_idf_path:
        return

    # Keep the ``try`` limited to the I/O so parse errors are never mistaken
    # for a missing file.
    try:
        # ``with`` closes the handle even when ``read`` fails.
        with open(new_idf_path, 'rb') as idf_file:
            raw = idf_file.read()
    except IOError as err:
        # Loading stays best-effort (no exception reaches the caller), but
        # the failure is reported instead of being silently discarded.
        import logging
        logging.getLogger(__name__).warning(
            "could not load IDF file %r: %s", new_idf_path, err)
        return

    # Build into a local dict so a malformed line cannot leave the instance
    # with a half-filled table.
    table = {}
    for line in raw.decode('utf-8').splitlines():
        entry = line.strip()
        if not entry:
            continue
        word, freq = entry.split(' ')
        table[word] = float(freq)

    # Commit everything at once, only after the whole file parsed cleanly.
    self.path = new_idf_path
    self.idf_freq = table
    if table:
        self.median_idf = sorted(table.values())[len(table) // 2]
    else:
        self.median_idf = 0.0


def get_idf(self):
    """Return the current ``(idf_freq, median_idf)`` pair.

    ``idf_freq`` is the instance's own dict, not a copy.
    """
    return self.idf_freq, self.median_idf