from sklearn.feature_extraction.text import CountVectorizer
#----------------------------
# Read the input document (UTF-8 text file)
#----------------------------
# The context manager guarantees the file handle is closed, even if
# read() raises.
with open('doc01.txt', 'r', encoding='utf8') as f:
    doc = f.read()
#----------------------------
# Print the original text
#----------------------------
print('原文')
# `doc` is already a decoded str, so it is printed directly.  Round-tripping
# it through 'raw_unicode_escape' and back through 'utf-8' raises
# UnicodeDecodeError for characters in U+0080..U+00FF and turns characters
# above U+00FF into literal "\uXXXX" escape sequences.
print(doc)
#----------------------------
# Extract the feature words (vocabulary) of the document
#----------------------------
# min_df is the minimum document frequency a term needs in order to be kept:
# an int is an absolute number of documents, a float in [0.0, 1.0] is a
# proportion of documents.  The int 1 used here keeps every term.
vectorizer = CountVectorizer(min_df=1)
# The vectorizer expects an iterable of documents; this corpus has one.
content = [doc]
stat = vectorizer.fit_transform(content)
print('特徵文字')
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer get_feature_names_out() and fall back for older installations.
# The result is converted to a plain list so the printed format is the same
# with either API.
if hasattr(vectorizer, 'get_feature_names_out'):
    feature_names = vectorizer.get_feature_names_out().tolist()
else:
    feature_names = vectorizer.get_feature_names()
print(feature_names)
print('特徵文字統計')
# Row 0 holds the term counts of the only document in the corpus.
print(stat.toarray()[0])
About 200 unaccompanied minors have been left with nowhere to sleep, our Europe reporter Gavin Lee says.
Some 30 of them have been offered housing in a warehouse, but the situation for the rest remains unclear.
They are said to be becoming increasingly desperate.
Nearly 5,600 people have been moved to reception centres since Monday, the government said in a statement (in French), including about 1,500 unaccompanied minors being housed in an on-site container camp.