-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexample.py
80 lines (46 loc) · 2.14 KB
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import pandas as pd
import useful_functions as uf
# import seaborn as sns
@uf.timer
def a_function(s):
    """Spin through ``s`` do-nothing iterations.

    The body performs no work; it only gives the ``uf.timer`` decorator
    something to wrap.
    NOTE(review): ``uf.timer`` comes from ``useful_functions`` and is
    presumably a timing decorator -- confirm against that module.

    Args:
        s: Number of iterations; handed directly to ``range``.
    """
    remaining_steps = range(s)
    for _step in remaining_steps:
        continue
# --- Decorated busy loop ----------------------------------------------------
# Run the uf.timer-wrapped empty loop with a large iteration count.
a_function(9109999)

# --- Disabled example: uf.list_to_str ---------------------------------------
# print(uf.list_to_str([1, 2, 3], [4, 5, 6], ["ABD", "ZUB"]))

# --- Sample data; currently only referenced by the disabled examples --------
a_list = ["12", "13", "3", "44", "s"]
# print(uf.join_list(" ", a_list))

# print(uf.str_preproces_1("He44llo ÀÈÌÒÙỲǸẀWo323rld"))

corpus = """Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis ultricies condimentum dui eget viverra. Phasellus ut pharetra justo. Curabitur interdum venenatis nisi vel aliquam. Donec auctor, tortor ac tristique convallis, felis turpis varius nisl, ac elementum neque leo ac quam. Proin in mattis risus. In eu dolor nulla. Praesent eleifend laoreet interdum. Nam pellentesque tellus ut est hendrerit, eget ornare ipsum interdum. Donec imperdiet, nisl sed bibendum vulputate, nulla ante lobortis sapien, eu gravida neque libero vitae massa."""
# print(uf.str_count("your corpus here", "here"))

string_list = ["ABC", "DEF", "GEMM", "ababv", "asdwd", "ABscw"]
# print(uf.str_remove_lowercase(string_list))
# print(uf.str_remove_uppercase(string_list))

numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
# print(uf.standard_deviation(numbers))

# --- Disabled example: uf.pdf_to_str ----------------------------------------
# to_str = uf.pdf_to_str("rand.pdf")
# print(to_str)

# --- Disabled example: uf.class_distribution on a CSV column ----------------
# file_location = "FILE LOCATION"
#
# df_original = pd.read_csv(file_location)
# df_class_label = df_original[["class_label"]]
#
# uf.class_distribution("class_label", df_class_label)

# --- Disabled example: uf.stemming ------------------------------------------
# print(
#     uf.stemming(
#         "The best definition of man is: a being that walks on two legs and is ungrateful"
#     )
# )

# --- Disabled example: uf.random_int_generator ------------------------------
# print(uf.random_int_generator(1, 31, 6))

# --- Active example: uf.deutsche_remove_stop_words_and_punc -----------------
etymology_sentence = "Die etymologischen Vorformen von deutsch bedeuteten ursprünglich „zum Volk gehörig“"
etymology_result = uf.deutsche_remove_stop_words_and_punc(etymology_sentence)
print(etymology_result)

# --- Active example: uf.custom_padding on two English sentences -------------
sentence_1 = "I’ve been waiting for uploading this on github for a long time"
sentence_2 = "This is not that bad!"
padding_result = uf.custom_padding(sentence_1, sentence_2)
print(padding_result)

# --- Active example: uf.remove_stopwords_german -----------------------------
annotation_sentence = "Jeder Satz wurde per Crowdsourcing entweder als unterstützendes Argument, als angreifendes Argument oder als kein Argument in Bezug auf das Thema kommentiert"
annotation_result = uf.remove_stopwords_german(annotation_sentence)
print(annotation_result)