Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
566 views
ubuntu2004
1
def process_word(word):
    """Normalise a single token for word counting.

    Surrounding whitespace is stripped first, then every punctuation mark
    or digit in the list below is swapped for one space, and finally the
    text is lower-cased.

    The unwanted characters are replaced, not deleted, so the result can
    still contain spaces: "End." becomes "end " and "don't" becomes
    "don t". Callers that need whitespace-free words must split or strip
    the returned value themselves.

    Args:
        word: The raw token (a string).

    Returns:
        The cleaned, lower-cased string.
    """
    unwanted = '.!?,"\'()*_:;0123456789'
    trimmed = word.strip()
    blanked = "".join(" " if symbol in unwanted else symbol for symbol in trimmed)
    return blanked.lower()
9
10
def process_line(line):
    """Split one line of text into cleaned, lower-cased words.

    Hyphens are treated as word separators, the line is split on
    whitespace, and each token is cleaned with process_word.

    process_word substitutes spaces for punctuation and digits, so a
    cleaned token may carry stray spaces ("end." -> "end ") or consist of
    nothing but spaces ("1984"). Each cleaned token is therefore split on
    whitespace again, which guarantees that no returned word contains
    whitespace and that no empty word is returned.

    Args:
        line: One line of raw text.

    Returns:
        A list of cleaned words in the order they appear in the line.
    """
    tokens = line.strip().replace("-", " ").split()
    processed_words = []
    for token in tokens:
        # Re-split: the cleaned token may hold zero, one or several words.
        processed_words.extend(process_word(token).split())
    return processed_words
18
19
def process_file(path):
    """Read a text file and return all of its cleaned words in order.

    The file is opened in text mode with the platform's default encoding
    and read line by line; every line is handed to process_line and the
    resulting words are collected into a single list.

    Args:
        path: Path of the file to read.

    Returns:
        A list with the words of every line, in file order.
    """
    words = []
    with open(path) as file:
        # Iterate the file lazily and extend in place; rebuilding the list
        # with `words + ...` for every line copies it over and over.
        for line in file:
            words.extend(process_line(line))
    return words
27
28
def find_unique(words):
    """Return the distinct words, keeping first-occurrence order.

    Args:
        words: Iterable of words (the items must be hashable, as strings are).

    Returns:
        A new list containing each distinct word once, ordered by where
        it first appears in ``words``.
    """
    # dict keys are unique and keep insertion order, giving the same result
    # as a list-membership loop without the quadratic scan.
    return list(dict.fromkeys(words))
34
35
def find_frequency(words):
    """Count how many times each word occurs.

    Args:
        words: Iterable of words (hashable items).

    Returns:
        A plain dict mapping each distinct word to its number of
        occurrences, with keys in first-occurrence order.
    """
    from collections import Counter

    # Convert back to a plain dict so callers get the same type as before.
    return dict(Counter(words))
43
44
def remove_stop(words,stop):
    """Filter out stop words.

    Args:
        words: Iterable of words to filter.
        stop: Container of words to discard; membership is tested with
            the ``in`` operator exactly as given.

    Returns:
        A new list holding, in their original order, the words that are
        not found in ``stop``.
    """
    return [candidate for candidate in words if not (candidate in stop)]
51
52
def most_common(freq_dict,n):
    """Print a table of the most frequent words.

    Rows are ordered by count, highest first; words with equal counts are
    ordered by the word itself, also descending (the rows are sorted as
    ``(count, word)`` pairs in reverse).

    Args:
        freq_dict: Mapping of word -> occurrence count.
        n: Maximum number of rows to print. If fewer distinct words exist,
            all of them are printed; zero or a negative value prints only
            the header.

    Returns:
        None. The table is written to standard output.
    """
    ranked = sorted(
        ((count, word) for word, count in freq_dict.items()),
        reverse=True,
    )
    print(f"{'Word':<12}{'Count':>4}")
    print(("-"*16))
    # Slice instead of indexing 0..n-1 so asking for more rows than there
    # are words no longer raises IndexError; max() keeps a negative n from
    # being read as "all but the last |n| rows".
    for count, word in ranked[:max(n, 0)]:
        print(f"{word: <12}{count: >4}")
64
65
def count_by_length(words):
    """Print a table of how many words have each length.

    One row is printed per distinct word length, longest length first.

    Args:
        words: Iterable of words (anything ``len`` accepts).

    Returns:
        None. The table is written to standard output.
    """
    tally = {}
    for entry in words:
        size = len(entry)
        tally[size] = tally.get(size, 0) + 1

    # (length, count) pairs, largest length first.
    rows = sorted(tally.items(), reverse=True)

    print(f"{'Length':<12}{'Count':>4}")
    print(("-"*16))
    for size, total in rows:
        print(f"{size: <12}{total: >4}")
82
83
def count_by_first(words):
    """Print a table of how many words start with each character.

    One row is printed per distinct first character, in descending
    character order. Empty strings have no first character and are
    skipped rather than raising IndexError.

    Args:
        words: Iterable of words (strings).

    Returns:
        None. The table is written to standard output.
    """
    count_dict = {}
    for word in words:
        if not word:
            # Nothing to index; word[0] would raise on an empty string.
            continue
        initial = word[0]
        count_dict[initial] = count_dict.get(initial, 0) + 1

    # (character, count) pairs, highest character first.
    rows = sorted(count_dict.items(), reverse=True)

    print(f"{'Letter':<12}{'Count':>4}")
    print(("-"*16))
    for initial, total in rows:
        print(f"{initial: <12}{total: >4}")
100