CoCalc -- word

431 views
ubuntu2004
1
def process_word(word):
    """Normalise a token: trim whitespace, blank out punctuation/digits, lowercase.

    Each character listed in ``unwanted`` is swapped for a single space
    rather than deleted, so the result may contain interior or trailing
    spaces (e.g. ``"Hello!"`` becomes ``"hello "``).

    Note: a later definition of ``process_word`` in this file rebinds the
    name, so this version is not the one in effect once the module loads.
    """
    unwanted = '.!?,"\'()*_:;0123456789'
    # A single translation table maps every unwanted character to a space.
    to_space = str.maketrans(unwanted, " " * len(unwanted))
    return word.strip().translate(to_space).lower()
9
#Close on this one, but you needed to pass remove to the strip function and it would have done the work for you. As written, it added extra spaces, which caused problems later on.
10

11
def process_word(word):
    """Return *word* lowercased, without edge punctuation/digits or apostrophes.

    Punctuation and digits are removed only from the two ends of the word;
    apostrophes are removed everywhere. Whitespace is left untouched.
    """
    edge_chars = '.!?,"\'()*_:;0123456789'
    return word.strip(edge_chars).replace("'", "").lower()
16

17
def process_line(line):
    """Break one line of text into cleaned, non-empty words.

    Hyphens are treated as word separators. Every whitespace-separated token
    is passed through ``process_word``; tokens that come back empty are
    dropped from the result.
    """
    # split() with no arguments already ignores leading/trailing whitespace,
    # so the line does not need a separate strip() first.
    tokens = line.replace("-", " ").split()
    cleaned = (process_word(token) for token in tokens)
    return [token for token in cleaned if token]
29

30
def process_file(path):
    """Read the text file at *path* and return all of its processed words.

    Args:
        path: Location of the file to read; handed directly to ``open()``.

    Returns:
        One flat list holding the output of ``process_line`` for every line
        of the file, in file order.
    """
    words = []
    with open(path) as file:
        # Iterate the file object directly so lines are read lazily instead
        # of being loaded into a second list up front.
        for line in file:
            # extend() grows the list in place; rebuilding it with "+" on
            # every line would re-copy all previously collected words.
            words.extend(process_line(line))
    return words
38

39
def find_unique(words):
    """Return the distinct items of *words*, keeping first-seen order.

    Args:
        words: Iterable of items (normally strings).

    Returns:
        A new list with duplicates removed; the first occurrence of each
        item determines its position.
    """
    items = list(words)
    try:
        # dict keys are unique and keep insertion order, which gives an
        # order-preserving de-duplication in a single linear pass.
        return list(dict.fromkeys(items))
    except TypeError:
        # Unhashable items cannot be dict keys; fall back to the
        # equality-based scan so such inputs keep working.
        unique_words = []
        for item in items:
            if item not in unique_words:
                unique_words.append(item)
        return unique_words
45

46
def find_frequency(words):
    """Count how many times each word occurs.

    Args:
        words: Iterable of hashable items (normally strings).

    Returns:
        A plain ``dict`` mapping each distinct word to its number of
        occurrences, with keys in first-seen order.
    """
    # Imported locally so this function is self-contained.
    from collections import Counter

    # Convert back to a plain dict so callers still get a KeyError (not 0)
    # when they look up a word that never occurred.
    return dict(Counter(words))
54

55
def remove_stop(words, stop):
    """Return the items of *words* that are not contained in *stop*.

    Order and duplicates of the surviving words are preserved; membership is
    tested with the ``in`` operator against *stop* as given.
    """
    return [word for word in words if word not in stop]
62

63
def most_common(freq_dict, n):
    """Print a two-column table of the *n* most frequent words.

    Args:
        freq_dict: Mapping of word -> occurrence count.
        n: Maximum number of rows to print. If it exceeds the number of
            entries, every entry is printed; zero or a negative value prints
            only the header.

    Returns:
        None. The table is written to standard output.
    """
    # (count, word) pairs sort by count first and by word on ties, both in
    # descending order.
    ranked = sorted(
        ((count, word) for word, count in freq_dict.items()),
        reverse=True,
    )
    print(f"{'Word':<12}{'Count':>4}")
    print("-" * 16)
    # A slice stops at the end of the list, so asking for more rows than
    # there are words does not raise IndexError. Clamping at zero keeps a
    # negative n from being read as "all but the last |n|" by the slice.
    for count, word in ranked[:max(n, 0)]:
        print(f"{word: <12}{count: >4}")
75

76
def count_by_length(words):
    """Print a table of how many words have each length, longest first.

    Nothing is returned; the table goes to standard output.
    """
    tally = {}
    for word in words:
        size = len(word)
        tally[size] = tally.get(size, 0) + 1

    print(f"{'Length':<12}{'Count':>4}")
    print("-" * 16)
    # Sorting the (length, count) pairs in reverse lists the largest length first.
    for size, total in sorted(tally.items(), reverse=True):
        print(f"{size: <12}{total: >4}")
93

94
def count_by_first(words):
    """Print a table of how many words start with each character.

    Rows are ordered by the starting character in descending order. Empty
    words have no first character and are ignored.

    Args:
        words: Iterable of strings.

    Returns:
        None. The table is written to standard output.
    """
    tally = {}
    for word in words:
        if not word:
            # Indexing [0] on an empty word would raise IndexError.
            continue
        initial = word[0]
        tally[initial] = tally.get(initial, 0) + 1

    print(f"{'Letter':<12}{'Count':>4}")
    print("-" * 16)
    # Reverse sort of (character, count) pairs puts later characters first.
    for initial, total in sorted(tally.items(), reverse=True):
        print(f"{initial: <12}{total: >4}")
111
Product

Resources

Company