# ubuntu2004
from collections import Counter

# Punctuation and digits trimmed from both ends of every token.
_STRIP_CHARS = '.!?,"\'()*_:;0123456789'


def process_word(word):
    """Normalize a single whitespace-delimited token.

    Leading and trailing punctuation/digits (see ``_STRIP_CHARS``) are
    trimmed, apostrophes anywhere in the token are dropped, and the result is
    lower-cased.  Other punctuation in the *interior* of the token is kept.

    Args:
        word: Raw token, e.g. ``'"Don\\'t,"'``.

    Returns:
        The cleaned token; may be the empty string (e.g. for ``"123"``).
    """
    # Trim with str.strip(chars) instead of replacing each character with a
    # space: replacement leaves stray spaces inside the token, which caused
    # problems for the later processing steps.
    return word.strip(_STRIP_CHARS).replace("'", "").lower()


def process_line(line):
    """Split a line of text into cleaned, non-empty words.

    Hyphens are treated as word separators, so ``"well-known"`` yields
    ``["well", "known"]``.

    Args:
        line: One line of text (a trailing newline is fine).

    Returns:
        List of processed words in their original order.  Tokens that are
        empty after ``process_word`` (pure punctuation/digits) are skipped.
    """
    # str.split() with no arguments already discards leading/trailing
    # whitespace, so no separate strip() is needed.
    cleaned = (process_word(token) for token in line.replace("-", " ").split())
    return [word for word in cleaned if word]


def process_file(path, *, encoding=None):
    """Read a text file and return every processed word it contains.

    Args:
        path: Path of the file to read.
        encoding: Optional text encoding passed to ``open``.  The default
            ``None`` keeps the platform's default encoding.

    Returns:
        List of processed words, in file order.
    """
    words = []
    with open(path, encoding=encoding) as file:
        for line in file:
            # extend() grows the list in place; rebuilding it with ``+`` for
            # every line is quadratic in the size of the file.
            words.extend(process_line(line))
    return words


def find_unique(words):
    """Return the distinct words in order of first appearance.

    Args:
        words: Iterable of hashable items (normally strings).

    Returns:
        List with duplicates removed, first occurrences kept.
    """
    # dict preserves insertion order, giving an O(n) order-preserving dedupe.
    return list(dict.fromkeys(words))


def find_frequency(words):
    """Count how many times each word occurs.

    Args:
        words: Iterable of hashable items (normally strings).

    Returns:
        Plain ``dict`` mapping each word to its count, keyed in order of
        first appearance.
    """
    return dict(Counter(words))


def remove_stop(words, stop):
    """Return the words that are not stop words.

    Args:
        words: Iterable of words to filter.
        stop: Container of stop words; anything supporting ``in``.  Passing a
            ``set`` makes each membership test O(1).

    Returns:
        List of the words not found in ``stop``, in original order.
    """
    return [word for word in words if word not in stop]


def _print_table(label, rows):
    """Print a two-column table: ``label`` / ``Count`` header, rule, rows."""
    print(f"{label:<12}{'Count':>4}")
    print("-" * 16)
    for key, count in rows:
        print(f"{key: <12}{count: >4}")


def most_common(freq_dict, n):
    """Print the ``n`` most frequent words as a table.

    Rows are ordered by descending count; ties are broken by descending word.
    If ``n`` exceeds the number of distinct words, every word is printed; a
    non-positive ``n`` prints only the header.

    Args:
        freq_dict: Mapping of word -> count, as built by ``find_frequency``.
        n: Maximum number of rows to print.
    """
    ranked = sorted(
        ((count, word) for word, count in freq_dict.items()), reverse=True
    )
    # Slicing clamps to the available rows instead of indexing past the end;
    # max() keeps a negative n from being read as "all but the last |n|".
    top = ranked[:max(n, 0)]
    _print_table("Word", [(word, count) for count, word in top])


def count_by_length(words):
    """Print a table of how many words have each length, longest first.

    Args:
        words: Iterable of strings.
    """
    counts = Counter(len(word) for word in words)
    _print_table("Length", sorted(counts.items(), reverse=True))


def count_by_first(words):
    """Print a table of how many words start with each character.

    Rows are sorted by first character in descending order.  Empty strings
    have no first character and are ignored.

    Args:
        words: Iterable of strings.
    """
    counts = Counter(word[0] for word in words if word)
    _print_table("Letter", sorted(counts.items(), reverse=True))