ubuntu2004
# Characters treated as noise inside a token: punctuation and digits.
_REMOVE_CHARS = '.!?,"\'()*_:;0123456789'

# Translation table mapping every noise character to a space, so a whole
# token is cleaned in one pass instead of one ``replace`` per character.
_NOISE_TO_SPACE = str.maketrans(_REMOVE_CHARS, " " * len(_REMOVE_CHARS))


def process_word(word):
    """Return *word* normalised for counting.

    Punctuation and digits are replaced by spaces, surrounding whitespace
    is removed and the result is lower-cased.  Stripping happens *after*
    the replacement so that ``"Hello."`` and ``"hello"`` normalise to the
    same string.  Noise characters in the middle of a token are kept as a
    space (``"don't"`` -> ``"don t"``).  A token made only of noise
    characters yields the empty string.
    """
    return word.translate(_NOISE_TO_SPACE).strip().lower()


def process_line(line):
    """Split *line* into normalised words.

    Hyphens are treated as word separators, the line is split on
    whitespace and every token is passed through ``process_word``.
    Tokens that normalise to the empty string (pure punctuation or
    digits) are dropped.
    """
    tokens = line.replace("-", " ").split()
    cleaned = (process_word(token) for token in tokens)
    return [word for word in cleaned if word]


def process_file(path):
    """Return the list of normalised words in the text file at *path*.

    The file is read line by line and each line is handed to
    ``process_line``; words are returned in file order.
    """
    words = []
    with open(path) as file:
        for line in file:
            # extend() appends in place; ``words = words + ...`` would copy
            # the whole list for every line of the file.
            words.extend(process_line(line))
    return words


def find_unique(words):
    """Return the distinct items of *words* in first-seen order."""
    # dict keys are unique and keep insertion order.
    return list(dict.fromkeys(words))


def find_frequency(words):
    """Return a dict mapping each item of *words* to its occurrence count."""
    freq_dict = {}
    for word in words:
        freq_dict[word] = freq_dict.get(word, 0) + 1
    return freq_dict


def remove_stop(words, stop):
    """Return the items of *words* that are not contained in *stop*.

    *stop* may be any container supporting ``in``; order and duplicates
    of *words* are preserved.
    """
    return [word for word in words if word not in stop]


def _print_table(label, rows):
    """Print a two-column table of ``(key, count)`` rows under a header."""
    print(f"{label:<12}{'Count':>4}")
    print("-" * 16)
    for key, count in rows:
        print(f"{key: <12}{count: >4}")


def most_common(freq_dict, n):
    """Print the *n* most frequent words of *freq_dict* as a table.

    *freq_dict* maps words to counts.  Rows are ordered by descending
    count; ties are broken by descending word.  If *n* exceeds the number
    of entries, all entries are printed; a non-positive *n* prints only
    the header.
    """
    ranked = sorted(
        ((count, word) for word, count in freq_dict.items()),
        reverse=True,
    )
    # Slicing never runs past the end of the list; clamp negatives so that
    # ``ranked[:-1]`` is not mistaken for "all but the last".
    top = ranked[:max(n, 0)]
    _print_table("Word", [(word, count) for count, word in top])


def count_by_length(words):
    """Print a table of how many items of *words* have each length.

    Rows are ordered from the longest length to the shortest.
    """
    count_dict = {}
    for word in words:
        length = len(word)
        count_dict[length] = count_dict.get(length, 0) + 1
    _print_table("Length", sorted(count_dict.items(), reverse=True))


def count_by_first(words):
    """Print a table of how many items of *words* start with each character.

    Empty strings have no first character and are skipped.  Rows are
    ordered by descending character.
    """
    count_dict = {}
    for word in words:
        if not word:
            continue
        char = word[0]
        count_dict[char] = count_dict.get(char, 0) + 1
    _print_table("Letter", sorted(count_dict.items(), reverse=True))