#!/usr/bin/python
"""
This program analyzes a text that is given to it, counting every individual
word. It uses a red-black tree to store the information in an ordered manner.
It is able to report on the words, and give basic statistics about the tree
used.

It relies on a RedBlackTree object, that you must write. The API is as given
in the assignment.
"""

__author__ = "Adam A. Smith"
__version__ = "2023.10.21"

from redblacktree import RedBlackTree
import re

# Tokens are maximal runs of letters, digits, apostrophes and underscores;
# any other character separates them. Compiled once instead of once per line.
_TOKEN_SEPARATOR = re.compile(r"[^A-Za-z0-9'_]+")


def _increment_word(tree: RedBlackTree, word: str) -> int:
    """Record one more occurrence of *word* in *tree*.

    Returns the word's new frequency (1 if it was not present before).
    """
    freq = tree.get(word) + 1 if tree.contains_key(word) else 1
    tree.put(word, freq)
    return freq


# load a file & make a red-black tree from its words
def _make_red_black_tree(filename: str) -> RedBlackTree:
    """Build a tree mapping each lower-cased word of the file to its count.

    Leading and trailing apostrophes are removed from every token, and tokens
    that end up empty are skipped.

    Raises OSError (e.g. FileNotFoundError) if the file cannot be opened.
    """
    red_black_tree = RedBlackTree()
    with open(filename) as file:
        for line in file:
            for token in _TOKEN_SEPARATOR.split(line.lower()):
                # remove leading/trailing apostrophes
                token = token.strip("'")
                # skip if it was just apostrophes or otherwise empty
                if token:
                    _increment_word(red_black_tree, token)
    return red_black_tree


# print stats on the backing tree
def _print_stats(tree: RedBlackTree):
    """Print size, height, average depth, red-node share and root key."""
    size = len(tree)
    num_red_nodes = tree.count_red_nodes()
    # An empty tree (empty input file, or every word deleted) would otherwise
    # divide by zero here.
    red_percent = 100.0 * num_red_nodes / size if size else 0.0

    print("Tree table statistics:")
    print(f"\tSize (n): {size}")
    print(f"\tHeight: {tree.calc_height()} ({tree.calc_black_height()} black)")
    print(f"\tAvg node depth: {tree.calc_average_depth():1.3f}")
    print(f"\t# red nodes: {num_red_nodes} ({red_percent:1.1f}%)")

    root_key = tree.get_root_key()
    # NOTE(review): presumably get_root_key() returns None for an empty
    # tree -- confirm against the RedBlackTree API.
    if root_key is None:
        print("\tRoot key: (none)")
    else:
        print(f'\tRoot key: "{root_key}"')


# report the alphabetically first or last word
def _print_extreme_key(position: str, key):
    """Print the first/last word, or a notice when the tree has none.

    *position* is the adjective to print ("first" or "last"); *key* is what
    the tree returned for that end.
    """
    # NOTE(review): presumably the tree returns None when it is empty --
    # confirm against the RedBlackTree API.
    if key is None:
        print("The text contains no words.")
    else:
        print(f'The {position} word (alphabetically) is "{key}".')


# enter a loop to query the user for searches & commands
def _query_user(tree: RedBlackTree):
    """Run the interactive prompt until the user quits or input ends.

    The first character of each query selects the command: "!" special
    commands, "#" size/select, "&" rank, "<" first/predecessor,
    ">" last/successor, "-" delete, "+" add. Anything else is looked up as a
    word.
    """
    print(f"The text contains {len(tree)} unique words.")
    print('Please enter a word to query, or "!help" for help, or "!exit" to exit.')

    while True:
        print("> ", end="")
        try:
            query = input().strip()
        except EOFError:
            # End of input (Ctrl-D or piped stdin ran out): leave cleanly
            # instead of dying with a traceback.
            print()
            break

        # do nothing on empty string
        if not query:
            continue

        prefix = query[0]
        word = query[1:]

        # special commands starting with !
        if prefix == '!':
            # quit on "!quit" or "!exit"
            if query in ("!quit", "!exit"):
                break
            elif query == "!stats":
                _print_stats(tree)
            elif query == "!help":
                _print_help_menu()
            # unknown special command
            else:
                print("I didn't recognize that. Try \"!help\"?")

        # size or select
        elif prefix == '#':
            if not word:
                print(f"The text contains {len(tree)} unique words.")
            else:
                rank = _parse_int(word)
                selected = tree.select(rank)
                if selected is None:
                    print(f"There is no such word. Try a number from 0-{len(tree) - 1}.")
                else:
                    print(f'Word #{rank} is "{selected}".')

        # get rank
        elif prefix == '&':
            rank = tree.find_rank(word)
            if rank == -1:
                print(f'"{word}" is not in the text.')
            else:
                print(f'"{word}" is word #{rank}.')

        # predecessor / first
        elif prefix == '<':
            if not word:
                _print_extreme_key("first", tree.find_first_key())
            else:
                predecessor = tree.find_predecessor(word)
                if predecessor is None:
                    print(f'Nothing comes before "{word}" alphabetically.')
                else:
                    print(f'"{predecessor}" comes before "{word}" alphabetically.')

        # successor / last
        elif prefix == '>':
            if not word:
                _print_extreme_key("last", tree.find_last_key())
            else:
                successor = tree.find_successor(word)
                if successor is None:
                    print(f'Nothing comes after "{word}" alphabetically.')
                else:
                    print(f'"{successor}" comes after "{word}" alphabetically.')

        # deletion
        elif prefix == '-':
            if not word:
                print("What word do you want to delete? Please try again.")
            else:
                # delete() hands back the removed frequency, or None if the
                # word was absent.
                value = tree.delete(word)
                if value is None:
                    print(f'"{word}" was not present.')
                elif value == 1:
                    print(f'1 entry of "{word}" has been deleted.')
                else:
                    print(f'{value} entries of "{word}" have been deleted.')

        # add a word or increase its frequency
        elif prefix == '+':
            if not word:
                print("What word do you want to add? Please try again.")
            else:
                freq = _increment_word(tree, word)
                print(f'"{word}" now appears {freq}×.')

        # regular query
        else:
            freq = tree.get(query)
            if freq is None or freq == 0:
                print(f'"{query}" is not in the text.')
            else:
                print(f'"{query}" appears {freq}× in the text.')

    print("Goodbye!")


# just prints a help menu
def _print_help_menu():
    """Print one line per command understood by the interactive prompt."""
    print("\tword\t\tprints word frequency")
    print("\t-word\t\tdeletes word")
    print("\t+word\t\tadds word")
    print("\t<\t\tgets the first word")
    print("\t>\t\tgets the last word")
    print("\t<word\t\tfinds the predecessor of word")
    print("\t>word\t\tfinds the successor of word")
    print("\t#\t\tprints the tree size")
    print("\t#number\t\tprints the word with rank number")
    print("\t&word\t\tprints the rank of word")
    print("\t!help\t\tprints this help menu")
    print("\t!stats\t\tprints in-depth stats on the table")
    print("\t!exit\t\texits the program")


# parse int, but return -1 if invalid
def _parse_int(string):
    """Return int(string), or -1 when the text is not a valid integer."""
    try:
        return int(string)
    except ValueError:
        return -1


# pseudo main() function
if __name__ == "__main__":
    from sys import argv, exit, stderr

    # make sure we have a file
    if len(argv) != 2:
        print("Please enter a file name!", file=stderr)
        # a usage error is a failure, so report a non-zero status
        exit(1)

    # make the tree; only loading the file is guarded, so errors raised while
    # querying are not mistaken for a missing file
    try:
        tree = _make_red_black_tree(argv[1])
    except OSError:
        # covers missing files as well as directories / permission problems
        print("Couldn't open file \"" + argv[1] + "\".", file=stderr)
        exit(1)

    # query on it
    _query_user(tree)