"""Print the contents of a text file as one lowercase, letters-only line.

Usage: python partb2.py path_to_file

The file is read line by line, every character that is neither an ASCII
letter nor whitespace is dropped, each run of whitespace is collapsed to a
single space, and the result is lowercased and printed to standard output.
"""
import argparse
import re

# Anything that is not an ASCII letter or a whitespace character.
NON_ALPHA_RE = re.compile(r'[^a-zA-Z\s]')
# One or more consecutive whitespace characters (spaces, tabs, newlines...).
WHITESPACE_RE = re.compile(r'\s+')


def read_text(path: str) -> str:
    """Return the contents of *path* with a space appended after every line.

    The extra space guarantees that the last word of one line and the first
    word of the next stay separated, even when the final line has no
    trailing newline.  The file is closed even if reading fails.
    """
    with open(path) as handle:
        return "".join(line + " " for line in handle)


def clean_text(text: str) -> str:
    """Return *text* stripped to lowercase ASCII letters and single spaces.

    Steps, in order:
      1. remove every character that is not an ASCII letter or whitespace;
      2. replace each run of whitespace with exactly one space;
      3. lowercase the result.

    Leading or trailing whitespace is collapsed to one space, not removed.
    """
    letters_only = NON_ALPHA_RE.sub('', text)
    single_spaced = WHITESPACE_RE.sub(' ', letters_only)
    return single_spaced.lower()


def main(argv=None):
    """Parse the command line, clean the named file and print the result.

    argv: optional list of argument strings; when None, argparse falls back
    to the process command line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('path_to_file', help='path to the csv file')
    args = parser.parse_args(argv)

    print(clean_text(read_text(args.path_to_file)))


if __name__ == "__main__":
    main()