"""Normalise the text of a file and print it to standard output.

Post-patch contents of ``partb2.py`` as introduced by commit cf9299e
("partb2 complete").

Usage: python partb2.py path_to_file
"""
import argparse
import re


def preprocess_text(text):
    """Return *text* as lowercase ASCII letters separated by single spaces.

    Every character that is neither an ASCII letter nor whitespace is
    removed, each run of whitespace is then collapsed into one space, and
    the result is lowercased. Leading or trailing whitespace in *text*
    therefore survives as a single leading or trailing space.
    """
    letters_and_spaces = re.sub(r'[^a-zA-Z\s]', r'', text)
    single_spaced = re.sub(r'\s+', r' ', letters_and_spaces)
    return single_spaced.lower()


def read_lines_as_text(path):
    """Return the contents of the file at *path* as one string.

    A space is appended after every line so that the last word of the file
    is followed by whitespace even when the file has no trailing newline.
    """
    # The context manager closes the file even if reading raises.
    with open(path) as source:
        return "".join(line + " " for line in source)


def main(argv=None):
    """Parse the command line, normalise the given file and print the result.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``; ``None``
            (the default) makes argparse read the real command line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('path_to_file', help='path to the csv file')
    args = parser.parse_args(argv)

    print(preprocess_text(read_lines_as_text(args.path_to_file)))


if __name__ == "__main__":
    main()