comp20008-project01/partb2.py

23 lines
570 B
Python
Raw Normal View History

2021-03-01 17:57:17 +11:00
import re
2021-04-11 19:43:49 +10:00
import argparse
2021-03-01 17:57:17 +11:00
2021-04-11 19:43:49 +10:00
# Patterns used by preprocess_text, compiled once at import time.
_NON_ALPHA_NON_SPACE = re.compile(r'[^a-zA-Z\s]')
_WHITESPACE_RUN = re.compile(r'\s+')


def read_spaced_lines(path):
    """Return the contents of the file at *path* as a single string.

    Every line read from the file (including its line terminator, if any)
    is followed by one extra space, so text on adjacent lines is always
    separated by whitespace.

    The file is opened inside a ``with`` block so the handle is closed even
    if reading fails part-way through.
    """
    with open(path) as handle:
        # join() builds the result in one pass instead of repeated `+=`.
        return "".join(line + " " for line in handle)


def preprocess_text(text):
    """Return *text* reduced to lowercase ASCII letters and single spaces.

    Steps, in order:
      1. remove every character that is neither an ASCII letter nor
         whitespace;
      2. replace each run of whitespace characters with one space;
      3. convert uppercase characters to lowercase.

    Leading and trailing whitespace is collapsed to a single space but not
    stripped, so the result may begin and/or end with a space.
    """
    text = _NON_ALPHA_NON_SPACE.sub('', text)
    text = _WHITESPACE_RUN.sub(' ', text)
    return text.lower()


def main():
    """Parse the command line, then print the preprocessed file contents."""
    # parse input arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('path_to_file', help='path to the csv file')
    args = parser.parse_args()

    # read the file into a single string, normalise it, and print the result
    print(preprocess_text(read_spaced_lines(args.path_to_file)))


if __name__ == "__main__":
    main()