"""Print the letters-only, lowercased, whitespace-collapsed text of a file.

Usage: pass the path of the input file as the single positional argument.
"""
import re
import argparse


def normalize_text(text):
    """Return *text* reduced to lowercase ASCII letters and single spaces.

    Every character that is neither an ASCII letter nor whitespace is
    removed, each run of whitespace is then collapsed to one space, and the
    result is lowercased. Leading/trailing whitespace is collapsed to a
    single space, not stripped.

    >>> normalize_text("Hello,  World!\\n42")
    'hello world '
    """
    letters_and_space = re.sub(r'[^a-zA-Z\s]', r'', text)
    single_spaced = re.sub(r'\s+', r' ', letters_and_space)
    return single_spaced.lower()


def read_lines_joined(path):
    """Return the content of the file at *path* with a space after each line.

    Each line (including its line ending, if any) is followed by one extra
    space, so a non-empty file always yields a string that ends in
    whitespace, even when its last line has no newline.

    The file is opened in text mode with the platform default encoding.
    Raises OSError if the file cannot be opened.
    """
    # ``with`` guarantees the handle is closed even if reading fails midway.
    with open(path) as handle:
        return "".join(line + " " for line in handle)


def main(argv=None):
    """Parse the command line, normalize the given file, and print the result.

    argv: optional list of argument strings; when None, argparse reads
    the arguments from ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('path_to_file', help = 'path to the csv file')
    args = parser.parse_args(argv)

    print(normalize_text(read_lines_joined(args.path_to_file)))


if __name__ == "__main__":
    main()