partb1 completed
This commit is contained in:
parent
1c2258939e
commit
1c39aa1873
2 changed files with 158 additions and 2 deletions
125
partb1.csv
Normal file
125
partb1.csv
Normal file
|
@ -0,0 +1,125 @@
|
|||
filename,documentID
|
||||
001.txt,JDKC-105M
|
||||
002.txt,PUQK-674B
|
||||
003.txt,VPBC-005
|
||||
004.txt,YWCE-738I
|
||||
005.txt,GHXO-669P
|
||||
006.txt,KAWQ-187J
|
||||
007.txt,SOCR-012L
|
||||
008.txt,KDHH-278R
|
||||
009.txt,JLCK-314G
|
||||
010.txt,QXYP-302
|
||||
011.txt,KBIU-888S
|
||||
012.txt,AYEY-671H
|
||||
013.txt,KKXQ-499
|
||||
014.txt,ZVCZ-635K
|
||||
015.txt,HRAM-828D
|
||||
016.txt,HHIF-616S
|
||||
017.txt,MLGR-878X
|
||||
018.txt,NIXS-836H
|
||||
019.txt,OESP-334
|
||||
020.txt,MQHK-220P
|
||||
021.txt,BWQF-732
|
||||
022.txt,AOXA-744D
|
||||
023.txt,AMCX-238Z
|
||||
024.txt,ZRLB-963
|
||||
025.txt,ZJXS-236K
|
||||
026.txt,OGVB-726B
|
||||
027.txt,OTPT-625L
|
||||
028.txt,DLOI-457S
|
||||
029.txt,UGNG-987P
|
||||
030.txt,VLGL-512K
|
||||
031.txt,MEVF-928T
|
||||
032.txt,XTUU-890C
|
||||
033.txt,LMFL-110
|
||||
034.txt,PIGH-858P
|
||||
035.txt,RIZC-127R
|
||||
036.txt,YBHW-577V
|
||||
037.txt,CRGU-326
|
||||
038.txt,EXMG-013B
|
||||
039.txt,XBFV-441P
|
||||
040.txt,PTXN-906
|
||||
041.txt,AJUO-808R
|
||||
042.txt,FZHV-289W
|
||||
043.txt,IFWQ-428L
|
||||
044.txt,GFIB-810N
|
||||
045.txt,LNQR-256
|
||||
046.txt,PVYR-593X
|
||||
047.txt,CXWG-362
|
||||
048.txt,AVVP-372N
|
||||
049.txt,HIKP-557S
|
||||
050.txt,JMGH-608I
|
||||
051.txt,RZIA-145G
|
||||
052.txt,IBKV-251K
|
||||
053.txt,KTUL-361B
|
||||
054.txt,PUZZ-195H
|
||||
055.txt,RSHE-829
|
||||
056.txt,BTAR-174V
|
||||
057.txt,TSAM-385Q
|
||||
058.txt,RSYY-734T
|
||||
059.txt,SSUD-401
|
||||
060.txt,SBAC-693P
|
||||
061.txt,LFWR-772Y
|
||||
062.txt,TSBI-281
|
||||
063.txt,RINK-392
|
||||
064.txt,ETZD-846
|
||||
065.txt,GQMX-983
|
||||
066.txt,GGJQ-271
|
||||
067.txt,PHYU-165S
|
||||
068.txt,HLNI-938D
|
||||
069.txt,PIMH-385U
|
||||
070.txt,LWEW-582E
|
||||
071.txt,BTCD-438F
|
||||
072.txt,EJFC-205
|
||||
073.txt,XWBA-608
|
||||
074.txt,UGXI-811F
|
||||
075.txt,FICA-623T
|
||||
076.txt,AWMI-088K
|
||||
077.txt,YSNS-697P
|
||||
078.txt,YZJY-617P
|
||||
079.txt,XFXG-118T
|
||||
080.txt,AEBA-345H
|
||||
081.txt,SDNM-432V
|
||||
082.txt,FZHJ-523
|
||||
083.txt,MEYM-146
|
||||
084.txt,ELPI-149T
|
||||
085.txt,USJW-494
|
||||
086.txt,OJIB-671D
|
||||
087.txt,ANKW-165P
|
||||
088.txt,EFHN-444
|
||||
089.txt,MGOB-327W
|
||||
090.txt,WDCS-487
|
||||
091.txt,EALY-521Z
|
||||
092.txt,FRPL-275B
|
||||
093.txt,FLZT-426
|
||||
094.txt,CFQJ-830
|
||||
095.txt,SPIL-111S
|
||||
096.txt,HNRN-134B
|
||||
097.txt,UDND-112
|
||||
098.txt,EZDK-705A
|
||||
099.txt,JJWD-835
|
||||
100.txt,ESHL-668Y
|
||||
101.txt,LXNO-661O
|
||||
102.txt,YCUZ-432
|
||||
103.txt,HQRE-637M
|
||||
104.txt,TQCI-200A
|
||||
105.txt,ZQTE-982B
|
||||
106.txt,CFHG-288
|
||||
107.txt,UJKR-627
|
||||
108.txt,YHWW-255C
|
||||
109.txt,AMLY-573J
|
||||
110.txt,RHGF-926Y
|
||||
111.txt,VYRH-360S
|
||||
112.txt,AMRX-523T
|
||||
113.txt,GPEK-672T
|
||||
114.txt,AGCR-591A
|
||||
115.txt,WNFK-465I
|
||||
116.txt,EEMR-682A
|
||||
117.txt,XMBY-038T
|
||||
118.txt,ETJN-385Z
|
||||
119.txt,ZFZV-394
|
||||
120.txt,BLVY-265
|
||||
121.txt,ERRM-330E
|
||||
122.txt,JFGO-085F
|
||||
123.txt,KGPU-366S
|
||||
124.txt,PMFQ-998Z
|
|
35
partb1.py
35
partb1.py
|
@ -1,5 +1,36 @@
|
|||
## Part B Task 1
|
||||
|
||||
import re
|
||||
import pandas as pd
|
||||
import os
|
||||
import argparse
|
||||
|
||||
# regex pattern matching document ID: four capital letters, a hyphen,
# three digits and an optional trailing letter (e.g. "JDKC-105M", "VPBC-005")
pattern = r'[A-Z]{4}-\d{3}[a-zA-Z]?'


def find_document_id(lines):
    """Return the first document ID found in *lines*, or None if there is none.

    *lines* is any iterable of strings (for example an open text file).
    Scanning stops at the first line containing a match, so exactly one
    value is produced per document no matter how many lines mention an ID.
    """
    for line in lines:
        match = re.search(pattern, line)
        if match:
            return match.group(0)
    return None


def collect_document_ids(directory):
    """Return ``(filenames, document_ids)`` for every entry in *directory*.

    The two lists are index-aligned: ``document_ids[i]`` is the first ID
    found in ``filenames[i]``, or None when that file contains no ID.
    Filenames are sorted because ``os.listdir`` returns entries in
    arbitrary order and the output should be reproducible.
    """
    filenames = sorted(os.listdir(directory))
    document_ids = []
    for filename in filenames:
        # the context manager closes the file even if reading raises
        with open(os.path.join(directory, filename)) as f:
            document_ids.append(find_document_id(f))
    return filenames, document_ids


def main():
    """Extract the document ID of every file in ./cricket and save a CSV.

    The ``cricket`` directory is resolved against the current working
    directory, as is a relative ``path_to_csv`` argument. The CSV has the
    columns ``filename`` and ``documentID``; a file without an ID gets an
    empty ``documentID`` field.
    """
    # parse input arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('path_to_csv', help='path to the csv file')
    args = parser.parse_args()

    # build the path explicitly instead of changing the working directory
    filenames, document_ids = collect_document_ids(
        os.path.join(os.getcwd(), 'cricket'))

    # construct a Series with the document IDs and filenames, and create a CSV
    s = pd.Series(
        data=document_ids,
        index=pd.Index(filenames, name='filename'),
        name='documentID',
    )
    s.to_csv(args.path_to_csv)


if __name__ == '__main__':
    main()
|
||||
|
|
Loading…
Reference in a new issue