/* Emoticon message cleanser:
 *
 * Skeleton code written by Farhana Choudhury and Jianzhong Qi, April 2020
 *
 * Authorship Declaration:
 *
 * (1) I certify that the program contained in this submission is completely
 * my own individual work, except where explicitly noted by comments that
 * provide details otherwise. I understand that work that has been developed
 * by another student, or by me in collaboration with other students,
 * or by non-students as a result of request, solicitation, or payment,
 * may not be submitted for assessment in this subject. I understand that
 * submitting for assessment work developed by or in collaboration with
 * other students or non-students constitutes Academic Misconduct, and
 * may be penalized by mark deductions, or by other penalties determined
 * via the University of Melbourne Academic Honesty Policy, as described
 * at https://academicintegrity.unimelb.edu.au.
 *
 * (2) I also certify that I have not provided a copy of this work in either
 * softcopy or hardcopy or any other form to any other student, and nor will
 * I do so until after the marks are released. I understand that providing
 * my work to other students, regardless of my intention or any undertakings
 * made to me by that other student, is also Academic Misconduct.
 *
 * (3) I further understand that providing a copy of the assignment
 * specification to any form of code authoring or assignment tutoring
 * service, or drawing the attention of others to such services and code
 * that may have been made available via such a service, may be regarded
 * as Student General Misconduct (interfering with the teaching activities
 * of the University and/or inciting others to commit Academic Misconduct).
 * I understand that an allegation of Student General Misconduct may arise
 * regardless of whether or not I personally make use of such solutions
 * or sought benefit from such actions.
 *
 * Signed by: Rory Healy 964275
 * Dated: 9th April 2020
 */

/* Program outline (as implemented below):
 *   - messages are read from stdin, one per line, until a line starting
 *     with "###" is seen;
 *   - dictionary lines ("emoticon,description...") are then read until an
 *     empty line or end of input;
 *   - five stages print, in order: the token count of the first message,
 *     the messages without alphanumeric characters, the messages without
 *     redundant commas, dictionary statistics, and finally the messages
 *     with every token that is not a dictionary emoticon removed.
 */

#include <stdio.h>      /* printf, getchar, feof, snprintf */
#include <stdlib.h>     /* exit, EXIT_FAILURE */
#include <string.h>     /* strlen, strncmp */
#include <ctype.h>      /* isalnum */

#define STAGE_NUM_ONE 1                     /* stage numbers */
#define STAGE_NUM_TWO 2
#define STAGE_NUM_THREE 3
#define STAGE_NUM_FOUR 4
#define STAGE_NUM_FIVE 5
#define STAGE_HEADER "Stage %d\n==========\n" /* stage header format string */

#define MAX_MSG_LENGTH 280                  /* maximum message length */
#define MAX_NUM_MSGS 100                    /* maximum number of messages */
#define MAX_EMTCN_LENGTH 50                 /* maximum emot. line length */
#define MAX_NUM_EMTCN 50                    /* maximum number of emot. */

typedef char msg_t[MAX_MSG_LENGTH+1];       /* a message */
typedef char emtcn_t[MAX_EMTCN_LENGTH+1];   /* an emoticon */

/****************************************************************/

/* function prototypes */
void read_one_msg(msg_t one_msg, int max_len);
void print_stage_header(int stage_num);
int count_tokens(msg_t one_msg);

void stage_one(msg_t one_msg);
void stage_two(msg_t msgs[], int *num_msgs);
void stage_three(msg_t msgs[], int num_msgs);
void stage_four(emtcn_t emtcns[], int *num_emtcns);
void stage_five(msg_t msgs[], int num_msgs, emtcn_t emtcns[], int num_emtcns);

/* add your own function prototypes here */
int is_seperating_line(msg_t one_msg);
void remove_leading_commas(msg_t one_msg, int msg_len);
void remove_trailing_commas(msg_t one_msg, int msg_len);
void remove_consecutive_commas(msg_t one_msg, int msg_len);
int length_consecutive_commas(msg_t one_msg, int msg_len);
void get_next_emtcn(msg_t one_msg, emtcn_t emtcn_msg,
                    int *emtcn_len, int *msg_offset);
void remove_curr_emtcn(msg_t one_msg, int emtcn_len, int offset, int *msg_len);
int get_emtcn_len(msg_t one_msg, int msg_len, int msg_offset);
int is_emtcns_same(emtcn_t emtcn1, emtcn_t emtcn2, int emtcn_len);
int is_in_dict(emtcn_t curr_emtcn, int curr_emtcn_len,
               emtcn_t emtcns[], int num_emtcns);

/****************************************************************/

/* main function controls all the action, do NOT modify this function */
int main(int argc, char *argv[]) {
    /* to hold all input messages */
    msg_t msgs[MAX_NUM_MSGS];
    /* to hold the number of input messages */
    int num_msgs = 0;
    /* to hold all input emoticons in the dictionary */
    emtcn_t emtcns[MAX_NUM_EMTCN];
    /* to hold the number of emoticons in the dictionary */
    int num_emtcns = 0;

    /* stage 1: reading the first message */
    stage_one(msgs[num_msgs]);
    num_msgs++;

    /* stage 2: removing alphanumeric characters */
    stage_two(msgs, &num_msgs);

    /* stage 3: removing extra commas */
    stage_three(msgs, num_msgs);

    /* stage 4: reading the dictionary and finding the longest emoticon */
    stage_four(emtcns, &num_emtcns);

    /* stage 5: removing invalid emoticons with the help of the dictionary */
    stage_five(msgs, num_msgs, emtcns, num_emtcns);

    /* all done; take some rest */
    return 0;
}

/* read a line of input into one_msg */
void read_one_msg(msg_t one_msg, int max_len) {
    int i = 0, c;
    while (((c = getchar()) != EOF) && (c != '\n') && (c != '\r')) {
        if (i < max_len) {
            one_msg[i++] = c;
        } else {
            printf("Invalid input line, toooooooo long.\n");
            exit(EXIT_FAILURE);
        }
    }
    one_msg[i] = '\0';
}

/* print stage header given stage number */
void print_stage_header(int stage_num) {
    printf(STAGE_HEADER, stage_num);
}

/****************************************************************/
/* add your code below */

/* removes the emtcn_len characters starting at index offset from one_msg,
 * shifting the rest of the message left, and decreases *msg_len to match.
 * *msg_len must be the current length of one_msg (excluding the '\0').
 * Spans that are empty or do not lie inside the message are ignored. */
void remove_curr_emtcn(msg_t one_msg, int emtcn_len, int offset,
                       int *msg_len) {
    int src, dst;

    if (emtcn_len <= 0 || offset < 0 || offset + emtcn_len > *msg_len) {
        return;
    }

    /* slide the tail over the removed span; src runs up to and including
     * index *msg_len so that the terminating '\0' is moved as well */
    dst = offset;
    for (src = offset + emtcn_len; src <= *msg_len; src++) {
        one_msg[dst++] = one_msg[src];
    }

    *msg_len -= emtcn_len;
    one_msg[*msg_len] = '\0';
}

/* copies the token of one_msg that starts at index *msg_offset (and runs
 * up to the next ',' or the end of the message) into emtcn_msg.
 *   *emtcn_len  receives the full length of the token;
 *   *msg_offset is advanced to just past the comma that follows the token.
 * At most MAX_EMTCN_LENGTH characters are stored in emtcn_msg, so a token
 * longer than any valid emoticon cannot overflow the buffer; its true
 * length is still reported so that no dictionary entry can match it. */
void get_next_emtcn(msg_t one_msg, emtcn_t emtcn_msg,
                    int *emtcn_len, int *msg_offset) {
    int start = *msg_offset;
    int len = 0;

    while (one_msg[start + len] != '\0' && one_msg[start + len] != ',') {
        if (len < MAX_EMTCN_LENGTH) {
            emtcn_msg[len] = one_msg[start + len];
        }
        len++;
    }
    emtcn_msg[len < MAX_EMTCN_LENGTH ? len : MAX_EMTCN_LENGTH] = '\0';

    *emtcn_len = len;
    *msg_offset = start + len + 1;      /* add 1 to account for the comma */
}

/* tests if the first emtcn_len characters of two emtcns are identical;
 * returns 1 if so and 0 otherwise */
int is_emtcns_same(emtcn_t emtcn1, emtcn_t emtcn2, int emtcn_len) {
    for (int i = 0; i < emtcn_len; i++) {
        if (emtcn1[i] != emtcn2[i]) {
            return 0;
        }
    }
    return 1;
}

/* tests if an emoticon is in the dictionary emtcns[]; returns 1 if some
 * dictionary line starts with exactly curr_emtcn (of length curr_emtcn_len)
 * followed by ',' or the end of the line, and 0 otherwise */
int is_in_dict(emtcn_t curr_emtcn, int curr_emtcn_len,
               emtcn_t emtcns[], int num_emtcns) {
    int i, dict_len;

    for (i = 0; i < num_emtcns; i++) {
        /* the emoticon is the part of the line before the first comma;
         * stopping at '\0' too keeps a comma-less line inside its buffer */
        dict_len = 0;
        while (emtcns[i][dict_len] != ',' && emtcns[i][dict_len] != '\0') {
            dict_len++;
        }

        /* compares lengths first to avoid unnecessary function calls */
        if (dict_len == curr_emtcn_len &&
            is_emtcns_same(emtcns[i], curr_emtcn, curr_emtcn_len)) {
            return 1;
        }
    }
    return 0;
}

/* removes leading commas from a message, in place; at most msg_len
 * characters following the leading commas are kept */
void remove_leading_commas(msg_t one_msg, int msg_len) {
    int skip = 0, kept = 0;

    /* the terminating '\0' is not a comma, so this stops inside the string */
    while (one_msg[skip] == ',') {
        skip++;
    }

    while (kept < msg_len && one_msg[skip + kept] != '\0') {
        one_msg[kept] = one_msg[skip + kept];
        kept++;
    }
    one_msg[kept] = '\0';
}

/* removes trailing commas from a message of length msg_len, in place */
void remove_trailing_commas(msg_t one_msg, int msg_len) {
    int end = (msg_len > 0) ? msg_len : 0;

    /* end > 0 guards the empty and all-commas cases, where scanning
     * backwards would otherwise step before the start of the array */
    while (end > 0 && one_msg[end - 1] == ',') {
        end--;
    }
    one_msg[end] = '\0';
}

/* replaces every run of consecutive commas within the first msg_len
 * characters of a message by a single comma, in place */
void remove_consecutive_commas(msg_t one_msg, int msg_len) {
    int src, dst = 0;

    if (msg_len < 0) {
        msg_len = 0;
    }
    one_msg[msg_len] = '\0';    /* safety net */

    /* nothing to do when no run of two or more commas exists */
    if (length_consecutive_commas(one_msg, msg_len) <= 1) {
        return;
    }

    for (src = 0; src < msg_len && one_msg[src] != '\0'; src++) {
        /* drop a comma whenever the last character kept is also a comma */
        if (one_msg[src] == ',' && dst > 0 && one_msg[dst - 1] == ',') {
            continue;
        }
        one_msg[dst++] = one_msg[src];
    }
    one_msg[dst] = '\0';
}

/* finds the length of the first run of two or more commas within the first
 * msg_len characters of one_msg; returns 1 if there is no such run */
int length_consecutive_commas(msg_t one_msg, int msg_len) {
    int i, counter = 0;

    for (i = 0; i < msg_len; i++) {
        if (one_msg[i] != ',') {
            continue;
        }
        if (one_msg[i + 1] == ',') {
            /* one more adjacent pair inside the current run */
            counter += 1;
        } else if (counter) {
            /* last comma of the first run has been reached */
            break;
        }
    }
    return counter + 1;
}

/* checks if the current message is a seperating line, i.e. starts with
 * "###"; returns 1 if so and 0 otherwise */
int is_seperating_line(msg_t one_msg) {
    static const char seperating_line[] = "###";

    return strncmp(one_msg, seperating_line, strlen(seperating_line)) == 0;
}

/* scan a message and count the number of tokens in it */
int count_tokens(msg_t one_msg) {
    /* tokens are separated by commas, so there is one more token than
     * there are commas */
    int num_tokens = 1;

    for (int i = 0; one_msg[i] != '\0'; i++) {
        if (one_msg[i] == ',') {
            num_tokens += 1;
        }
    }
    return num_tokens;
}

/****************************************************************/
/* stages 1 - 5 */

/* stage 1: reading the first message */
void stage_one(msg_t one_msg) {
    /* print stage header */
    print_stage_header(STAGE_NUM_ONE);

    /* read the first message */
    read_one_msg(one_msg, MAX_MSG_LENGTH);

    /* count and print the number of tokens */
    printf("Number of tokens: %d\n\n", count_tokens(one_msg));
}

/* stage 2: removing alphanumeric characters.
 * msgs[0] must already hold the first message (read in stage 1). Further
 * messages are read until the separating line; each message is stored in
 * msgs[] with its alphanumeric characters removed, *num_msgs is set to the
 * number of messages stored, and the stored messages are printed. */
void stage_two(msg_t msgs[], int *num_msgs) {
    /* the current (unfiltered) message being analysed */
    msg_t curr_msg;
    int src, len;

    print_stage_header(STAGE_NUM_TWO);

    /* *num_msgs will be used as an index, hence why it is zeroed */
    *num_msgs = 0;

    /* as msgs[0] already has a message in it, start from a copy of it and
     * only then read more messages */
    snprintf(curr_msg, sizeof curr_msg, "%s", msgs[0]);

    while (*num_msgs < MAX_NUM_MSGS && !is_seperating_line(curr_msg)) {
        /* input ended without a separating line: stop instead of storing
         * the empty strings that read_one_msg returns from here on */
        if (curr_msg[0] == '\0' && feof(stdin)) {
            break;
        }

        /* keep only the non-alphanumeric characters; the cast is needed
         * because isalnum is undefined for negative char values */
        len = 0;
        for (src = 0; curr_msg[src] != '\0'; src++) {
            if (!isalnum((unsigned char)curr_msg[src])) {
                msgs[*num_msgs][len++] = curr_msg[src];
            }
        }
        msgs[*num_msgs][len] = '\0';
        *num_msgs += 1;

        /* prepare for the next message to be read */
        read_one_msg(curr_msg, MAX_MSG_LENGTH);
    }

    for (int i = 0; i < *num_msgs; i++) {
        printf("%s\n", msgs[i]);
    }
    printf("\n");
}

/* stage 3: removing extra commas (leading, trailing and consecutive) from
 * every message, in place, and printing the results */
void stage_three(msg_t msgs[], int num_msgs) {
    int i;

    print_stage_header(STAGE_NUM_THREE);

    for (i = 0; i < num_msgs; i++) {
        /* the length is re-measured before each step because the previous
         * step may have shortened the message */
        remove_leading_commas(msgs[i], (int)strlen(msgs[i]));
        remove_trailing_commas(msgs[i], (int)strlen(msgs[i]));
        remove_consecutive_commas(msgs[i], (int)strlen(msgs[i]));

        printf("%s\n", msgs[i]);
    }
    printf("\n");
}

/* stage 4: reading the dictionary and finding the longest emoticon.
 * Dictionary lines are read into emtcns[] until an empty line (or the end
 * of input) is met or the dictionary is full; *num_emtcns is increased by
 * the number of lines stored. */
void stage_four(emtcn_t emtcns[], int *num_emtcns) {
    /* the longest emoticon seen so far; empty when the dictionary is */
    emtcn_t max_emtcn = "";
    int max_emtcn_len = 0, curr_len, i;

    print_stage_header(STAGE_NUM_FOUR);

    while (*num_emtcns < MAX_NUM_EMTCN) {
        read_one_msg(emtcns[*num_emtcns], MAX_EMTCN_LENGTH);

        /* break as soon as the line is empty */
        if (emtcns[*num_emtcns][0] == '\0') {
            break;
        }
        *num_emtcns += 1;
    }

    for (i = 0; i < *num_emtcns; i++) {
        /* the emoticon is the part of the line before the first comma */
        curr_len = 0;
        while (emtcns[i][curr_len] != '\0' && emtcns[i][curr_len] != ',') {
            curr_len++;
        }

        /* strictly greater, so the first of equally long emoticons wins */
        if (curr_len > max_emtcn_len) {
            max_emtcn_len = curr_len;
            snprintf(max_emtcn, sizeof max_emtcn, "%.*s",
                     curr_len, emtcns[i]);
        }
    }

    printf("Emoticon total: %d\nLongest: %s\nLength: %d\n",
           *num_emtcns, max_emtcn, max_emtcn_len);
    printf("\n");
}

/* stage 5: removing invalid emoticons with the help of the dictionary.
 * Every comma-separated token of every message that is not a dictionary
 * emoticon is deleted in place (the commas themselves are kept), and the
 * messages that are still non-empty are printed. */
void stage_five(msg_t msgs[], int num_msgs, emtcn_t emtcns[], int num_emtcns) {
    /* the current emtcn from msgs[] */
    emtcn_t curr_emtcn;
    int i;
    /* the length of the current message from msgs[] */
    int curr_msg_len;
    int curr_emtcn_len;
    /* the index at which the next token starts, and where the current
     * token started */
    int curr_offset, emtcn_start;

    print_stage_header(STAGE_NUM_FIVE);

    for (i = 0; i < num_msgs; i++) {
        curr_msg_len = (int)strlen(msgs[i]);
        curr_offset = 0;

        while (curr_offset < curr_msg_len) {
            /* get the next emoticon, check if it's in emtcns, and remove it
             * from msgs[i] if it's not in emtcns */
            emtcn_start = curr_offset;
            get_next_emtcn(msgs[i], curr_emtcn, &curr_emtcn_len,
                           &curr_offset);

            if (!is_in_dict(curr_emtcn, curr_emtcn_len, emtcns, num_emtcns)) {
                remove_curr_emtcn(msgs[i], curr_emtcn_len, emtcn_start,
                                  &curr_msg_len);
                /* the message got shorter: the comma that followed the
                 * token now sits at emtcn_start, so the next token starts
                 * right after it */
                curr_offset = emtcn_start + 1;
            }
        }

        if (msgs[i][0] != '\0') {
            printf("%s\n", msgs[i]);
        }
    }
}

/* ASCII-art banner reading "Algorithms Are Fun!" (the alignment whitespace
 * of the rows below has been lost):
 *
 * _ _ _ _
 * /\ | | (_) | | | |
 * / \ | | __ _ ___ _ __ _ | |_ | |__ _ __ ___ ___
 * / /\ \ | | / _` | / _ \ | '__| | | | __| | '_ \ | '_ ` _ \ / __|
 * / ____ \ | | | (_| | | (_) | | | | | | |_ | | | | | | | | | | \__ \ .
 * /_/ \_\ |_| \__, | \___/ |_| |_| \__| |_| |_| |_| |_| |_| |___/
 * __/ |
 * |___/
 * ______ _
 * /\ | ____| | |
 * / \ _ __ ___ | |__ _ _ _ __ | |
 * / /\ \ | '__| / _ \ | __| | | | | | '_ \ | |
 * / ____ \ | | | __/ | | | |_| | | | | | |_|
 * /_/ \_\ |_| \___| |_| \__,_| |_| |_| (_)
 *
 */