/* comp10002-project01/program.c
 * 2022-02-15 14:51:00 +11:00
 *
 * 584 lines
 * 17 KiB
 * C
 */

/* Emoticon message cleanser:
*
* Skeleton code written by Farhana Choudhury and Jianzhong Qi, April 2020
*
* Authorship Declaration:
* (1) I certify that the program contained in this submission is completely
* my own individual work, except where explicitly noted by comments that
* provide details otherwise. I understand that work that has been developed
* by another student, or by me in collaboration with other students,
* or by non-students as a result of request, solicitation, or payment,
* may not be submitted for assessment in this subject. I understand that
* submitting for assessment work developed by or in collaboration with
* other students or non-students constitutes Academic Misconduct, and
* may be penalized by mark deductions, or by other penalties determined
* via the University of Melbourne Academic Honesty Policy, as described
* at https://academicintegrity.unimelb.edu.au.
*
* (2) I also certify that I have not provided a copy of this work in either
* softcopy or hardcopy or any other form to any other student, and nor will
* I do so until after the marks are released. I understand that providing
* my work to other students, regardless of my intention or any undertakings
* made to me by that other student, is also Academic Misconduct.
*
* (3) I further understand that providing a copy of the assignment
* specification to any form of code authoring or assignment tutoring
* service, or drawing the attention of others to such services and code
* that may have been made available via such a service, may be regarded
* as Student General Misconduct (interfering with the teaching activities
* of the University and/or inciting others to commit Academic Misconduct).
* I understand that an allegation of Student General Misconduct may arise
* regardless of whether or not I personally make use of such solutions
* or sought benefit from such actions.
*
* Signed by: Rory Healy 964275
* Dated: 9th April 2020
*/
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
/* stage numbers, passed to print_stage_header() by each stage function */
#define STAGE_NUM_ONE 1
#define STAGE_NUM_TWO 2
#define STAGE_NUM_THREE 3
#define STAGE_NUM_FOUR 4
#define STAGE_NUM_FIVE 5
/* printf format for a stage banner; takes the stage number as its argument */
#define STAGE_HEADER "Stage %d\n==========\n"
/* maximum number of characters in one message line, excluding the NUL */
#define MAX_MSG_LENGTH 280
/* maximum number of messages that msgs[] in main() can hold */
#define MAX_NUM_MSGS 100
/* maximum number of characters in one dictionary line, excluding the NUL */
#define MAX_EMTCN_LENGTH 50
/* maximum number of dictionary lines that emtcns[] in main() can hold */
#define MAX_NUM_EMTCN 50
/* a message: room for MAX_MSG_LENGTH characters plus the terminating NUL */
typedef char msg_t[MAX_MSG_LENGTH+1];
/* a dictionary line: MAX_EMTCN_LENGTH characters plus the terminating NUL */
typedef char emtcn_t[MAX_EMTCN_LENGTH+1];
/****************************************************************/
/* function prototypes */
/* input and banner helpers supplied with the skeleton */
void read_one_msg(msg_t one_msg, int max_len);
void print_stage_header(int stage_num);

/* stage entry points, called in order from main() */
void stage_one(msg_t one_msg);
void stage_two(msg_t msgs[], int *num_msgs);
void stage_three(msg_t msgs[], int num_msgs);
void stage_four(emtcn_t emtcns[], int *num_emtcns);
void stage_five(msg_t msgs[], int num_msgs, emtcn_t emtcns[], int num_emtcns);

/* add your own function prototypes here */

/* message inspection */
int count_tokens(msg_t one_msg);
int is_seperating_line(msg_t one_msg);

/* comma clean-up used by stage three */
void remove_leading_commas(msg_t one_msg, int msg_len);
void remove_trailing_commas(msg_t one_msg, int msg_len);
void remove_consecutive_commas(msg_t one_msg, int msg_len);
int length_consecutive_commas(msg_t one_msg, int msg_len);

/* emoticon extraction, lookup and removal used by stage five */
void get_next_emtcn(msg_t one_msg, emtcn_t emtcn_msg,
                    int *emtcn_len, int *msg_offset);
void remove_curr_emtcn(msg_t one_msg, int emtcn_len, int offset, int *msg_len);
int get_emtcn_len(msg_t one_msg, int msg_len, int msg_offset);
int is_emtcns_same(emtcn_t emtcn1, emtcn_t emtcn2, int emtcn_len);
int is_in_dict(emtcn_t curr_emtcn, int curr_emtcn_len,
               emtcn_t emtcns[], int num_emtcns);
/****************************************************************/
/* main function controls all the action, do NOT modify this function
 *
 * Owns the message array and the emoticon dictionary and hands them to
 * the five stage functions in order. argc and argv are not used.
 * Always returns 0.
 */
int
main(int argc, char *argv[]) {
/* to hold all input messages */
msg_t msgs[MAX_NUM_MSGS];
/* to hold the number of input messages */
int num_msgs = 0;
/* to hold all input emoticons in the dictionary */
emtcn_t emtcns[MAX_NUM_EMTCN];
/* to hold the number of emoticons in the dictionary */
int num_emtcns = 0;
/* stage 1: reading the first message (stored in msgs[0]) */
stage_one(msgs[num_msgs]);
num_msgs++;
/* stage 2: removing alphanumeric characters; stage_two() resets num_msgs
 * to zero and recounts, starting again from the message in msgs[0] */
stage_two(msgs, &num_msgs);
/* stage 3: removing extra commas */
stage_three(msgs, num_msgs);
/* stage 4: reading the dictionary and finding the longest emoticon */
stage_four(emtcns, &num_emtcns);
/* stage 5: removing invalid emoticons with the help of the dictionary */
stage_five(msgs, num_msgs, emtcns, num_emtcns);
/* all done; take some rest */
return 0;
}
/* Reads characters from stdin into one_msg until EOF, '\n' or '\r' is
 * seen (the terminating character is consumed but not stored), then
 * NUL-terminates one_msg.
 *
 * max_len is the largest number of characters that may be stored, not
 * counting the NUL. If the line is longer, an error message is printed
 * and the program exits with EXIT_FAILURE.
 */
void
read_one_msg(msg_t one_msg, int max_len) {
    int len = 0;

    for (;;) {
        int ch = getchar();
        if (ch == EOF || ch == '\n' || ch == '\r') {
            break;
        }
        if (len >= max_len) {
            printf("Invalid input line, toooooooo long.\n");
            exit(EXIT_FAILURE);
        }
        one_msg[len] = ch;
        len++;
    }
    one_msg[len] = '\0';
}
/* Prints the banner for stage number stage_num to stdout, formatted
 * with STAGE_HEADER.
 */
void
print_stage_header(int stage_num) {
    const int stage = stage_num;

    printf(STAGE_HEADER, stage);
}
/****************************************************************/
/* add your code below */
/* Deletes the emtcn_len characters of one_msg that start at index
 * offset by shifting the rest of the message left over them.
 *
 * *msg_len must hold the length of one_msg on entry; it is reduced by
 * emtcn_len on return. Only the emtcn_len characters are removed, so a
 * comma that follows them stays in the message.
 */
void
remove_curr_emtcn(msg_t one_msg, int emtcn_len, int offset, int *msg_len) {
    int shrunk_len = *msg_len - emtcn_len;
    int end = 0;

    /* everything before offset is already where it belongs; each later
     * character moves emtcn_len places towards the front */
    for (int dst = offset; dst < shrunk_len; dst++) {
        one_msg[dst] = one_msg[dst + emtcn_len];
    }
    if (shrunk_len > 0) {
        end = shrunk_len;
    }
    one_msg[end] = '\0';
    *msg_len = shrunk_len;
}
/* Extracts the token of one_msg that starts at index *msg_offset and
 * runs up to (not including) the next ',' or the end of the message.
 *
 * emtcn_msg   receives the token, NUL-terminated. A message token can be
 *             longer than an emtcn_t can hold, so at most
 *             sizeof(emtcn_t) - 1 characters are stored and the rest of
 *             the token is skipped rather than written past the buffer.
 * *emtcn_len  receives the full length of the token in one_msg, even
 *             when the stored copy was truncated.
 * *msg_offset is advanced past the token and one following separator.
 */
void
get_next_emtcn(msg_t one_msg, emtcn_t emtcn_msg,
               int *emtcn_len, int *msg_offset) {
    const int capacity = (int)sizeof(emtcn_t) - 1;
    const int start = *msg_offset;
    int end = start;

    while (one_msg[end] != '\0' && one_msg[end] != ',') {
        int pos = end - start;
        if (pos < capacity) {
            emtcn_msg[pos] = one_msg[end];
        }
        end++;
    }

    int token_len = end - start;
    emtcn_msg[token_len < capacity ? token_len : capacity] = '\0';
    *emtcn_len = token_len;
    *msg_offset = end + 1; /* add 1 to account for the comma */
}
/* Compares the first emtcn_len characters of emtcn1 and emtcn2.
 * Returns 1 when they all match (also when emtcn_len is zero or
 * negative) and 0 at the first mismatch.
 */
int
is_emtcns_same(emtcn_t emtcn1, emtcn_t emtcn2, int emtcn_len) {
    const char *lhs = emtcn1;
    const char *rhs = emtcn2;
    int remaining = emtcn_len;

    while (remaining > 0) {
        if (*lhs != *rhs) {
            return 0;
        }
        lhs++;
        rhs++;
        remaining--;
    }
    return 1;
}
/* Tests whether curr_emtcn (of length curr_emtcn_len) appears in the
 * dictionary emtcns[0..num_emtcns-1].
 *
 * The emoticon of a dictionary line is the text before its first ','.
 * A line with no comma is treated as being all emoticon, which is the
 * same rule stage_four() uses when it measures dictionary entries, and
 * keeps the scan from running past the end of the line.
 *
 * Returns 1 when a dictionary emoticon matches exactly, otherwise 0.
 */
int
is_in_dict(emtcn_t curr_emtcn, int curr_emtcn_len,
           emtcn_t emtcns[], int num_emtcns) {
    for (int i = 0; i < num_emtcns; i++) {
        /* length of the emoticon part of this dictionary line */
        int entry_len = 0;
        while (emtcns[i][entry_len] != ',' && emtcns[i][entry_len] != '\0') {
            entry_len++;
        }
        /* compare lengths first so differing lengths cost no comparison */
        if (entry_len == curr_emtcn_len &&
            is_emtcns_same(emtcns[i], curr_emtcn, curr_emtcn_len)) {
            return 1;
        }
    }
    return 0;
}
/* Strips every ',' at the front of one_msg by shifting the remaining
 * characters to the start of the buffer. At most msg_len characters are
 * kept; the result is NUL-terminated.
 */
void
remove_leading_commas(msg_t one_msg, int msg_len) {
    int skip = 0;
    int kept = 0;

    /* count the commas to drop */
    while (one_msg[skip] == ',') {
        skip++;
    }
    /* reads always stay at or ahead of writes, so shifting in place is safe */
    while (kept < msg_len && one_msg[kept + skip] != '\0') {
        one_msg[kept] = one_msg[kept + skip];
        kept++;
    }
    one_msg[kept] = '\0';
}
/* Strips every ',' at the end of one_msg, where msg_len is the current
 * length of the message, by moving the terminating NUL forward.
 *
 * An empty message, or one made only of commas, becomes the empty
 * string; the scan never looks in front of the first character.
 */
void
remove_trailing_commas(msg_t one_msg, int msg_len) {
    int end = msg_len;

    if (end < 0) {
        end = 0;
    }
    /* the end > 0 test keeps one_msg[end - 1] inside the buffer */
    while (end > 0 && one_msg[end - 1] == ',') {
        end--;
    }
    one_msg[end] = '\0';
}
/* Collapses every run of two or more commas in one_msg into a single
 * comma. msg_len is the current length of the message; the message is
 * cut at that length before it is processed.
 *
 * Done in one in-place pass: a comma is dropped when the character most
 * recently kept is also a comma.
 */
void
remove_consecutive_commas(msg_t one_msg, int msg_len) {
    int dst = 0;

    one_msg[msg_len] = '\0'; /* safety net */
    for (int src = 0; one_msg[src] != '\0'; src++) {
        if (one_msg[src] == ',' && dst > 0 && one_msg[dst - 1] == ',') {
            continue;
        }
        one_msg[dst] = one_msg[src];
        dst++;
    }
    one_msg[dst] = '\0';
}
/* Returns the length of the first run of two or more commas found in
 * the first msg_len characters of one_msg, or 1 when there is no such
 * run.
 */
int
length_consecutive_commas(msg_t one_msg, int msg_len) {
    int pos = 0;
    int pairs = 0;

    /* move to the first comma that is directly followed by another */
    while (pos < msg_len &&
           !(one_msg[pos] == ',' && one_msg[pos + 1] == ',')) {
        pos++;
    }
    /* each adjacent pair inside the run adds one to its length */
    while (pos < msg_len && one_msg[pos] == ',' && one_msg[pos + 1] == ',') {
        pairs++;
        pos++;
    }
    return pairs + 1;
}
/* Returns 1 when one_msg begins with the separator "###", otherwise 0. */
int
is_seperating_line(msg_t one_msg) {
    static const char marker[] = "###";

    /* sizeof counts the NUL, so subtract one to get the marker length */
    return strncmp(one_msg, marker, sizeof marker - 1) == 0;
}
/* Returns the number of comma-separated tokens in one_msg, computed as
 * the number of commas plus one (so an empty message counts as one).
 */
int
count_tokens(msg_t one_msg) {
    int commas = 0;

    for (const char *p = one_msg; *p != '\0'; p++) {
        if (*p == ',') {
            commas++;
        }
    }
    return commas + 1;
}
/****************************************************************/
/* stages 1 - 5 */
/* stage 1: prints the stage banner, reads the first message from stdin
 * into one_msg, and prints how many tokens it contains.
 */
void
stage_one(msg_t one_msg) {
    int num_tokens;

    print_stage_header(STAGE_NUM_ONE);
    read_one_msg(one_msg, MAX_MSG_LENGTH);
    num_tokens = count_tokens(one_msg);
    printf("Number of tokens: %d\n\n", num_tokens);
}
/* stage 2: removing alphanumeric characters
 *
 * msgs[0] must already hold the first message. Starting with it, each
 * message is stored in msgs[] with its alphanumeric characters removed,
 * and further lines are read from stdin until a separating line is met
 * or MAX_NUM_MSGS messages have been stored. The separating line itself
 * is not stored.
 *
 * *num_msgs is reset and, on return, holds the number of messages
 * stored. The cleaned messages are printed one per line, followed by a
 * blank line.
 */
void
stage_two(msg_t msgs[], int *num_msgs) {
    print_stage_header(STAGE_NUM_TWO);

    /* *num_msgs is used as the index of the next free slot */
    *num_msgs = 0;

    /* the first message is already in msgs[0]; work from a copy so the
     * filtered text can be written straight back into that slot */
    msg_t curr_msg;
    memcpy(curr_msg, msgs[0], strlen(msgs[0]) + 1);

    /* checking the count before storing keeps every write inside msgs[] */
    while (*num_msgs < MAX_NUM_MSGS && !is_seperating_line(curr_msg)) {
        char *cleaned = msgs[*num_msgs];
        int len = 0;

        for (int j = 0; curr_msg[j] != '\0'; j++) {
            /* <ctype.h> functions need a value representable as
             * unsigned char, so cast before testing */
            if (!isalnum((unsigned char)curr_msg[j])) {
                cleaned[len++] = curr_msg[j];
            }
        }
        cleaned[len] = '\0';
        *num_msgs += 1;

        /* prepare for the next message to be read */
        read_one_msg(curr_msg, MAX_MSG_LENGTH);
    }

    for (int i = 0; i < *num_msgs; i++) {
        printf("%s\n", msgs[i]);
    }
    printf("\n");
}
/* stage 3: removing extra commas
 *
 * For each of the num_msgs messages, removes leading commas, then
 * trailing commas, then collapses runs of commas, and stores the result
 * back in msgs[]. The cleaned messages are printed one per line,
 * followed by a blank line.
 */
void
stage_three(msg_t msgs[], int num_msgs) {
    msg_t scratch;

    print_stage_header(STAGE_NUM_THREE);

    for (int idx = 0; idx < num_msgs; idx++) {
        /* work on a private copy, NUL included */
        memcpy(scratch, msgs[idx], strlen(msgs[idx]) + 1);

        /* every step may shorten the message, so measure it again */
        remove_leading_commas(scratch, (int)strlen(scratch));
        remove_trailing_commas(scratch, (int)strlen(scratch));
        remove_consecutive_commas(scratch, (int)strlen(scratch));

        /* put the cleaned message back in its slot */
        memcpy(msgs[idx], scratch, strlen(scratch) + 1);
    }

    for (int idx = 0; idx < num_msgs; idx++) {
        printf("%s\n", msgs[idx]);
    }
    printf("\n");
}
/* stage 4: reading the dictionary and finding the longest emoticon
 *
 * Reads dictionary lines from stdin into emtcns[], starting at index
 * *num_emtcns, until an empty line is read or the array holds
 * MAX_NUM_EMTCN entries; *num_emtcns is increased once per stored line.
 *
 * The emoticon of a line is the text before its first ',' (or the whole
 * line if it has none). Prints the number of entries, the longest
 * emoticon (the earliest one on a tie) and its length, followed by a
 * blank line. With an empty dictionary the longest emoticon prints as
 * an empty string with length 0.
 */
void
stage_four(emtcn_t emtcns[], int *num_emtcns) {
    print_stage_header(STAGE_NUM_FOUR);

    /* testing the count before reading keeps every write inside emtcns[] */
    while (*num_emtcns < MAX_NUM_EMTCN) {
        char *slot = emtcns[*num_emtcns];
        read_one_msg(slot, MAX_EMTCN_LENGTH);
        /* break as soon as the line is empty */
        if (slot[0] == '\0') {
            break;
        }
        *num_emtcns += 1;
    }

    /* start from "" so that printing is well defined with no entries */
    emtcn_t max_emtcn = "";
    int max_emtcn_len = 0;

    for (int i = 0; i < *num_emtcns; i++) {
        /* length of the emoticon part of this dictionary line */
        int curr_len = 0;
        while (emtcns[i][curr_len] != '\0' && emtcns[i][curr_len] != ',') {
            curr_len++;
        }
        if (curr_len > max_emtcn_len) {
            max_emtcn_len = curr_len;
            memcpy(max_emtcn, emtcns[i], (size_t)curr_len);
            max_emtcn[curr_len] = '\0';
        }
    }

    printf("Emoticon total: %d\nLongest: %s\nLength: %d\n",
           *num_emtcns, max_emtcn, max_emtcn_len);
    printf("\n");
}
/* stage 5: removing invalid emoticons with the help of the dictionary
 *
 * Walks each of the num_msgs messages token by token (tokens are
 * separated by commas). A token that is not an emoticon of the
 * dictionary emtcns[0..num_emtcns-1] is deleted from the message; the
 * commas around it are kept. Afterwards every message that is not empty
 * is printed on its own line.
 */
void
stage_five(msg_t msgs[], int num_msgs, emtcn_t emtcns[], int num_emtcns) {
    print_stage_header(STAGE_NUM_FIVE);

    /* the current token; sized as a whole message because a token can be
     * as long as the message that contains it */
    msg_t curr_emtcn;

    for (int i = 0; i < num_msgs; i++) {
        int curr_msg_len = (int)strlen(msgs[i]);
        /* index in msgs[i] where the next token starts */
        int curr_offset = 0;

        while (curr_offset < curr_msg_len) {
            int token_start = curr_offset;
            int curr_emtcn_len = 0;

            get_next_emtcn(msgs[i], curr_emtcn, &curr_emtcn_len, &curr_offset);
            if (!is_in_dict(curr_emtcn, curr_emtcn_len, emtcns, num_emtcns)) {
                remove_curr_emtcn(msgs[i], curr_emtcn_len, token_start,
                                  &curr_msg_len);
                /* the text after the token has moved left to token_start,
                 * so the next token now begins just past the separator
                 * that sits there; without this the scan would resume
                 * curr_emtcn_len characters too far along */
                curr_offset = token_start + 1;
            }
        }
    }

    for (int i = 0; i < num_msgs; i++) {
        if (msgs[i][0] == '\0') {
            continue;
        }
        printf("%s\n", msgs[i]);
    }
}
/*
* _ _ _ _
* /\ | | (_) | | | |
* / \ | | __ _ ___ _ __ _ | |_ | |__ _ __ ___ ___
* / /\ \ | | / _` | / _ \ | '__| | | | __| | '_ \ | '_ ` _ \ / __|
* / ____ \ | | | (_| | | (_) | | | | | | |_ | | | | | | | | | | \__ \
* /_/ \_\ |_| \__, | \___/ |_| |_| \__| |_| |_| |_| |_| |_| |___/
* __/ |
* |___/
* ______ _
* /\ | ____| | |
* / \ _ __ ___ | |__ _ _ _ __ | |
* / /\ \ | '__| / _ \ | __| | | | | | '_ \ | |
* / ____ \ | | | __/ | | | |_| | | | | | |_|
* /_/ \_\ |_| \___| |_| \__,_| |_| |_| (_)
*
*/