# commit fa1fa79ac763417f25764e8e9ef92f29563e6090
# Author: Rory Healy
# Date: Sat Jun 8 21:05:12 2024 +1000
#
#     Initial commit. Note this is my assignment submission as is - no
#     modifications have been made since.
#
# NOTE(review): the code below is a reviewed revision of that submission; each
# docstring states what was corrected.

# ------------------------------------------------------------------------------
# diff --git a/part1.py b/part1.py (new file mode 100644, index 0000000..b34c80b)

# Title: Project 1 - Friends
# Author: Rory Healy
# Date created - 16th April 2019
# Date modified - 18th April 2019


def get_friendly_dict(friend_list):
    '''Calculates the degree-one friends of every individual.

    Takes friend_list, a list of reciprocal friendship links, each a pair of
    names. Returns a dictionary mapping every individual that appears in
    friend_list to the set of their immediate friends.'''

    # One pass over the links: each pair contributes a friend to both people,
    # so there is no need to rescan the whole list for every person.
    friend_dict = {}
    for first, second in friend_list:
        friend_dict.setdefault(first, set()).add(second)
        friend_dict.setdefault(second, set()).add(first)

    return friend_dict


# ------------------------------------------------------------------------------
# diff --git a/part2.py b/part2.py (new file mode 100644, index 0000000..d252535)

# Title: Project 1 - Social Network Besties
# Author: Rory Healy
# Date created - 17th April 2019


def friend_besties(individual, bestie_dict):
    '''Returns a sorted list of the individual's degree-one friends.

    Takes an individual's name, stored as the string "individual", and the
    dictionary of sets of immediate friends, "bestie_dict". Returns an empty
    list when the individual is not in bestie_dict or has no friends.'''

    # A direct lookup replaces the scan over every entry, which used to stop
    # early (returning the wrong answer) as soon as it met anybody with an
    # empty friend set.
    return sorted(bestie_dict.get(individual, ()))


# ------------------------------------------------------------------------------
# diff --git a/part3.py b/part3.py (new file mode 100644, index 0000000..63b0937)

# Title: Project 1 - Social Network Second Besties
# Author: Rory Healy
# Date created - 17th April 2019
# Date modified - 18th April 2019


def friend_second_besties(individual, bestie_dict):
    '''Returns a sorted list of the individual's degree-two friends.

    Takes a person, stored as the string "individual", and the dictionary of
    everyone's friends, "bestie_dict". A degree-two friend is a friend of one
    of the individual's friends who is neither the individual nor one of their
    immediate friends. Returns an empty list when there are none, or when the
    individual is not in bestie_dict.'''

    besties = bestie_dict.get(individual, set())

    # Collects the friends of *every* degree-one friend (not just the first
    # one), tolerating friends that have no entry of their own.
    second_besties = set()
    for bestie in besties:
        second_besties.update(bestie_dict.get(bestie, ()))

    # Removes the individual and any immediate friends. Set operations avoid
    # deleting from a list while iterating over it, which skips elements.
    second_besties.discard(individual)
    second_besties.difference_update(besties)

    return sorted(second_besties)


# ------------------------------------------------------------------------------
# diff --git a/part4.py b/part4.py (new file mode 100644, index 0000000..adb52f7)

# Title: Project 1 - Network Coverage
# Author: Rory Healy
# Date created - 18th April 2019


def besties_coverage(individuals, bestie_dict, relationship_list):
    '''Returns the proportion of the network covered by the individuals.

    Takes a list of people "individuals", stored as strings in a list, a
    dictionary of sets of friends "bestie_dict", and a list of functions
    "relationship_list" that define relationships in the social network,
    selected from friend_besties and friend_second_besties. Each function is
    called as relationship(individual, bestie_dict) and must return an
    iterable of names.

    Returns a float: the number of distinct people in the network who are
    either a member of individuals or connected to one via a relationship in
    relationship_list, divided by the size of the network. Returns 0.0 for an
    empty network.'''

    if not bestie_dict:
        return 0.0

    # A set is used so that somebody reachable from several individuals, or
    # via several relationships, is only counted once.
    connected = set()
    for individual in individuals:
        connected.add(individual)
        for relationship in relationship_list:
            connected.update(relationship(individual, bestie_dict))

    # Only people who are actually part of the network count towards coverage.
    number_covered = sum(1 for person in connected if person in bestie_dict)
    return number_covered / len(bestie_dict)


# ------------------------------------------------------------------------------
# diff --git a/part5.py b/part5.py (new file mode 100644, index 0000000..4a7abfe)

# Title: Project 1 - Social Network Attribute Prediction
# Author: Rory Healy
# Date created - 18th April 2019


def friendship_closeness():
    '''Assigns a "friendship-closeness" value, from 0 to 1, based on the number
    of degrees of separation between people. The further the separation, the
    closer this value is to 0. Takes a dictionary of sets of friends
    "bestie_dict" and returns a dictionary of sets of friends with their
    "friendship-closeness" value.

    Placeholder: this describes the intended design only. The function
    currently takes no arguments, does nothing and returns None.'''


def prioritise_friendships():
    '''Make the predictions prioritise those from the people with the highest
    "friendship-closeness" value. Takes a user, stored as the string
    "unknown_user", and returns a list of sets of other people in order of
    highest to lowest closeness (e.g. degree-one friends are in the first set,
    degree-two people are in the second set, etc.).

    Placeholder: this describes the intended design only. The function
    currently takes no arguments, does nothing and returns None.'''


def _most_common_attributes(people, feature, feat_dict):
    '''Returns the sorted list of the most frequent values of "feature" among
    "people", or an empty list if none of them has a known value for it.'''
    from collections import Counter

    counts = Counter(
        feat_dict[person][feature]
        for person in people
        if feature in feat_dict.get(person, {})
    )
    if not counts:
        return []

    highest = max(counts.values())
    return sorted(value for value, count in counts.items()
                  if count == highest)


def friendly_prediction(unknown_user, features, bestie_dict, feat_dict):
    '''Predicts attributes of a user from the attributes of their friends.

    Takes a user, stored as the string "unknown_user", features of the user
    that are used to predict attributes, stored as a set "features", a
    dictionary of sets of friends "bestie_dict", and a dictionary containing
    the known attributes for each user "feat_dict". Returns a dictionary of
    features with a predicted list of values for each feature.

    For each feature the prediction is the most common value among the user's
    degree-one friends (ties give a sorted list). If no degree-one friend has
    a value for the feature, the degree-two friends are used instead, and if
    they have none either the prediction is an empty list. Any value recorded
    for unknown_user in feat_dict is ignored.'''

    # For the first given example, the predicted favourite author comes from
    # Kim, while the predicted university comes from both Sandy and Alex. As
    # Sandy and Alex are both degree-two friends, preference can be given to
    # neither without further information. This is a main limitation for the
    # 'friendship-closeness' value being used to make a prediction, as the
    # unknown user may be closer to one degree-two friend than another.
    besties = friend_besties(unknown_user, bestie_dict)
    second_besties = friend_second_besties(unknown_user, bestie_dict)

    predictions = {}
    for feature in features:
        predicted = _most_common_attributes(besties, feature, feat_dict)
        if not predicted:
            predicted = _most_common_attributes(second_besties, feature,
                                                feat_dict)
        predictions[feature] = predicted

    return predictions


# ------------------------------------------------------------------------------
# diff --git a/project01 b/project01 (new file mode 100644, index 0000000..f5c5314)
# --- /dev/null
# +++ b/project01
# @@ -0,0 +1,214 @@
#
# Project 1 is all about "social networks", and the power of social connections,
# both in terms of how impressively large a portion of the social network can be
# accessed from a small number of seed users and their friends or
# friends-of-friends, and how accurately the attributes of an individual can be
# predicted from (partial) attributes of their friends/friends-of-friends. A
# large part of the context for the project is in illustrating how it is that
# companies such as Cambridge Analytica are able to influence the world so
# impressively, from a small set of users of their products.
+ +Throughout the project, we will refer to individuals as "nodes" in the social +network, and (mutual) friendship connections as "edges" connecting those nodes. +See the lecture slides for more details. + +-------------------------------------------------------------------------------- +Part 1 - Friends + +Write a function get_friendly_dict() that calculates the degree-one friends of +each individual in a social network. The function takes one argument: + +- friend_list, a list of reciprocal friendship links between individuals. + +The function should return a dictionary of sets, containing the set of all +"degree-one" (= immediate) friends for each individual in the social network. +Note that the specific order of the individuals in the dictionary, and also the +ordering of the friends in each set does not matter. + +The structure of friend_list is as follows: each element is a 2-tuple of +strings, representing a pairing of names of individuals in the social network +who are friends. Note that as friendship links are reciprocal, the 2-tuple +('kim', 'sandy'), e.g., indicates that 'kim' is a friend of 'sandy', and also +that 'sandy' is a friend of 'kim'. + +Example function calls are: + + >>> get_friendly_dict([('kim', 'sandy'), ('alex', 'sandy'), + ('kim', 'alex'), ('kim', 'glenn')]) + {'kim': {'glenn', 'sandy', 'alex'}, 'sandy': {'kim', 'alex'}, + 'alex': {'sandy', 'kim'}, 'glenn': {'kim'}} + + >>> get_friendly_dict([('kim', 'sandy'), ('sandy', 'alex'), + ('alex', 'glenn'), ('glenn', 'kim')]) + {'kim': {'glenn', 'sandy'}, 'sandy': {'kim', 'alex'}, + 'alex': {'glenn', 'sandy'}, 'glenn': {'kim', 'alex'}} + +-------------------------------------------------------------------------------- +Part 2 - Social Network Besties + +Write a function friend_besties() that calculates the "besties" (i.e. 
The function +takes two arguments: + +- individual, an individual in the social network, in the form of a string ID +- bestie_dict, a dictionary of sets of friends of each individual in the + social network (as per the first question of the Project) + +The function should return a sorted list, made up of all "degree-one" friends +for the individual. In the instance that the individual does not have any +friends in the social network, the function should return an empty list. + +Example function calls are: + + >>> friend_besties('kim', {'kim': {'sandy', 'alex', 'glenn'}, + 'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}}) + ['alex', 'glenn', 'sandy'] + + >>> friend_besties('ali', {'kim': {'sandy', 'alex', 'glenn'}, + 'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}}) + [] + +-------------------------------------------------------------------------------- +Part 3 - Social Network Second Besties + +Write a function friend_second_besties() that calculates the "second-besties" +(i.e. degree-two friends) of a given individual in a social network. The +function takes two arguments: + +- individual, an individual in the social network, in the form of a string ID +- bestie_dict, a dictionary of sets of friends of each individual in the + social network (as per the first question of the Project) + +The function should return a sorted list, made up of all "degree-two" friends +for the individual. In the instance that the individual does not have any +degree-two friends in the social network, the function should return an +empty list. 
+ +Example function calls are: + + >>> friend_second_besties('glenn', {'kim': {'sandy', 'alex', 'glenn'}, + 'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}}) + ['alex', 'sandy'] + + >>> friend_second_besties('kim', {'kim': {'sandy', 'alex', 'glenn'}, + 'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}}) + [] + +-------------------------------------------------------------------------------- +Part 4 - Network Coverage + +Write a function besties_coverage() that computes the "coverage" of nodes +within a social network that are connected via predefined relationships to a +given list of individuals, i.e. the proportion of connected individuals, to the +total size of the network (= the number of people in the social network). The +function takes three arguments: + +- individuals, a list of individuals, each in the form of a string ID +- bestie_dict, a dictionary of sets of friends of each individual in the + social network (as per the first question of the Project) +- relationship_list, a list of functions defining relationships in the + social network, selected from friend_besties and friend_second_besties. + +The function should return a float, corresponding to the proportion of the +total number of individuals who are either a member of individuals or connected +via one of the relationships in relationship_list. 
+ +Example calls to the function are: + + >>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'}, + 'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}}, + []) + 0.25 + + >>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'}, + 'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}}, + [friend_besties]) + 0.5 + + >>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'}, + 'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}}, + [friend_second_besties]) + 0.75 + + >>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'}, + 'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}}, + [friend_besties, friend_second_besties]) + 1.0 + +-------------------------------------------------------------------------------- +Part 5 - Social Network Attribute Prediction + +The final question is for bonus marks, and is deliberately quite a bit harder +than the four basic questions (and the number of marks on offer is deliberately +not commensurate with the amount of effort required — bonus marks aren't meant +to be easy to get!). Only attempt this if you have completed the earlier +questions, and are up for a challenge! + +The context for the bonus question is the prediction of attributes of a user +based on the attributes of their social network, and the observation that a +user's friends often have very similar interests and background to that user +(what is formally called homophily). 
+ +Write a function friendly_prediction() which takes four arguments: + +- unknown_user, a string indicating the identity of the user you are to predict + attributes for +- features, a set of features you are to predict attributes for +- bestie_dict, a dictionary of sets of the besties for each user in the + dataset, following the same format as the earlier questions in the project +- feat_dict, a dictionary containing the known attributes for each user in the + training data, across a range of features; note that there is no guarantee + that the attribute for a given feature will be known for every training user + +Your function should return a dictionary of features (based on features), with +a predicted list of values for each. + +Your function should make its predictions as follows: + +- first, identify the set of besties for the given user, and for each feature + of interest, determine the most-commonly attested attribute for that feature + among the besties; in the case of a tie, the prediction should be a sorted + list of attributes + +- second, for any features where no bestie has an attribute for that feature + (meaning no prediction was possible in the first step), repeat the process + using the second-besties, once again in the form of a sorted list + of attributes + +- in the case that no bestie or second-bestie has that attribute, return an + empty list. + +Note that all attributes will take the form of strings, with the empty string +representing the fact that the user explicitly has no value for that feature +(e.g. if the user did not go to university, the value for university would be +''), and the lack of an attribute for a given feature indicating that the +attribute is unknown. Note further that even if the attribute for unknown_user +is available in feat_dict, you should predict based on the attributes of +besties and second besties. 
+ +Example calls to the function are: + + >>> friendly_prediction('glenn', {'favourite author', 'university'}, + {'kim': {'sandy', 'alex', 'glenn'}, 'sandy': {'kim', 'alex'}, + 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}}, {'glenn': + {'university': ''}, 'kim': {'favourite author': 'AA Milne'}, 'sandy': + {'favourite author': 'JRR Tolkien', + "university": "University of Melbourne"}, 'alex': {'favourite author': + 'AA Milne', 'university': 'Monash University'}}) + {'university': ['Monash University', 'University of Melbourne'], + 'favourite author': ['AA Milne']} + + >>> friendly_prediction('kim', {'university'}, {'kim': + {'sandy', 'alex', 'glenn'}, 'sandy': {'kim', 'alex'}, 'alex': + {'kim', 'sandy'}, 'glenn': {'kim'}}, {'glenn': {'university': ''}, + 'kim': {'favourite author': 'AA Milne'}, 'sandy': {'favourite author': + 'JRR Tolkien', "university": "University of Melbourne"}, 'alex': + {'favourite author': 'AA Milne', 'university': 'Monash University'}}) + {'university': ['', 'Monash University', 'University of Melbourne']} + + >>> friendly_prediction('kim', {'birthplace'}, {'kim': + {'sandy', 'alex', 'glenn'}, 'sandy': {'kim', 'alex'}, 'alex': + {'kim', 'sandy'}, 'glenn': {'kim'}}, {'glenn': {'university': ''}, + 'kim': {'favourite author': 'AA Milne'}, 'sandy': {'favourite author': + 'JRR Tolkien', "university": "University of Melbourne"}, 'alex': + {'favourite author': 'AA Milne', 'university': 'Monash University'}}) + {'birthplace': []} diff --git a/project01-marks-updated.pdf b/project01-marks-updated.pdf new file mode 100755 index 0000000..42ed154 Binary files /dev/null and b/project01-marks-updated.pdf differ diff --git a/project01-marks.pdf b/project01-marks.pdf new file mode 100755 index 0000000..2e6b7f3 Binary files /dev/null and b/project01-marks.pdf differ diff --git a/project01-rubric.pdf b/project01-rubric.pdf new file mode 100755 index 0000000..16783ff Binary files /dev/null and b/project01-rubric.pdf differ diff --git 
# a/project01-sample-solutions.py b/project01-sample-solutions.py
# (new file mode 100644, index 0000000..1bade64)

# ------------------------------------------------------------------------------
# Part 1

def add_friend(individual, friend, friend_dict):
    """add `friend` as friend of `individual` in `friend_dict`

    `friend_dict` is modified in place; an entry for `individual` is created
    the first time they are seen"""
    friend_dict.setdefault(individual, set()).add(friend)


def get_friendly_dict(friend_list):
    """take `friend_list` (list of undirected friendship links, each a pair
    of names) and return the set of all direct friends of each user in that
    list, in the form of a dictionary of sets"""

    # each link is reciprocal, so record it in both directions
    friend_dict = {}
    for (node1, node2) in friend_list:
        add_friend(node1, node2, friend_dict)
        add_friend(node2, node1, friend_dict)
    return friend_dict


# ------------------------------------------------------------------------------
# Part 2

def friend_besties(individual, bestie_dict):
    """return the sorted list of (first-order) friends of `individual`, by
    look-up in `bestie_dict`; the list is empty if `individual` is not in
    `bestie_dict`"""
    return sorted(bestie_dict.get(individual, ()))


# ------------------------------------------------------------------------------
# Part 3

def friend_second_besties(individual, bestie_dict):
    """return the sorted list of (strictly) second-order friends of
    `individual`, based on the contents of `bestie_dict`; the list is empty
    if `individual` is not in `bestie_dict`"""

    # somebody outside the network has no friends of friends; returning here
    # also keeps the look-up below from raising KeyError
    if individual not in bestie_dict:
        return []
    besties = bestie_dict[individual]

    # gather the friends of each direct friend (a friend without an entry of
    # their own simply contributes nobody)
    second_besties = set()
    for bestie in besties:
        second_besties.update(bestie_dict.get(bestie, ()))

    # remove anyone who is a direct friend or the individual themself, so
    # that the degree of separation is strictly 2
    return sorted(second_besties - besties - {individual})


# ------------------------------------------------------------------------------
# Part 4

def besties_coverage(individuals, bestie_dict, relationship_list):
    """calculate the proportion of the network in `bestie_dict` that is
    either in `individuals` or connected to one of them via a relationship
    in `relationship_list`

    each element of `relationship_list` is called as
    `relationship(individual, bestie_dict)` and must return an iterable of
    names; returns 0.0 for an empty network"""

    # nobody to cover (and nothing to divide by) in an empty network
    if not bestie_dict:
        return 0.0

    # get the set of everyone connected to each individual in
    # `individuals` via the relations in `relationship_list`
    # (including the individuals themselves)
    connected = set()
    for individual in individuals:

        # only add the individual themself if they are in the network
        if individual in bestie_dict:
            connected.add(individual)

        for relationship in relationship_list:
            connected.update(relationship(individual, bestie_dict))

    return len(connected) / len(bestie_dict)


# ------------------------------------------------------------------------------
# Part 5

from collections import Counter


def predict_attribute(friends, feat_dict, feature):
    """predict the value of `feature` from the set `friends` based on the
    attributes in `feat_dict`

    returns the sorted list of the most frequent value(s) of `feature` among
    those friends who have one, or an empty list if none of them does"""

    # each friend with a known value for `feature` casts one vote for it
    votes = Counter(
        feat_dict[friend][feature]
        for friend in friends
        if friend in feat_dict and feature in feat_dict[friend]
    )

    # if no users with relevant attribute, no prediction to be made
    if not votes:
        return []

    # ties for the highest frequency are all returned
    max_count = max(votes.values())
    return sorted(attribute for attribute, count in votes.items()
                  if count == max_count)


def friendly_prediction(unknown_user, features, bestie_dict, feat_dict):
    """predict the attributes of `unknown_user` for each feature in `features`,
    based on the social network in `bestie_dict` and user attribute data in
    `feat_dict`, and return the predictions in the form of a dictionary
    of lists"""

    # the friend lists do not depend on the feature, so compute them once
    besties = friend_besties(unknown_user, bestie_dict)
    second_besties = friend_second_besties(unknown_user, bestie_dict)

    # predict attribute for each feature based on besties, and failing that,
    # second besties
    predictions = {}
    for feature in features:
        prediction = predict_attribute(besties, feat_dict, feature)
        if not prediction:
            prediction = predict_attribute(second_besties, feat_dict, feature)
        predictions[feature] = prediction

    return predictions