Initial commit. Note this is my assignment submission as is - no modifications have been made since.

2024-06-08 21:05:12 +10:00 · 2024-06-08 21:05:12 +10:00 · fa1fa79ac7
commit fa1fa79ac7
10 changed files with 524 additions and 0 deletions
--- a/part1.py
+++ b/part1.py
@ -0,0 +1,30 @@
+# Title: Project 1 - Friends
+# Author: Rory Healy
+# Date created - 16th April 2019
+# Date modified - 18th April 2019
+
+def get_friendly_dict(friend_list):
+    '''Builds the degree-one friendship map for a social network.
+
+    Takes a list of reciprocal friendship links between individuals,
+    friend_list, where each link is a pair (2-tuple) of names. Returns a
+    dictionary of sets: each individual that appears in friend_list is a key,
+    and its value is the set of that individual's immediate friends. Keys
+    appear in the order the individuals are first seen in friend_list. An
+    empty friend_list gives an empty dictionary.'''
+
+    friend_dict = {}
+    for first_person, second_person in friend_list:
+        # Friendship is reciprocal, so every link is recorded in both
+        # directions. setdefault creates the person's set the first time
+        # they are seen, so a single pass over the links is enough.
+        friend_dict.setdefault(first_person, set()).add(second_person)
+        friend_dict.setdefault(second_person, set()).add(first_person)
+
+    return friend_dict
--- a/part2.py
+++ b/part2.py
@ -0,0 +1,26 @@
+# Title: Project 1 - Social Network Besties
+# Author: Rory Healy
+# Date created - 17th April 2019
+
+def friend_besties(individual, bestie_dict):
+    '''Finds the degree-one friends ("besties") of one person.
+
+    Takes an individual's name, stored as a string "individual", and the
+    dictionary of sets of immediate friends, stored as a dictionary
+    "bestie_dict". Returns a sorted list of the individual's degree-one
+    friends. Returns an empty list if the individual has no friends or does
+    not appear in bestie_dict at all.'''
+
+    # Looks the individual up directly rather than scanning every entry:
+    # other people's (possibly empty) friend sets must not affect the result.
+    return sorted(bestie_dict.get(individual, ()))
--- a/part3.py
+++ b/part3.py
@ -0,0 +1,39 @@
+# Title: Project 1 - Social Network Second Besties
+# Author: Rory Healy
+# Date created - 17th April 2019
+# Date modified - 18th April 2019
+
+def friend_second_besties(individual, bestie_dict):
+    '''Finds the degree-two friends ("second besties") of one person.
+
+    Takes a person, stored as the string "individual", and the dictionary of
+    everyone's friends, stored as the dictionary "bestie_dict", and
+    returns a sorted list of the individual's degree-two friends: people who
+    are a friend of one of the individual's friends, but who are neither the
+    individual nor one of the individual's immediate friends. Returns an
+    empty list if there are no such people, or if the individual does not
+    appear in bestie_dict.'''
+
+    immediate_friends = bestie_dict.get(individual)
+    if not immediate_friends:
+        return []
+
+    # Collects the friends of every immediate friend. A set is used so that
+    # someone reachable through several friends is only listed once. Friends
+    # with no entry of their own in bestie_dict simply contribute nobody.
+    friends_of_friends = set()
+    for friend in immediate_friends:
+        friends_of_friends.update(bestie_dict.get(friend, ()))
+
+    # Removes the individual and any immediate friends, leaving only people
+    # who are exactly two degrees of separation away.
+    friends_of_friends.difference_update(immediate_friends)
+    friends_of_friends.discard(individual)
+
+    return sorted(friends_of_friends)
--- a/part4.py
+++ b/part4.py
@ -0,0 +1,47 @@
+# Title: Project 1 - Network Coverage
+# Author: Rory Healy
+# Date created - 18th April 2019
+
+def besties_coverage(individuals, bestie_dict, relationship_list):
+    '''Calculates how much of the social network a group of people reaches.
+
+    Takes a list of people "individuals", stored as strings in a list, a
+    dictionary of sets of friends "bestie_dict", and a list of functions
+    "relationship_list" that define relationships in the social network
+    (e.g. friend_besties and friend_second_besties). Each function is called
+    as function(individual, bestie_dict) and must return the people related
+    to that individual.
+
+    Returns a float: the number of distinct people who are either a member
+    of individuals (and present in bestie_dict) or connected to one of them
+    via a relationship in relationship_list, divided by the number of people
+    in bestie_dict. Returns 0.0 if bestie_dict is empty.'''
+
+    # An empty network has nobody to cover; this also avoids dividing by 0.
+    if not bestie_dict:
+        return 0.0
+
+    # A set is used so that a person reached from several individuals, or
+    # via several relationships, is only counted once.
+    connected_people = set()
+    for individual in individuals:
+        # Only people who are actually in the network count towards coverage.
+        if individual in bestie_dict:
+            connected_people.add(individual)
+
+        # The relationship functions are called directly, so any function
+        # with the same (individual, bestie_dict) signature can be used.
+        for relationship in relationship_list:
+            connected_people.update(relationship(individual, bestie_dict))
+
+    return len(connected_people) / len(bestie_dict)
--- a/part5.py
+++ b/part5.py
@ -0,0 +1,34 @@
+# Title: Project 1 - Social Network Attribute Prediction
+# Author: Rory Healy
+# Date created - 18th April 2019
+
+def friendship_closeness():
+    '''Placeholder for a planned "friendship-closeness" score. It is not
+    implemented yet: the function has no body, takes no arguments and
+    returns None.
+
+    Intended design: take a dictionary of sets of friends "bestie_dict" and
+    return a dictionary of sets of friends together with a
+    "friendship-closeness" value from 0 to 1 for each, where the value gets
+    closer to 0 as the number of degrees of separation between the people
+    increases.'''
+    
+def prioritise_friendships():
+    '''Placeholder for ranking people by "friendship-closeness" so that
+    predictions can favour the closest people. It is not implemented yet:
+    the function has no body, takes no arguments and returns None.
+
+    Intended design: take a user, stored as the string "unknown_user", and
+    return a list of sets of other people ordered from highest to lowest
+    closeness (e.g. degree-one friends are in the first set, degree-two
+    people are in the second set, etc.).'''
+
+def _most_common_attributes(people, feature, feat_dict):
+    '''Returns a sorted list of the most frequent known values of "feature"
+    among "people", or an empty list if none of them has a known value.'''
+
+    # Counts one vote per person whose value for the feature is known. The
+    # empty string is a real value ("explicitly none"), so it is counted too.
+    votes = {}
+    for person in people:
+        known_attributes = feat_dict.get(person, {})
+        if feature in known_attributes:
+            value = known_attributes[feature]
+            votes[value] = votes.get(value, 0) + 1
+
+    if not votes:
+        return []
+
+    # Ties for the highest count are all returned, in sorted order.
+    highest_count = max(votes.values())
+    return sorted(value for value, count in votes.items()
+                  if count == highest_count)
+
+def friendly_prediction(unknown_user, features, bestie_dict, feat_dict):
+    '''Predicts attributes of a user from the attributes of their friends.
+
+    Takes a user, stored as the string "unknown_user", features of the user
+    that are used to predict attributes, stored as a set "features", a
+    dictionary of sets of friends "bestie_dict", and a dictionary containing
+    the known attributes for each user "feat_dict". Returns a dictionary of
+    features with a predicted list of values for each feature.
+
+    For each feature the prediction is the most common known value among the
+    user's degree-one friends (a sorted list of all tied values). If no
+    degree-one friend has a known value, the same is done with the degree-two
+    friends. If nobody has a known value the prediction is an empty list.
+    The user's own entry in feat_dict is never used.'''
+
+    # Works out the degree-one and degree-two friends once, up front, as
+    # they are the same for every feature.
+    besties = set(bestie_dict.get(unknown_user, ()))
+    second_besties = set()
+    for bestie in besties:
+        second_besties.update(bestie_dict.get(bestie, ()))
+    second_besties.difference_update(besties)
+    second_besties.discard(unknown_user)
+
+    # Limitation: every person at the same degree of separation gets an equal
+    # vote. With no "friendship-closeness" information available, preference
+    # cannot be given to one degree-two friend over another, even though the
+    # unknown user may in reality be closer to one of them.
+    predictions = {}
+    for feature in features:
+        prediction = _most_common_attributes(besties, feature, feat_dict)
+        # Falls back to degree-two friends only when no degree-one friend
+        # has a known value for this feature.
+        if not prediction:
+            prediction = _most_common_attributes(second_besties, feature,
+                                                 feat_dict)
+        predictions[feature] = prediction
+
+    return predictions
--- a/214
+++ b/214
@ -0,0 +1,214 @@
+Project 1 is all about "social networks", and the power of social connections,
+both in terms of how impressively large a portion of the social network can be
+accessed from a small number of seed users and their friends or 
+friends-of-friends, and how accurately the attributes of an individual can be
+predicted from (partial) attributes of their friends/friends-of-friends. A
+large part of the context for the project is in illustrating how it is that
+companies such as Cambridge Analytica are able to influence the world so
+impressively, from a small set of users of their products.
+
+Throughout the project, we will refer to individuals as "nodes" in the social
+network, and (mutual) friendship connections as "edges" connecting those nodes.
+See the lecture slides for more details.
+
+--------------------------------------------------------------------------------
+Part 1 - Friends
+
+Write a function get_friendly_dict() that calculates the degree-one friends of
+each individual in a social network. The function takes one argument:
+
+- friend_list, a list of reciprocal friendship links between individuals.
+
+The function should return a dictionary of sets, containing the set of all
+"degree-one" (= immediate) friends for each individual in the social network.
+Note that the specific order of the individuals in the dictionary, and also the
+ordering of the friends in each set does not matter.
+
+The structure of friend_list is as follows: each element is a 2-tuple of
+strings, representing a pairing of names of individuals in the social network
+who are friends. Note that as friendship links are reciprocal, the 2-tuple
+('kim', 'sandy'), e.g., indicates that 'kim' is a friend of 'sandy', and also
+that 'sandy' is a friend of 'kim'.
+
+Example function calls are:
+
+    >>> get_friendly_dict([('kim', 'sandy'), ('alex', 'sandy'),
+        ('kim', 'alex'), ('kim', 'glenn')])
+    {'kim': {'glenn', 'sandy', 'alex'}, 'sandy': {'kim', 'alex'},
+     'alex': {'sandy', 'kim'}, 'glenn': {'kim'}}
+
+    >>> get_friendly_dict([('kim', 'sandy'), ('sandy', 'alex'),
+        ('alex', 'glenn'), ('glenn', 'kim')])
+    {'kim': {'glenn', 'sandy'}, 'sandy': {'kim', 'alex'},
+     'alex': {'glenn', 'sandy'}, 'glenn': {'kim', 'alex'}}
+
+--------------------------------------------------------------------------------
+Part 2 - Social Network Besties
+
+Write a function friend_besties() that calculates the "besties" (i.e.
+degree-one friends) of a given individual in a social network. The function
+takes two arguments:
+
+- individual, an individual in the social network, in the form of a string ID
+- bestie_dict, a dictionary of sets of friends of each individual in the
+  social network (as per the first question of the Project)
+
+The function should return a sorted list, made up of all "degree-one" friends
+for the individual. In the instance that the individual does not have any
+friends in the social network, the function should return an empty list.
+
+Example function calls are:
+
+    >>> friend_besties('kim', {'kim': {'sandy', 'alex', 'glenn'},
+        'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}})
+    ['alex', 'glenn', 'sandy']
+
+    >>> friend_besties('ali', {'kim': {'sandy', 'alex', 'glenn'},
+        'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}})
+    []
+
+--------------------------------------------------------------------------------
+Part 3 - Social Network Second Besties
+
+Write a function friend_second_besties() that calculates the "second-besties"
+(i.e. degree-two friends) of a given individual in a social network. The
+function takes two arguments:
+
+- individual, an individual in the social network, in the form of a string ID
+- bestie_dict, a dictionary of sets of friends of each individual in the
+  social network (as per the first question of the Project)
+
+The function should return a sorted list, made up of all "degree-two" friends
+for the individual. In the instance that the individual does not have any
+degree-two friends in the social network, the function should return an
+empty list.
+
+Example function calls are:
+
+    >>> friend_second_besties('glenn', {'kim': {'sandy', 'alex', 'glenn'},
+        'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}})
+    ['alex', 'sandy']
+
+    >>> friend_second_besties('kim', {'kim': {'sandy', 'alex', 'glenn'},
+        'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}})
+    []
+
+--------------------------------------------------------------------------------
+Part 4 - Network Coverage
+
+Write a function besties_coverage() that computes the "coverage" of nodes
+within a social network that are connected via predefined relationships to a
+given list of individuals, i.e. the proportion of connected individuals, to the
+total size of the network (= the number of people in the social network). The
+function takes three arguments:
+
+- individuals, a list of individuals, each in the form of a string ID
+- bestie_dict, a dictionary of sets of friends of each individual in the
+  social network (as per the first question of the Project)
+- relationship_list, a list of functions defining relationships in the
+  social network, selected from friend_besties and friend_second_besties.
+
+The function should return a float, corresponding to the proportion of the
+total number of individuals who are either a member of individuals or connected
+via one of the relationships in relationship_list.
+
+Example calls to the function are:
+
+    >>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'},
+        'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}}, 
+        [])
+    0.25
+
+    >>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'}, 
+        'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}},
+        [friend_besties])
+    0.5
+
+    >>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'},
+        'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}},
+        [friend_second_besties])
+    0.75
+
+    >>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'},
+        'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}},
+        [friend_besties, friend_second_besties])
+    1.0
+
+--------------------------------------------------------------------------------
+Part 5 - Social Network Attribute Prediction
+
+The final question is for bonus marks, and is deliberately quite a bit harder
+than the four basic questions (and the number of marks on offer is deliberately
+not commensurate with the amount of effort required — bonus marks aren't meant
+to be easy to get!). Only attempt this if you have completed the earlier
+questions, and are up for a challenge!
+
+The context for the bonus question is the prediction of attributes of a user
+based on the attributes of their social network, and the observation that a
+user's friends often have very similar interests and background to that user
+(what is formally called homophily).
+
+Write a function friendly_prediction() which takes four arguments:
+
+- unknown_user, a string indicating the identity of the user you are to predict
+  attributes for
+- features, a set of features you are to predict attributes for
+- bestie_dict, a dictionary of sets of the besties for each user in the
+  dataset, following the same format as the earlier questions in the project
+- feat_dict, a dictionary containing the known attributes for each user in the
+  training data, across a range of features; note that there is no guarantee
+  that the attribute for a given feature will be known for every training user
+
+Your function should return a dictionary of features (based on features), with
+a predicted list of values for each.
+
+Your function should make its predictions as follows:
+
+- first, identify the set of besties for the given user, and for each feature
+  of interest, determine the most-commonly attested attribute for that feature
+  among the besties; in the case of a tie, the prediction should be a sorted
+  list of attributes
+
+- second, for any features where no bestie has an attribute for that feature
+  (meaning no prediction was possible in the first step), repeat the process
+  using the second-besties, once again in the form of a sorted list
+  of attributes
+
+- in the case that no bestie or second-bestie has that attribute, return an
+  empty list.
+
+Note that all attributes will take the form of strings, with the empty string
+representing the fact that the user explicitly has no value for that feature
+(e.g. if the user did not go to university, the value for university would be
+''), and the lack of an attribute for a given feature indicating that the
+attribute is unknown. Note further that even if the attribute for unknown_user
+is available in feat_dict, you should predict based on the attributes of
+besties and second besties.
+
+Example calls to the function are:
+
+    >>> friendly_prediction('glenn', {'favourite author', 'university'},
+        {'kim': {'sandy', 'alex', 'glenn'}, 'sandy': {'kim', 'alex'},
+        'alex': {'kim', 'sandy'}, 'glenn': {'kim'}}, {'glenn':
+        {'university': ''}, 'kim': {'favourite author': 'AA Milne'}, 'sandy':
+        {'favourite author': 'JRR Tolkien',
+        "university": "University of Melbourne"}, 'alex': {'favourite author':
+        'AA Milne', 'university': 'Monash University'}})
+    {'university': ['Monash University', 'University of Melbourne'],
+     'favourite author': ['AA Milne']}
+
+    >>> friendly_prediction('kim', {'university'}, {'kim':
+        {'sandy', 'alex', 'glenn'}, 'sandy': {'kim', 'alex'}, 'alex':
+        {'kim', 'sandy'}, 'glenn': {'kim'}}, {'glenn': {'university': ''},
+        'kim': {'favourite author': 'AA Milne'}, 'sandy': {'favourite author':
+        'JRR Tolkien', "university": "University of Melbourne"}, 'alex':
+        {'favourite author': 'AA Milne', 'university': 'Monash University'}})
+    {'university': ['', 'Monash University', 'University of Melbourne']}
+
+    >>> friendly_prediction('kim', {'birthplace'}, {'kim':
+        {'sandy', 'alex', 'glenn'}, 'sandy': {'kim', 'alex'}, 'alex':
+        {'kim', 'sandy'}, 'glenn': {'kim'}}, {'glenn': {'university': ''},
+        'kim': {'favourite author': 'AA Milne'}, 'sandy': {'favourite author':
+        'JRR Tolkien', "university": "University of Melbourne"}, 'alex':
+        {'favourite author': 'AA Milne', 'university': 'Monash University'}})
+    {'birthplace': []}
--- a/project01-marks-updated.pdf
+++ b/project01-marks-updated.pdf
--- a/project01-marks.pdf
+++ b/project01-marks.pdf
--- a/project01-rubric.pdf
+++ b/project01-rubric.pdf
--- a/project01-sample-solutions.py
+++ b/project01-sample-solutions.py
@ -0,0 +1,134 @@
+# ------------------------------------------------------------------------------
+# Part 1
+
+def add_friend(individual, friend, friend_dict):
+    """record `friend` in the set of friends stored for `individual` in
+    `friend_dict`, creating that set if `individual` has no entry yet;
+    `friend_dict` is modified in place"""
+    friend_dict.setdefault(individual, set()).add(friend)
+
+def get_friendly_dict(friend_list):
+    """build a dictionary of sets from `friend_list` (a list of
+    undirected friendship links), mapping each user in that list to
+    the set of their direct friends"""
+
+    network = {}
+    for link in friend_list:
+        # a link is undirected, so register it once in each direction
+        left, right = link
+        add_friend(left, right, network)
+        add_friend(right, left, network)
+    return network
+
+# ------------------------------------------------------------------------------
+# Part 2
+
+def friend_besties(individual, bestie_dict):
+    """return the (first-order) friends of `individual` as a sorted list,
+    looked up in `bestie_dict`; an `individual` with no entry in
+    `bestie_dict` gives an empty list"""
+
+    # a missing individual is treated as having an empty set of friends
+    return sorted(bestie_dict.get(individual, set()))
+
+# ------------------------------------------------------------------------------
+# Part 3
+
+def friend_second_besties(individual, bestie_dict):
+    """return, as a sorted list, the (strictly) second-order friends of
+    `individual` according to `bestie_dict`"""
+
+    # someone who is not in the network has no second-order friends
+    if individual not in bestie_dict:
+        return []
+
+    direct_friends = bestie_dict[individual]
+
+    # pool the friends of every direct friend that has its own entry
+    candidates = set()
+    for bestie in direct_friends:
+        if bestie in bestie_dict:
+            candidates.update(bestie_dict[bestie])
+
+    # keep the degree of separation strictly 2: drop the individual
+    # themself and anyone who is already a direct friend
+    excluded = direct_friends.union({individual})
+    return sorted(candidates.difference(excluded))
+
+# ------------------------------------------------------------------------------
+# Part 4
+
+def besties_coverage(individuals, bestie_dict, relationship_list):
+    """calculate the proportion of the network in `bestie_dict` that is
+    covered by `individuals` together with everyone connected to them via
+    the relationship functions in `relationship_list`"""
+
+    # accumulate everyone reached; a set keeps each person counted once
+    reached = set()
+    for person in individuals:
+
+        # the person counts towards coverage only if they are in the network
+        if person in bestie_dict:
+            reached.add(person)
+
+        # add everyone each relationship function returns for this person
+        for relation in relationship_list:
+            reached.update(relation(person, bestie_dict))
+
+    network_size = len(bestie_dict)
+    return len(reached) / network_size
+
+# ------------------------------------------------------------------------------
+# Part 5
+
+from collections import defaultdict
+
+def predict_attribute(friends, feat_dict, feature):
+    """return a sorted list of the most frequent value(s) of `feature`
+    among `friends`, using the attributes recorded in `feat_dict`; the
+    list is empty when no friend has a value for `feature`"""
+
+    # tally one vote per friend that has a recorded value for the feature
+    tally = {}
+    for friend in friends:
+        if friend not in feat_dict:
+            continue
+        attributes = feat_dict[friend]
+        if feature in attributes:
+            value = attributes[feature]
+            tally[value] = tally.get(value, 0) + 1
+
+    # nobody had the relevant attribute, so there is nothing to predict
+    if not tally:
+        return []
+
+    # every value tied for the highest number of votes is returned
+    top_votes = max(tally.values())
+    return sorted(value for value, votes in tally.items()
+                  if votes == top_votes)
+        
+def friendly_prediction(unknown_user, features, bestie_dict, feat_dict):
+    """return a dictionary of lists holding the predicted attribute values
+    of `unknown_user` for each feature in `features`, using the social
+    network in `bestie_dict` and the user attribute data in `feat_dict`"""
+
+    predictions = {}
+    for feature in features:
+        # first choice: whatever the besties predict for this feature
+        guess = predict_attribute(
+            friend_besties(unknown_user, bestie_dict), feat_dict, feature)
+
+        # if the besties give no prediction, fall back to second besties
+        if not guess:
+            guess = predict_attribute(
+                friend_second_besties(unknown_user, bestie_dict),
+                feat_dict, feature)
+
+        predictions[feature] = guess
+
+    return predictions