Initial commit. Note this is my assignment submission as is - no modifications have been made since.

2024-06-08 21:05:12 +10:00 · 2024-06-08 21:05:12 +10:00 · fa1fa79ac7
commit fa1fa79ac7
10 changed files with 524 additions and 0 deletions
--- a/part1.py
+++ b/part1.py
@ -0,0 +1,30 @@
 # Title: Project 1 - Friends
 # Author: Rory Healy
 # Date created - 16th April 2019
 # Date modified - 18th April 2019
 def get_friendly_dict(friend_list):
    '''Builds the degree-one friendship map for a social network.

    Takes friend_list, a list of reciprocal friendship links, where each
    link is a pair (2-tuple) of names. Returns a dictionary of sets that maps
    every individual appearing in friend_list to the set of all of their
    immediate friends. Keys are inserted in order of first appearance in
    friend_list. Raises ValueError if a link does not contain exactly two
    names.'''
    friend_dict = {}
    for first, second in friend_list:
        # Friendship links are reciprocal, so each pair is recorded in both
        # directions. A single pass over friend_list with set-valued entries
        # avoids rescanning the whole list once per person.
        friend_dict.setdefault(first, set()).add(second)
        friend_dict.setdefault(second, set()).add(first)
    return friend_dict
--- a/part2.py
+++ b/part2.py
@ -0,0 +1,26 @@
 # Title: Project 1 - Social Network Besties
 # Author: Rory Healy
 # Date created - 17th April 2019
 def friend_besties(individual, bestie_dict):
    '''Returns a sorted list of an individual's degree-one friends.

    Takes an individual's name, stored as a string "individual", and the
    dictionary of sets of immediate friends, stored as a dictionary
    "bestie_dict". Returns an empty list when the individual is not in
    bestie_dict or has no friends.'''
    # Look the individual up directly. Scanning every entry and returning as
    # soon as any person had no friends produced a wrong (and possibly
    # unsorted) answer whenever somebody else in the network had an empty
    # friend set.
    return sorted(bestie_dict.get(individual, ()))
--- a/part3.py
+++ b/part3.py
@ -0,0 +1,39 @@
 # Title: Project 1 - Social Network Second Besties
 # Author: Rory Healy
 # Date created - 17th April 2019
 # Date modified - 18th April 2019
 def friend_second_besties(individual, bestie_dict):
    '''Returns a sorted list of an individual's degree-two friends.

    Takes a person, stored as the string "individual", and the dictionary of
    everyone's friends, stored as the dictionary "bestie_dict". A degree-two
    friend is a friend of one of the individual's immediate friends who is
    neither the individual nor one of their immediate friends. Returns an
    empty list when the individual is not in bestie_dict or has no
    degree-two friends.'''
    if individual not in bestie_dict:
        return []
    immediate_friends = bestie_dict[individual]
    # Gather the friends of EVERY immediate friend (not just the first one).
    # A set is used so that people reachable through several friends are
    # only counted once. Friends with no entry of their own are skipped.
    second_best_friends = set()
    for friend in immediate_friends:
        second_best_friends.update(bestie_dict.get(friend, ()))
    # Removes the individual and any immediate friends, leaving only people
    # who are exactly two degrees of separation away. Set operations are
    # used because removing from a list while iterating it skips elements.
    second_best_friends.discard(individual)
    second_best_friends.difference_update(immediate_friends)
    return sorted(second_best_friends)
--- a/part4.py
+++ b/part4.py
@ -0,0 +1,47 @@
 # Title: Project 1 - Network Coverage
 # Author: Rory Healy
 # Date created - 18th April 2019
 def besties_coverage(individuals, bestie_dict, relationship_list):
    '''Returns the proportion of the network covered by a group of people.

    Takes a list of people "individuals", stored as strings in a list, a
    dictionary of sets of friends "bestie_dict", and a list of functions
    "relationship_list" that define relationships in the social network
    (e.g. friend_besties and friend_second_besties). Each function is called
    as relationship(individual, bestie_dict) and must return an iterable of
    names.

    Returns a float: the number of distinct people in bestie_dict who are
    either a member of individuals or connected to one of them via a
    relationship in relationship_list, divided by the total number of people
    in bestie_dict. Returns 0.0 for an empty network.'''
    # An empty network has nobody to cover; avoid dividing by zero.
    if not bestie_dict:
        return 0.0
    # Collect everyone reached from ANY of the individuals. A set is used so
    # that a person reachable through several individuals or relationships
    # is only counted once.
    connected = set()
    for individual in individuals:
        connected.add(individual)
        # The supplied functions are called directly rather than being
        # compared by name, so any relationship function works.
        for relationship in relationship_list:
            connected.update(relationship(individual, bestie_dict))
    # Only people who are actually part of the network count towards the
    # coverage, which keeps the proportion between 0 and 1.
    covered = connected.intersection(bestie_dict)
    return len(covered) / len(bestie_dict)
--- a/part5.py
+++ b/part5.py
@ -0,0 +1,34 @@
 # Title: Project 1 - Social Network Attribute Prediction
 # Author: Rory Healy
 # Date created - 18th April 2019
 def friendship_closeness():
    '''Placeholder for a "friendship-closeness" score; not yet implemented.

    Planned behaviour: assign each friendship a value from 0 to 1 based on
    the number of degrees of separation between people, with the value
    approaching 0 as the separation grows. The plan is for it to take a
    dictionary of sets of friends "bestie_dict" and return a dictionary of
    sets of friends with their "friendship-closeness" value. At present the
    function accepts no arguments and returns None.'''
    return None
 def prioritise_friendships():
    '''Placeholder for ordering people by closeness; not yet implemented.

    Planned behaviour: make the predictions favour the people with the
    highest "friendship-closeness" value. The plan is for it to take a user,
    stored as the string "unknown_user", and return a list of sets of other
    people ordered from highest to lowest closeness (e.g. degree-one friends
    in the first set, degree-two people in the second set, etc.). At present
    the function accepts no arguments and returns None.'''
    return None
 def friendly_prediction(unknown_user, features, bestie_dict, feat_dict):
    '''Predicts a user's attributes from those of their friends.

    Takes a user, stored as the string "unknown_user", the features to
    predict, stored as a set "features", a dictionary of sets of friends
    "bestie_dict", and a dictionary containing the known attributes for each
    user "feat_dict" (user -> {feature: value}).

    Returns a dictionary mapping each feature to a sorted list of predicted
    values. For each feature the prediction is the most commonly attested
    value among the user's degree-one friends (all tied values are listed).
    If no degree-one friend has a value for the feature, the same vote is
    taken among the degree-two friends. If nobody has a value, the list is
    empty. The user's own entry in feat_dict is never used.'''
    # Imported locally because this module has no import section of its own.
    from collections import Counter

    # Degree-one friends, and degree-two friends (friends of friends who are
    # neither the user nor one of the user's immediate friends).
    besties = set(bestie_dict.get(unknown_user, ()))
    second_besties = set()
    for bestie in besties:
        second_besties.update(bestie_dict.get(bestie, ()))
    second_besties -= besties
    second_besties.discard(unknown_user)

    def most_common_values(group, feature):
        '''Returns the sorted most frequent known values of feature among
        the people in group, or an empty list if none of them has one.'''
        votes = Counter(feat_dict[person][feature] for person in group
                        if feature in feat_dict.get(person, {}))
        if not votes:
            return []
        top = max(votes.values())
        return sorted(value for value, count in votes.items() if count == top)

    # NOTE: all degree-two friends are weighted equally. In the first example
    # of the specification the favourite author comes from Kim, while the
    # university comes from both Sandy and Alex; without further information
    # neither degree-two friend can be preferred, which is a limitation of
    # predicting purely from degrees of separation.
    predictions = {}
    for feature in features:
        # An empty list is falsy, so the degree-two vote is only consulted
        # when no degree-one friend has a value for the feature.
        predictions[feature] = (most_common_values(besties, feature)
                                or most_common_values(second_besties, feature))
    return predictions
--- a/214
+++ b/214
@ -0,0 +1,214 @@
 Project 1 is all about "social networks", and the power of social connections,
 both in terms of how impressively large a portion of the social network can be
 accessed from a small number of seed users and their friends or 
 friends-of-friends, and how accurately the attributes of an individual can be
 predicted from (partial) attributes of their friends/friends-of-friends. A
 large part of the context for the project is in illustrating how it is that
 companies such as Cambridge Analytica are able to influence the world so
 impressively, from a small set of users of their products.
 Throughout the project, we will refer to individuals as "nodes" in the social
 network, and (mutual) friendship connections as "edges" connecting those nodes.
 See the lecture slides for more details.
 --------------------------------------------------------------------------------
 Part 1 - Friends
 Write a function get_friendly_dict() that calculates the degree-one friends of
 each individual in a social network. The function takes one argument:
 - friend_list, a list of reciprocal friendship links between individuals.
 The function should return a dictionary of sets, containing the set of all
 "degree-one" (= immediate) friends for each individual in the social network.
 Note that the specific order of the individuals in the dictionary, and also the
 ordering of the friends in each set does not matter.
 The structure of friend_list is as follows: each element is a 2-tuple of
 strings, representing a pairing of names of individuals in the social network
 who are friends. Note that as friendship links are reciprocal, the 2-tuple
 ('kim', 'sandy'), e.g., indicates that 'kim' is a friend of 'sandy', and also
 that 'sandy' is a friend of 'kim'.
 Example function calls are:
    >>> get_friendly_dict([('kim', 'sandy'), ('alex', 'sandy'),
        ('kim', 'alex'), ('kim', 'glenn')])
    {'kim': {'glenn', 'sandy', 'alex'}, 'sandy': {'kim', 'alex'},
     'alex': {'sandy', 'kim'}, 'glenn': {'kim'}}
    >>> get_friendly_dict([('kim', 'sandy'), ('sandy', 'alex'),
        ('alex', 'glenn'), ('glenn', 'kim')])
    {'kim': {'glenn', 'sandy'}, 'sandy': {'kim', 'alex'},
     'alex': {'glenn', 'sandy'}, 'glenn': {'kim', 'alex'}}
 --------------------------------------------------------------------------------
 Part 2 - Social Network Besties
 Write a function friend_besties() that calculates the "besties" (i.e.
 degree-one friends) of a given individual in a social network. The function
 takes two arguments:
 - individual, an individual in the social network, in the form of a string ID
 - bestie_dict, a dictionary of sets of friends of each individual in the
  social network (as per the first question of the Project)
 The function should return a sorted list, made up of all "degree-one" friends
 for the individual. In the instance that the individual does not have any
 friends in the social network, the function should return an empty list.
 Example function calls are:
    >>> friend_besties('kim', {'kim': {'sandy', 'alex', 'glenn'},
        'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}})
    ['alex', 'glenn', 'sandy']
    >>> friend_besties('ali', {'kim': {'sandy', 'alex', 'glenn'},
        'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}})
    []
 --------------------------------------------------------------------------------
 Part 3 - Social Network Second Besties
 Write a function friend_second_besties() that calculates the "second-besties"
 (i.e. degree-two friends) of a given individual in a social network. The
 function takes two arguments:
 - individual, an individual in the social network, in the form of a string ID
 - bestie_dict, a dictionary of sets of friends of each individual in the
  social network (as per the first question of the Project)
 The function should return a sorted list, made up of all "degree-two" friends
 for the individual. In the instance that the individual does not have any
 degree-two friends in the social network, the function should return an
 empty list.
 Example function calls are:
    >>> friend_second_besties('glenn', {'kim': {'sandy', 'alex', 'glenn'},
        'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}})
    ['alex', 'sandy']
    >>> friend_second_besties('kim', {'kim': {'sandy', 'alex', 'glenn'},
        'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}})
    []
 --------------------------------------------------------------------------------
 Part 4 - Network Coverage
 Write a function besties_coverage() that computes the "coverage" of nodes
 within a social network that are connected via predefined relationships to a
 given list of individuals, i.e. the proportion of connected individuals, to the
 total size of the network (= the number of people in the social network). The
 function takes three arguments:
 - individuals, a list of individuals, each in the form of a string ID
 - bestie_dict, a dictionary of sets of friends of each individual in the
  social network (as per the first question of the Project)
 - relationship_list, a list of functions defining relationships in the
  social network, selected from friend_besties and friend_second_besties.
 The function should return a float, corresponding to the proportion of the
 total number of individuals who are either a member of individuals or connected
 via one of the relationships in relationship_list.
 Example calls to the function are:
    >>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'},
        'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}}, 
        [])
    0.25
    >>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'}, 
        'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}},
        [friend_besties])
    0.5
    >>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'},
        'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}},
        [friend_second_besties])
    0.75
    >>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'},
        'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}},
        [friend_besties, friend_second_besties])
    1.0
 --------------------------------------------------------------------------------
 Part 5 - Social Network Attribute Prediction
 The final question is for bonus marks, and is deliberately quite a bit harder
 than the four basic questions (and the number of marks on offer is deliberately
 not commensurate with the amount of effort required — bonus marks aren't meant
 to be easy to get!). Only attempt this if you have completed the earlier
 questions, and are up for a challenge!
 The context for the bonus question is the prediction of attributes of a user
 based on the attributes of their social network, and the observation that a
 user's friends often have very similar interests and background to that user
 (what is formally called homophily).
 Write a function friendly_prediction() which takes four arguments:
 - unknown_user, a string indicating the identity of the user you are to predict
  attributes for
 - features, a set of features you are to predict attributes for
 - bestie_dict, a dictionary of sets of the besties for each user in the
  dataset, following the same format as the earlier questions in the project
 - feat_dict, a dictionary containing the known attributes for each user in the
  training data, across a range of features; note that there is no guarantee
  that the attribute for a given feature will be known for every training user
 Your function should return a dictionary of features (based on features), with
 a predicted list of values for each.
 Your function should make its predictions as follows:
 - first, identify the set of besties for the given user, and for each feature
  of interest, determine the most-commonly attested attribute for that feature
  among the besties; in the case of a tie, the prediction should be a sorted
  list of attributes
 - second, for any features where no bestie has an attribute for that feature
  (meaning no prediction was possible in the first step), repeat the process
  using the second-besties, once again in the form of a sorted list
  of attributes
 - in the case that no bestie or second-bestie has that attribute, return an
  empty list.
 Note that all attributes will take the form of strings, with the empty string
 representing the fact that the user explicitly has no value for that feature
 (e.g. if the user did not go to university, the value for university would be
 ''), and the lack of an attribute for a given feature indicating that the
 attribute is unknown. Note further that even if the attribute for unknown_user
 is available in feat_dict, you should predict based on the attributes of
 besties and second besties.
 Example calls to the function are:
    >>> friendly_prediction('glenn', {'favourite author', 'university'},
        {'kim': {'sandy', 'alex', 'glenn'}, 'sandy': {'kim', 'alex'},
        'alex': {'kim', 'sandy'}, 'glenn': {'kim'}}, {'glenn':
        {'university': ''}, 'kim': {'favourite author': 'AA Milne'}, 'sandy':
        {'favourite author': 'JRR Tolkien',
        "university": "University of Melbourne"}, 'alex': {'favourite author':
        'AA Milne', 'university': 'Monash University'}})
    {'university': ['Monash University', 'University of Melbourne'],
     'favourite author': ['AA Milne']}
    >>> friendly_prediction('kim', {'university'}, {'kim':
        {'sandy', 'alex', 'glenn'}, 'sandy': {'kim', 'alex'}, 'alex':
        {'kim', 'sandy'}, 'glenn': {'kim'}}, {'glenn': {'university': ''},
        'kim': {'favourite author': 'AA Milne'}, 'sandy': {'favourite author':
        'JRR Tolkien', "university": "University of Melbourne"}, 'alex':
        {'favourite author': 'AA Milne', 'university': 'Monash University'}})
    {'university': ['', 'Monash University', 'University of Melbourne']}
    >>> friendly_prediction('kim', {'birthplace'}, {'kim':
        {'sandy', 'alex', 'glenn'}, 'sandy': {'kim', 'alex'}, 'alex':
        {'kim', 'sandy'}, 'glenn': {'kim'}}, {'glenn': {'university': ''},
        'kim': {'favourite author': 'AA Milne'}, 'sandy': {'favourite author':
        'JRR Tolkien', "university": "University of Melbourne"}, 'alex':
        {'favourite author': 'AA Milne', 'university': 'Monash University'}})
    {'birthplace': []}
--- a/project01-marks-updated.pdf
+++ b/project01-marks-updated.pdf
--- a/project01-marks.pdf
+++ b/project01-marks.pdf
--- a/project01-rubric.pdf
+++ b/project01-rubric.pdf
--- a/project01-sample-solutions.py
+++ b/project01-sample-solutions.py
@ -0,0 +1,134 @@
 # ------------------------------------------------------------------------------
 # Part 1
 def add_friend(individual, friend, friend_dict):
    """record `friend` in the friend set of `individual` inside
    `friend_dict`, creating that set first if `individual` has no entry"""
    friend_dict.setdefault(individual, set()).add(friend)
 def get_friendly_dict(friend_list):
    """build a dictionary of sets from `friend_list` (a list of undirected
    friendship links), mapping each user in the list to the set of all of
    their direct friends"""
    network = {}
    for left, right in friend_list:
        # a link is undirected, so register it from both ends
        for person, other in ((left, right), (right, left)):
            add_friend(person, other, network)
    return network
 # ------------------------------------------------------------------------------
 # Part 2
 def friend_besties(individual, bestie_dict):
    """return the (first-order) friends of `individual` as a sorted list,
    looked up in `bestie_dict`; an unknown individual has no friends"""
    return sorted(bestie_dict.get(individual, set()))
 # ------------------------------------------------------------------------------
 # Part 3
 def friend_second_besties(individual, bestie_dict):
    """return the sorted list of (strictly) second-order friends of
    `individual`, computed from the contents of `bestie_dict`"""
    # somebody outside the network has no second-order friends
    if individual not in bestie_dict:
        return []
    direct = bestie_dict[individual]
    # pool the friends of every direct friend that has its own entry
    candidates = set()
    for friend in direct:
        if friend in bestie_dict:
            candidates.update(bestie_dict[friend])
    # degree of separation must be exactly 2: drop the direct friends and
    # the individual themself
    excluded = direct.union({individual})
    return sorted(candidates.difference(excluded))
 # ------------------------------------------------------------------------------
 # Part 4
 def besties_coverage(individuals, bestie_dict, relationship_list):
    """calculate the proportion of the people in `bestie_dict` who are
    either in `individuals` or connected to one of them via a relation in
    `relationship_list`"""
    reached = set()
    for person in individuals:
        # the individual counts towards the coverage only when they are
        # actually part of the network
        if person in bestie_dict:
            reached.add(person)
        # add everyone linked to this individual through each relation
        for relation in relationship_list:
            reached.update(relation(person, bestie_dict))
    return len(reached) / len(bestie_dict)
 # ------------------------------------------------------------------------------
 # Part 5
 from collections import defaultdict
 def predict_attribute(friends, feat_dict, feature):
    """return the sorted list of the most frequent values of `feature`
    among the people in `friends`, according to the attributes recorded in
    `feat_dict`; the list is empty when no friend has that feature"""
    # tally one vote per friend who has a value for the feature
    tally = {}
    for person in friends:
        if person in feat_dict and feature in feat_dict[person]:
            value = feat_dict[person][feature]
            tally[value] = tally.get(value, 0) + 1
    # nobody voted, so there is nothing to predict
    if not tally:
        return []
    # keep every value that shares the highest vote count
    best = max(tally.values())
    return sorted(value for value, votes in tally.items() if votes == best)
 def friendly_prediction(unknown_user, features, bestie_dict, feat_dict):
    """return a dictionary of lists holding the predicted attributes of
    `unknown_user` for every feature in `features`, using the social network
    in `bestie_dict` and the user attribute data in `feat_dict`"""
    predictions = {}
    for feature in features:
        # first choice: a prediction drawn from the besties
        guess = predict_attribute(
            friend_besties(unknown_user, bestie_dict), feat_dict, feature)
        if not guess:
            # no bestie has the feature, so fall back on the second besties
            guess = predict_attribute(
                friend_second_besties(unknown_user, bestie_dict),
                feat_dict, feature)
        predictions[feature] = guess
    return predictions