Initial commit. Note this is my assignment submission as is - no modifications have been made since.

This commit is contained in:
Rory Healy 2024-06-08 21:05:12 +10:00
commit fa1fa79ac7
Signed by: roryhealy
GPG key ID: 0A3CBDE9C2AE672F
10 changed files with 524 additions and 0 deletions

30
part1.py Normal file
View file

@ -0,0 +1,30 @@
# Title: Project 1 - Friends
# Author: Rory Healy
# Date created - 16th April 2019
# Date modified - 18th April 2019
def get_friendly_dict(friend_list):
    '''Calculate the degree-one friends of each individual.

    Takes friend_list, a list of reciprocal friendship links, where each
    link is a 2-tuple of names. Returns a dictionary that maps every
    individual named in friend_list to the set of their immediate friends.
    An empty friend_list gives an empty dictionary.'''
    friend_dict = {}
    # Each link is reciprocal, so it is recorded in both directions. A single
    # pass over the links replaces the repeated rescans of friend_list.
    for first, second in friend_list:
        friend_dict.setdefault(first, set()).add(second)
        friend_dict.setdefault(second, set()).add(first)
    return friend_dict

26
part2.py Normal file
View file

@ -0,0 +1,26 @@
# Title: Project 1 - Social Network Besties
# Author: Rory Healy
# Date created - 17th April 2019
def friend_besties(individual, bestie_dict):
    '''Return a sorted list of the individual's degree-one friends.

    Takes an individual's name, stored as the string "individual", and the
    dictionary of sets of immediate friends, "bestie_dict". Returns an empty
    list when the individual is not in bestie_dict or has no friends.'''
    # Look the individual up directly. Scanning every entry is unnecessary,
    # and stopping at the first person with no friends gave wrong answers
    # for anyone appearing later in the dictionary.
    return sorted(bestie_dict.get(individual, ()))

39
part3.py Normal file
View file

@ -0,0 +1,39 @@
# Title: Project 1 - Social Network Second Besties
# Author: Rory Healy
# Date created - 17th April 2019
# Date modified - 18th April 2019
def friend_second_besties(individual, bestie_dict):
    '''Return a sorted list of the individual's degree-two friends.

    Takes a person, stored as the string "individual", and the dictionary of
    everyone's friends, "bestie_dict". A degree-two friend is a friend of one
    of the individual's immediate friends who is neither the individual nor
    one of their immediate friends. Returns an empty list when the individual
    is not in bestie_dict or has no degree-two friends.'''
    if individual not in bestie_dict:
        return []
    besties = set(bestie_dict[individual])
    # Collect the friends of every immediate friend, not just the first one.
    # A set removes people who are reachable through several friends.
    second_besties = set()
    for bestie in besties:
        # A friend with no entry of their own contributes nobody.
        second_besties.update(bestie_dict.get(bestie, ()))
    # Keep only people who are exactly two degrees of separation away.
    second_besties -= besties
    second_besties.discard(individual)
    return sorted(second_besties)

47
part4.py Normal file
View file

@ -0,0 +1,47 @@
# Title: Project 1 - Network Coverage
# Author: Rory Healy
# Date created - 18th April 2019
def besties_coverage(individuals, bestie_dict, relationship_list):
    '''Return the proportion of the network covered by the individuals.

    Takes a list of people "individuals", stored as strings in a list, a
    dictionary of sets of friends "bestie_dict", and "relationship_list", a
    list of functions that define relationships in the social network. Each
    function is called as relationship(individual, bestie_dict) and must
    return an iterable of names.

    Returns a float: the number of distinct people who are either a member of
    individuals (and present in bestie_dict) or connected to one of them via
    a relationship in relationship_list, divided by the number of people in
    bestie_dict. Returns 0.0 for an empty network.'''
    # An empty network has nobody to cover; this also avoids dividing by zero.
    if not bestie_dict:
        return 0.0
    # A set is used so that a person reachable in several ways is counted
    # once. Seed people who are not in the network are not counted.
    connected = {person for person in individuals if person in bestie_dict}
    for person in individuals:
        # Call each supplied relationship function directly rather than
        # comparing it against particular function names.
        for relationship in relationship_list:
            connected.update(relationship(person, bestie_dict))
    return len(connected) / len(bestie_dict)

34
part5.py Normal file
View file

@ -0,0 +1,34 @@
# Title: Project 1 - Social Network Attribute Prediction
# Author: Rory Healy
# Date created - 18th April 2019
def friendship_closeness():
    '''Assign a "friendship-closeness" value, from 0 to 1, based on the number
    of degrees of separation between people. The further the separation, the
    closer this value is to 0.

    Intended to take a dictionary of sets of friends "bestie_dict" and return
    a dictionary of sets of friends with their "friendship-closeness" value.

    NOTE(review): this is an unimplemented stub. The signature currently
    accepts no parameters and the function has no body beyond this docstring,
    so calling it returns None.'''
def prioritise_friendships():
    '''Make the predictions prioritise those from the people with the highest
    "friendship-closeness" value.

    Intended to take a user, stored as the string "unknown_user", and return
    a list of sets of other people in order of highest to lowest closeness
    (e.g. degree-one friends are in the first set, degree-two people are in
    the second set, etc.).

    NOTE(review): this is an unimplemented stub. The signature currently
    accepts no parameters and the function has no body beyond this docstring,
    so calling it returns None.'''
def friendly_prediction(unknown_user, features, bestie_dict, feat_dict):
'''Takes a user, stored as the string "unknown_user", features of the user
that are used to predict attributes, stored as a set "features", a
dictionary of sets of friends "bestie_dict", and a dictionary containing
the known attributes for each user "feat_dict". Returns a dictionary of
features with a predicted list of values for each feature.'''
# For the first given example, the predicted favourite author comes from
# Kim, while the predicted university comes from both Sandy and Alex. As
# Sandy and Alex are both degree-two friends, preference can be given to
# neither without further information. This is a main limitation for the
# 'friendship-closeness' value being used to make a prediction, as the
# unknown user may be closer to one degree-two friends than another. I
# know that this isn't necessarily relevent to creating this program, just
# something that I was thinking about while fixing up the other parts of
# this project.

214
project01 Normal file
View file

@ -0,0 +1,214 @@
Project 1 is all about "social networks", and the power of social connections,
both in terms of how impressively large a portion of the social network can be
accessed from a small number of seed users and their friends or
friends-of-friends, and how accurately the attributes of an individual can be
predicted from (partial) attributes of their friends/friends-of-friends. A
large part of the context for the project is in illustrating how it is that
companies such as Cambridge Analytica are able to influence the world so
impressively, from a small set of users of their products.
Throughout the project, we will refer to individuals as "nodes" in the social
network, and (mutual) friendship connections as "edges" connecting those nodes.
See the lecture slides for more details.
--------------------------------------------------------------------------------
Part 1 - Friends
Write a function get_friendly_dict() that calculates the degree-one friends of
each individual in a social network. The function takes one argument:
- friend_list, a list of reciprocal friendship links between individuals.
The function should return a dictionary of sets, containing the set of all
"degree-one" (= immediate) friends for each individual in the social network.
Note that the specific order of the individuals in the dictionary, and also the
ordering of the friends in each set does not matter.
The structure of friend_list is as follows: each element is a 2-tuple of
strings, representing a pairing of names of individuals in the social network
who are friends. Note that as friendship links are reciprocal, the 2-tuple
('kim', 'sandy'), e.g., indicates that 'kim' is a friend of 'sandy', and also
that 'sandy' is a friend of 'kim'.
Example function calls are:
>>> get_friendly_dict([('kim', 'sandy'), ('alex', 'sandy'),
('kim', 'alex'), ('kim', 'glenn')])
{'kim': {'glenn', 'sandy', 'alex'}, 'sandy': {'kim', 'alex'},
'alex': {'sandy', 'kim'}, 'glenn': {'kim'}}
>>> get_friendly_dict([('kim', 'sandy'), ('sandy', 'alex'),
('alex', 'glenn'), ('glenn', 'kim')])
{'kim': {'glenn', 'sandy'}, 'sandy': {'kim', 'alex'},
'alex': {'glenn', 'sandy'}, 'glenn': {'kim', 'alex'}}
--------------------------------------------------------------------------------
Part 2 - Social Network Besties
Write a function friend_besties() that calculates the "besties" (i.e.
degree-one friends) of a given individual in a social network. The function
takes two arguments:
- individual, an individual in the social network, in the form of a string ID
- bestie_dict, a dictionary of sets of friends of each individual in the
social network (as per the first question of the Project)
The function should return a sorted list, made up of all "degree-one" friends
for the individual. In the instance that the individual does not have any
friends in the social network, the function should return an empty list.
Example function calls are:
>>> friend_besties('kim', {'kim': {'sandy', 'alex', 'glenn'},
'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}})
['alex', 'glenn', 'sandy']
>>> friend_besties('ali', {'kim': {'sandy', 'alex', 'glenn'},
'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}})
[]
--------------------------------------------------------------------------------
Part 3 - Social Network Second Besties
Write a function friend_second_besties() that calculates the "second-besties"
(i.e. degree-two friends) of a given individual in a social network. The
function takes two arguments:
- individual, an individual in the social network, in the form of a string ID
- bestie_dict, a dictionary of sets of friends of each individual in the
social network (as per the first question of the Project)
The function should return a sorted list, made up of all "degree-two" friends
for the individual. In the instance that the individual does not have any
degree-two friends in the social network, the function should return an
empty list.
Example function calls are:
>>> friend_second_besties('glenn', {'kim': {'sandy', 'alex', 'glenn'},
'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}})
['alex', 'sandy']
>>> friend_second_besties('kim', {'kim': {'sandy', 'alex', 'glenn'},
'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}})
[]
--------------------------------------------------------------------------------
Part 4 - Network Coverage
Write a function besties_coverage() that computes the "coverage" of nodes
within a social network that are connected via predefined relationships to a
given list of individuals, i.e. the proportion of connected individuals, to the
total size of the network (= the number of people in the social network). The
function takes three arguments:
- individuals, a list of individuals, each in the form of a string ID
- bestie_dict, a dictionary of sets of friends of each individual in the
social network (as per the first question of the Project)
- relationship_list, a list of functions defining relationships in the
social network, selected from friend_besties and friend_second_besties.
The function should return a float, corresponding to the proportion of the
total number of individuals who are either a member of individuals or connected
via one of the relationships in relationship_list.
Example calls to the function are:
>>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'},
'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}},
[])
0.25
>>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'},
'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}},
[friend_besties])
0.5
>>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'},
'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}},
[friend_second_besties])
0.75
>>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'},
'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}},
[friend_besties, friend_second_besties])
1.0
--------------------------------------------------------------------------------
Part 5 - Social Network Attribute Prediction
The final question is for bonus marks, and is deliberately quite a bit harder
than the four basic questions (and the number of marks on offer is deliberately
not commensurate with the amount of effort required — bonus marks aren't meant
to be easy to get!). Only attempt this if you have completed the earlier
questions, and are up for a challenge!
The context for the bonus question is the prediction of attributes of a user
based on the attributes of their social network, and the observation that a
user's friends often have very similar interests and background to that user
(what is formally called homophily).
Write a function friendly_prediction() which takes four arguments:
- unknown_user, a string indicating the identity of the user you are to predict
attributes for
- features, a set of features you are to predict attributes for
- bestie_dict, a dictionary of sets of the besties for each user in the
dataset, following the same format as the earlier questions in the project
- feat_dict, a dictionary containing the known attributes for each user in the
training data, across a range of features; note that there is no guarantee
that the attribute for a given feature will be known for every training user
Your function should return a dictionary of features (based on features), with
a predicted list of values for each.
Your function should make its predictions as follows:
- first, identify the set of besties for the given user, and for each feature
of interest, determine the most-commonly attested attribute for that feature
among the besties; in the case of a tie, the prediction should be a sorted
list of attributes
- second, for any features where no bestie has an attribute for that feature
(meaning no prediction was possible in the first step), repeat the process
using the second-besties, once again in the form of a sorted list
of attributes
- in the case that no bestie or second-bestie has that attribute, return an
empty list.
Note that all attributes will take the form of strings, with the empty string
representing the fact that the user explicitly has no value for that feature
(e.g. if the user did not go to university, the value for university would be
''), and the lack of an attribute for a given feature indicating that the
attribute is unknown. Note further that even if the attribute for unknown_user
is available in feat_dict, you should predict based on the attributes of
besties and second besties.
Example calls to the function are:
>>> friendly_prediction('glenn', {'favourite author', 'university'},
{'kim': {'sandy', 'alex', 'glenn'}, 'sandy': {'kim', 'alex'},
'alex': {'kim', 'sandy'}, 'glenn': {'kim'}}, {'glenn':
{'university': ''}, 'kim': {'favourite author': 'AA Milne'}, 'sandy':
{'favourite author': 'JRR Tolkien',
"university": "University of Melbourne"}, 'alex': {'favourite author':
'AA Milne', 'university': 'Monash University'}})
{'university': ['Monash University', 'University of Melbourne'],
'favourite author': ['AA Milne']}
>>> friendly_prediction('kim', {'university'}, {'kim':
{'sandy', 'alex', 'glenn'}, 'sandy': {'kim', 'alex'}, 'alex':
{'kim', 'sandy'}, 'glenn': {'kim'}}, {'glenn': {'university': ''},
'kim': {'favourite author': 'AA Milne'}, 'sandy': {'favourite author':
'JRR Tolkien', "university": "University of Melbourne"}, 'alex':
{'favourite author': 'AA Milne', 'university': 'Monash University'}})
{'university': ['', 'Monash University', 'University of Melbourne']}
>>> friendly_prediction('kim', {'birthplace'}, {'kim':
{'sandy', 'alex', 'glenn'}, 'sandy': {'kim', 'alex'}, 'alex':
{'kim', 'sandy'}, 'glenn': {'kim'}}, {'glenn': {'university': ''},
'kim': {'favourite author': 'AA Milne'}, 'sandy': {'favourite author':
'JRR Tolkien', "university": "University of Melbourne"}, 'alex':
{'favourite author': 'AA Milne', 'university': 'Monash University'}})
{'birthplace': []}

BIN
project01-marks-updated.pdf Executable file

Binary file not shown.

BIN
project01-marks.pdf Executable file

Binary file not shown.

BIN
project01-rubric.pdf Executable file

Binary file not shown.

View file

@ -0,0 +1,134 @@
# ------------------------------------------------------------------------------
# Part 1
def add_friend(individual, friend, friend_dict):
    """record `friend` in the friend set stored for `individual` in
    `friend_dict`, creating that set first if `individual` has no entry"""
    friend_dict.setdefault(individual, set()).add(friend)
def get_friendly_dict(friend_list):
    """build, from `friend_list` (a list of undirected friendship links),
    a dictionary mapping each user in that list to the set of their
    direct friends"""
    network = {}
    for left, right in friend_list:
        # a link is undirected, so register it once in each direction
        for person, other in ((left, right), (right, left)):
            add_friend(person, other, network)
    return network
# ------------------------------------------------------------------------------
# Part 2
def friend_besties(individual, bestie_dict):
    """return the (first-order) friends of `individual` as a sorted list,
    looked up in `bestie_dict`; the list is empty if `individual` has no
    entry"""
    return sorted(bestie_dict.get(individual, set()))
# ------------------------------------------------------------------------------
# Part 3
def friend_second_besties(individual, bestie_dict):
    """return, as a sorted list, the (strictly) second-order friends of
    `individual` according to `bestie_dict`"""
    # someone outside the network has no second-order friends
    if individual not in bestie_dict:
        return []
    besties = bestie_dict[individual]
    # pool the friends of every direct friend that has an entry
    reachable = set()
    for bestie in besties:
        if bestie in bestie_dict:
            reachable.update(bestie_dict[bestie])
    # strictly degree two: exclude direct friends and the individual themself
    too_close = besties.union({individual})
    return sorted(reachable.difference(too_close))
# ------------------------------------------------------------------------------
# Part 4
def besties_coverage(individuals, bestie_dict, relationship_list):
    """calculate what proportion of the people in `bestie_dict` are either
    in `individuals` or connected to one of them via a relationship in
    `relationship_list`

    each relationship is a function called as
    `relationship(individual, bestie_dict)` that returns an iterable of
    names; returns a float, or 0.0 if `bestie_dict` is empty"""
    # an empty network has nobody to cover (and would divide by zero below)
    if not bestie_dict:
        return 0.0
    # get the set of everyone connected to each individual in
    # `individuals` via the relations in `relationship_list`
    # (including the individuals themselves)
    connected = set()
    for individual in individuals:
        # only count the individual themself if they are in the network
        if individual in bestie_dict:
            connected.add(individual)
        for relationship in relationship_list:
            connected.update(relationship(individual, bestie_dict))
    return len(connected) / len(bestie_dict)
# ------------------------------------------------------------------------------
# Part 5
from collections import defaultdict
def predict_attribute(friends, feat_dict, feature):
    """return the sorted list of the most frequent value(s) of `feature`
    among `friends`, using the attributes recorded in `feat_dict`; the list
    is empty if no friend has a value for `feature`"""
    # tally one vote per friend that has a value for the feature
    votes = {}
    for friend in friends:
        if friend not in feat_dict:
            continue
        attributes = feat_dict[friend]
        if feature not in attributes:
            continue
        value = attributes[feature]
        votes[value] = votes.get(value, 0) + 1

    # nobody voted, so there is nothing to predict
    if not votes:
        return []

    # keep every value that ties for the highest vote count
    top = max(votes.values())
    return sorted(value for value, tally in votes.items() if tally == top)
def friendly_prediction(unknown_user, features, bestie_dict, feat_dict):
    """return a dictionary of lists giving, for each feature in `features`,
    the predicted attribute(s) of `unknown_user`, based on the social
    network in `bestie_dict` and the user attribute data in `feat_dict`"""
    result = {}
    for feature in features:
        # first choice: whatever the besties suggest
        guess = predict_attribute(
            friend_besties(unknown_user, bestie_dict), feat_dict, feature)
        # fall back on the second besties only when the besties say nothing
        if not guess:
            guess = predict_attribute(
                friend_second_besties(unknown_user, bestie_dict),
                feat_dict, feature)
        result[feature] = guess
    return result