Initial commit. Note this is my assignment submission as is - no modifications have been made since.
This commit is contained in:
commit
fa1fa79ac7
10 changed files with 524 additions and 0 deletions
30
part1.py
Normal file
30
part1.py
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
# Title: Project 1 - Friends
|
||||||
|
# Author: Rory Healy
|
||||||
|
# Date created - 16th April 2019
|
||||||
|
# Date modified - 18th April 2019
|
||||||
|
|
||||||
|
def get_friendly_dict(friend_list):
    '''Takes a list of reciprocal friendship links between individuals,
    friend_list, and calculates the degree-one friends of each individual.

    Each element of friend_list is a pair of names (e.g. a 2-tuple of
    strings). Returns a dictionary mapping every individual that appears in
    any link to the set of their immediate friends. An empty friend_list
    gives an empty dictionary.'''

    friend_dict = {}
    for first, second in friend_list:
        # Friendship is reciprocal, so every link is recorded in both
        # directions. setdefault creates the set the first time a person
        # is seen, which makes a single pass over the links sufficient.
        friend_dict.setdefault(first, set()).add(second)
        friend_dict.setdefault(second, set()).add(first)

    return friend_dict
|
26
part2.py
Normal file
26
part2.py
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
# Title: Project 1 - Social Network Besties
|
||||||
|
# Author: Rory Healy
|
||||||
|
# Date created - 17th April 2019
|
||||||
|
|
||||||
|
def friend_besties(individual, bestie_dict):
    '''Takes an individual's name, stored as a string "individual", and the
    dictionary of sets of immediate friends, stored as a dictionary
    "bestie_dict". Returns a sorted list of the individual's degree-one
    friends.

    Returns an empty list when the individual is not in bestie_dict or has
    no friends.'''

    # Look the individual up directly. Scanning every entry and returning
    # early on the first empty friend set would wrongly report "no friends"
    # whenever some other person with no friends is visited first.
    return sorted(bestie_dict.get(individual, ()))
|
39
part3.py
Normal file
39
part3.py
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
# Title: Project 1 - Social Network Second Besties
|
||||||
|
# Author: Rory Healy
|
||||||
|
# Date created - 17th April 2019
|
||||||
|
# Date modified - 18th April 2019
|
||||||
|
|
||||||
|
def friend_second_besties(individual, bestie_dict):
    '''Takes a person, stored as the string "individual", and the dictionary of
    everyone's friends, stored as the dictionary "bestie_dict", and
    returns a sorted list of the individual's degree-two friends.

    A degree-two friend is a friend of one of the individual's immediate
    friends who is neither the individual nor one of their immediate
    friends. Returns an empty list when the individual is not in
    bestie_dict or has no degree-two friends.'''

    besties = bestie_dict.get(individual, ())

    # Collect the friends of every immediate friend, not just the first
    # one. A bestie without their own entry simply contributes nothing.
    second_besties = set()
    for bestie in besties:
        second_besties.update(bestie_dict.get(bestie, ()))

    # Remove the individual and any immediate friends so that only people
    # exactly two degrees of separation away remain. Set operations avoid
    # removing items from a list while iterating over it.
    second_besties.difference_update(besties)
    second_besties.discard(individual)

    return sorted(second_besties)
|
47
part4.py
Normal file
47
part4.py
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
# Title: Project 1 - Network Coverage
|
||||||
|
# Author: Rory Healy
|
||||||
|
# Date created - 18th April 2019
|
||||||
|
|
||||||
|
def besties_coverage(individuals, bestie_dict, relationship_list):
    '''Takes a list of people "individuals", stored as strings in a list, a
    dictionary of sets of friends "bestie_dict", and a list of functions that
    define relationships in the social network, selected from friend_besties
    and friend_second_besties. Returns a float that corresponds to the
    proportion of individuals who are either a member of individuals or are
    connected via a relationship stated in relationship_list.

    Each function in relationship_list is called as
    relationship(individual, bestie_dict) and must return an iterable of
    names. Individuals that are not in bestie_dict are ignored. Returns 0.0
    for an empty network.'''

    # An empty network has nobody to cover; avoid dividing by zero.
    if not bestie_dict:
        return 0.0

    # Gather the covered people in a set so that somebody reachable from
    # several individuals, or through several relationships, is only counted
    # once.
    connected = set()
    for individual in individuals:
        if individual not in bestie_dict:
            continue
        connected.add(individual)
        # Call the relationship functions that were passed in rather than
        # comparing them against specific function names.
        for relationship in relationship_list:
            connected.update(relationship(individual, bestie_dict))

    return len(connected) / len(bestie_dict)
|
34
part5.py
Normal file
34
part5.py
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
# Title: Project 1 - Social Network Attribute Prediction
|
||||||
|
# Author: Rory Healy
|
||||||
|
# Date created - 18th April 2019
|
||||||
|
|
||||||
|
def friendship_closeness():
    '''Design stub for a "friendship-closeness" score; not implemented.

    The intended design assigns each pair of people a value from 0 to 1
    based on their degrees of separation, with the value approaching 0 as
    the separation grows. It was meant to take a dictionary of sets of
    friends "bestie_dict" and return a dictionary of sets of friends with
    their "friendship-closeness" value.

    As written, the function takes no arguments and returns None.'''
|
||||||
|
|
||||||
|
def prioritise_friendships():
    '''Design stub for ordering people by closeness; not implemented.

    The intended design makes predictions prioritise the people with the
    highest "friendship-closeness" value. It was meant to take a user,
    stored as the string "unknown_user", and return a list of sets of other
    people ordered from highest to lowest closeness (e.g. degree-one friends
    in the first set, degree-two people in the second set, etc.).

    As written, the function takes no arguments and returns None.'''
|
||||||
|
|
||||||
|
def friendly_prediction(unknown_user, features, bestie_dict, feat_dict):
    '''Takes a user, stored as the string "unknown_user", features of the user
    that are used to predict attributes, stored as a set "features", a
    dictionary of sets of friends "bestie_dict", and a dictionary containing
    the known attributes for each user "feat_dict". Returns a dictionary of
    features with a predicted list of values for each feature.

    For each feature the prediction is the sorted list of the most commonly
    attested attribute(s) among the user's degree-one friends. If no
    degree-one friend has an attribute for the feature, the degree-two
    friends are used instead. If nobody in either group has one, the
    prediction is an empty list. The user's own attributes are never used.'''

    from collections import Counter

    # Degree-one friends, and the people exactly two degrees away.
    besties = set(bestie_dict.get(unknown_user, ()))
    second_besties = set()
    for bestie in besties:
        second_besties.update(bestie_dict.get(bestie, ()))
    second_besties -= besties
    second_besties.discard(unknown_user)

    def most_common_attributes(group, feature):
        # Tallies the attribute each person in the group has for the
        # feature and returns every attribute tied for the highest count.
        # The empty string is a real attribute ("explicitly no value"),
        # so presence is tested with "in" rather than truthiness.
        votes = Counter(feat_dict[person][feature] for person in group
                        if feature in feat_dict.get(person, {}))
        if not votes:
            return []
        top_count = max(votes.values())
        return sorted(attribute for attribute, count in votes.items()
                      if count == top_count)

    # NOTE: all degree-two friends are weighted equally. In the first
    # example of the specification the predicted university comes from both
    # Sandy and Alex; as both are degree-two friends, preference can be
    # given to neither without further information. This is a limitation of
    # using degrees of separation alone, as the unknown user may in reality
    # be closer to one degree-two friend than another.
    predictions = {}
    for feature in features:
        predictions[feature] = (most_common_attributes(besties, feature)
                                or most_common_attributes(second_besties,
                                                          feature))

    return predictions
|
214
project01
Normal file
214
project01
Normal file
|
@ -0,0 +1,214 @@
|
||||||
|
Project 1 is all about "social networks", and the power of social connections,
|
||||||
|
both in terms of how impressively large a portion of the social network can be
|
||||||
|
accessed from a small number of seed users and their friends or
|
||||||
|
friends-of-friends, and how accurately the attributes of an individual can be
|
||||||
|
predicted from (partial) attributes of their friends/friends-of-friends. A
|
||||||
|
large part of the context for the project is in illustrating how it is that
|
||||||
|
companies such as Cambridge Analytica are able to influence the world so
|
||||||
|
impressively, from a small set of users of their products.
|
||||||
|
|
||||||
|
Throughout the project, we will refer to individuals as "nodes" in the social
|
||||||
|
network, and (mutual) friendship connections as "edges" connecting those nodes.
|
||||||
|
See the lecture slides for more details.
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
Part 1 - Friends
|
||||||
|
|
||||||
|
Write a function get_friendly_dict() that calculates the degree-one friends of
|
||||||
|
each individual in a social network. The function takes one argument:
|
||||||
|
|
||||||
|
- friend_list, a list of reciprocal friendship links between individuals.
|
||||||
|
|
||||||
|
The function should return a dictionary of sets, containing the set of all
|
||||||
|
"degree-one" (= immediate) friends for each individual in the social network.
|
||||||
|
Note that the specific order of the individuals in the dictionary, and also the
|
||||||
|
ordering of the friends in each set does not matter.
|
||||||
|
|
||||||
|
The structure of friend_list is as follows: each element is a 2-tuple of
|
||||||
|
strings, representing a pairing of names of individuals in the social network
|
||||||
|
who are friends. Note that as friendship links are reciprocal, the 2-tuple
|
||||||
|
('kim', 'sandy'), e.g., indicates that 'kim' is a friend of 'sandy', and also
|
||||||
|
that 'sandy' is a friend of 'kim'.
|
||||||
|
|
||||||
|
Example function calls are:
|
||||||
|
|
||||||
|
>>> get_friendly_dict([('kim', 'sandy'), ('alex', 'sandy'),
|
||||||
|
('kim', 'alex'), ('kim', 'glenn')])
|
||||||
|
{'kim': {'glenn', 'sandy', 'alex'}, 'sandy': {'kim', 'alex'},
|
||||||
|
'alex': {'sandy', 'kim'}, 'glenn': {'kim'}}
|
||||||
|
|
||||||
|
>>> get_friendly_dict([('kim', 'sandy'), ('sandy', 'alex'),
|
||||||
|
('alex', 'glenn'), ('glenn', 'kim')])
|
||||||
|
{'kim': {'glenn', 'sandy'}, 'sandy': {'kim', 'alex'},
|
||||||
|
'alex': {'glenn', 'sandy'}, 'glenn': {'kim', 'alex'}}
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
Part 2 - Social Network Besties
|
||||||
|
|
||||||
|
Write a function friend_besties() that calculates the "besties" (i.e.
|
||||||
|
degree-one friends) of a given individual in a social network. The function
|
||||||
|
takes two arguments:
|
||||||
|
|
||||||
|
- individual, an individual in the social network, in the form of a string ID
|
||||||
|
- bestie_dict, a dictionary of sets of friends of each individual in the
|
||||||
|
social network (as per the first question of the Project)
|
||||||
|
|
||||||
|
The function should return a sorted list, made up of all "degree-one" friends
|
||||||
|
for the individual. In the instance that the individual does not have any
|
||||||
|
friends in the social network, the function should return an empty list.
|
||||||
|
|
||||||
|
Example function calls are:
|
||||||
|
|
||||||
|
>>> friend_besties('kim', {'kim': {'sandy', 'alex', 'glenn'},
|
||||||
|
'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}})
|
||||||
|
['alex', 'glenn', 'sandy']
|
||||||
|
|
||||||
|
>>> friend_besties('ali', {'kim': {'sandy', 'alex', 'glenn'},
|
||||||
|
'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}})
|
||||||
|
[]
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
Part 3 - Social Network Second Besties
|
||||||
|
|
||||||
|
Write a function friend_second_besties() that calculates the "second-besties"
|
||||||
|
(i.e. degree-two friends) of a given individual in a social network. The
|
||||||
|
function takes two arguments:
|
||||||
|
|
||||||
|
- individual, an individual in the social network, in the form of a string ID
|
||||||
|
- bestie_dict, a dictionary of sets of friends of each individual in the
|
||||||
|
social network (as per the first question of the Project)
|
||||||
|
|
||||||
|
The function should return a sorted list, made up of all "degree-two" friends
|
||||||
|
for the individual. In the instance that the individual does not have any
|
||||||
|
degree-two friends in the social network, the function should return an
|
||||||
|
empty list.
|
||||||
|
|
||||||
|
Example function calls are:
|
||||||
|
|
||||||
|
>>> friend_second_besties('glenn', {'kim': {'sandy', 'alex', 'glenn'},
|
||||||
|
'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}})
|
||||||
|
['alex', 'sandy']
|
||||||
|
|
||||||
|
>>> friend_second_besties('kim', {'kim': {'sandy', 'alex', 'glenn'},
|
||||||
|
'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}})
|
||||||
|
[]
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
Part 4 - Network Coverage
|
||||||
|
|
||||||
|
Write a function besties_coverage() that computes the "coverage" of nodes
|
||||||
|
within a social network that are connected via predefined relationships to a
|
||||||
|
given list of individuals, i.e. the proportion of connected individuals, to the
|
||||||
|
total size of the network (= the number of people in the social network). The
|
||||||
|
function takes three arguments:
|
||||||
|
|
||||||
|
- individuals, a list of individuals, each in the form of a string ID
|
||||||
|
- bestie_dict, a dictionary of sets of friends of each individual in the
|
||||||
|
social network (as per the first question of the Project)
|
||||||
|
- relationship_list, a list of functions defining relationships in the
|
||||||
|
social network, selected from friend_besties and friend_second_besties.
|
||||||
|
|
||||||
|
The function should return a float, corresponding to the proportion of the
|
||||||
|
total number of individuals who are either a member of individuals or connected
|
||||||
|
via one of the relationships in relationship_list.
|
||||||
|
|
||||||
|
Example calls to the function are:
|
||||||
|
|
||||||
|
>>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'},
|
||||||
|
'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}},
|
||||||
|
[])
|
||||||
|
0.25
|
||||||
|
|
||||||
|
>>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'},
|
||||||
|
'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}},
|
||||||
|
[friend_besties])
|
||||||
|
0.5
|
||||||
|
|
||||||
|
>>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'},
|
||||||
|
'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}},
|
||||||
|
[friend_second_besties])
|
||||||
|
0.75
|
||||||
|
|
||||||
|
>>> besties_coverage(['glenn'], {'kim': {'sandy', 'alex', 'glenn'},
|
||||||
|
'sandy': {'kim', 'alex'}, 'alex': {'kim', 'sandy'}, 'glenn': {'kim'}},
|
||||||
|
[friend_besties, friend_second_besties])
|
||||||
|
1.0
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
Part 5 - Social Network Attribute Prediction
|
||||||
|
|
||||||
|
The final question is for bonus marks, and is deliberately quite a bit harder
|
||||||
|
than the four basic questions (and the number of marks on offer is deliberately
|
||||||
|
not commensurate with the amount of effort required — bonus marks aren't meant
|
||||||
|
to be easy to get!). Only attempt this if you have completed the earlier
|
||||||
|
questions, and are up for a challenge!
|
||||||
|
|
||||||
|
The context for the bonus question is the prediction of attributes of a user
|
||||||
|
based on the attributes of their social network, and the observation that a
|
||||||
|
user's friends often have very similar interests and background to that user
|
||||||
|
(what is formally called homophily).
|
||||||
|
|
||||||
|
Write a function friendly_prediction() which takes four arguments:
|
||||||
|
|
||||||
|
- unknown_user, a string indicating the identity of the user you are to predict
|
||||||
|
attributes for
|
||||||
|
- features, a set of features you are to predict attributes for
|
||||||
|
- bestie_dict, a dictionary of sets of the besties for each user in the
|
||||||
|
dataset, following the same format as the earlier questions in the project
|
||||||
|
- feat_dict, a dictionary containing the known attributes for each user in the
|
||||||
|
training data, across a range of features; note that there is no guarantee
|
||||||
|
that the attribute for a given feature will be known for every training user
|
||||||
|
|
||||||
|
Your function should return a dictionary of features (based on features), with
|
||||||
|
a predicted list of values for each.
|
||||||
|
|
||||||
|
Your function should make its predictions as follows:
|
||||||
|
|
||||||
|
- first, identify the set of besties for the given user, and for each feature
|
||||||
|
of interest, determine the most-commonly attested attribute for that feature
|
||||||
|
among the besties; in the case of a tie, the prediction should be a sorted
|
||||||
|
list of attributes
|
||||||
|
|
||||||
|
- second, for any features where no bestie has an attribute for that feature
|
||||||
|
(meaning no prediction was possible in the first step), repeat the process
|
||||||
|
using the second-besties, once again in the form of a sorted list
|
||||||
|
of attributes
|
||||||
|
|
||||||
|
- in the case that no bestie or second-bestie has that attribute, return an
|
||||||
|
empty list.
|
||||||
|
|
||||||
|
Note that all attributes will take the form of strings, with the empty string
|
||||||
|
representing the fact that the user explicitly has no value for that feature
|
||||||
|
(e.g. if the user did not go to university, the value for university would be
|
||||||
|
''), and the lack of an attribute for a given feature indicating that the
|
||||||
|
attribute is unknown. Note further that even if the attribute for unknown_user
|
||||||
|
is available in feat_dict, you should predict based on the attributes of
|
||||||
|
besties and second besties.
|
||||||
|
|
||||||
|
Example calls to the function are:
|
||||||
|
|
||||||
|
>>> friendly_prediction('glenn', {'favourite author', 'university'},
|
||||||
|
{'kim': {'sandy', 'alex', 'glenn'}, 'sandy': {'kim', 'alex'},
|
||||||
|
'alex': {'kim', 'sandy'}, 'glenn': {'kim'}}, {'glenn':
|
||||||
|
{'university': ''}, 'kim': {'favourite author': 'AA Milne'}, 'sandy':
|
||||||
|
{'favourite author': 'JRR Tolkien',
|
||||||
|
"university": "University of Melbourne"}, 'alex': {'favourite author':
|
||||||
|
'AA Milne', 'university': 'Monash University'}})
|
||||||
|
{'university': ['Monash University', 'University of Melbourne'],
|
||||||
|
'favourite author': ['AA Milne']}
|
||||||
|
|
||||||
|
>>> friendly_prediction('kim', {'university'}, {'kim':
|
||||||
|
{'sandy', 'alex', 'glenn'}, 'sandy': {'kim', 'alex'}, 'alex':
|
||||||
|
{'kim', 'sandy'}, 'glenn': {'kim'}}, {'glenn': {'university': ''},
|
||||||
|
'kim': {'favourite author': 'AA Milne'}, 'sandy': {'favourite author':
|
||||||
|
'JRR Tolkien', "university": "University of Melbourne"}, 'alex':
|
||||||
|
{'favourite author': 'AA Milne', 'university': 'Monash University'}})
|
||||||
|
{'university': ['', 'Monash University', 'University of Melbourne']}
|
||||||
|
|
||||||
|
>>> friendly_prediction('kim', {'birthplace'}, {'kim':
|
||||||
|
{'sandy', 'alex', 'glenn'}, 'sandy': {'kim', 'alex'}, 'alex':
|
||||||
|
{'kim', 'sandy'}, 'glenn': {'kim'}}, {'glenn': {'university': ''},
|
||||||
|
'kim': {'favourite author': 'AA Milne'}, 'sandy': {'favourite author':
|
||||||
|
'JRR Tolkien', "university": "University of Melbourne"}, 'alex':
|
||||||
|
{'favourite author': 'AA Milne', 'university': 'Monash University'}})
|
||||||
|
{'birthplace': []}
|
BIN
project01-marks-updated.pdf
Executable file
BIN
project01-marks-updated.pdf
Executable file
Binary file not shown.
BIN
project01-marks.pdf
Executable file
BIN
project01-marks.pdf
Executable file
Binary file not shown.
BIN
project01-rubric.pdf
Executable file
BIN
project01-rubric.pdf
Executable file
Binary file not shown.
134
project01-sample-solutions.py
Normal file
134
project01-sample-solutions.py
Normal file
|
@ -0,0 +1,134 @@
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
# Part 1
|
||||||
|
|
||||||
|
def add_friend(individual, friend, friend_dict):
    """record `friend` in the friend set of `individual` inside
    `friend_dict`, creating that set the first time `individual` is seen"""
    friend_dict.setdefault(individual, set()).add(friend)
|
||||||
|
|
||||||
|
def get_friendly_dict(friend_list):
    """build a dictionary of sets from `friend_list` (a list of undirected
    friendship links), mapping each user in the list to the set of
    their direct friends"""

    network = {}
    for left, right in friend_list:
        # links are undirected, so register each one in both directions
        for person, other in ((left, right), (right, left)):
            network.setdefault(person, set()).add(other)
    return network
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
# Part 2
|
||||||
|
|
||||||
|
def friend_besties(individual, bestie_dict):
    """return the sorted list of (first-order) friends of `individual`,
    looked up in `bestie_dict`; empty list if `individual` has no entry"""

    return sorted(bestie_dict.get(individual, set()))
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
# Part 3
|
||||||
|
|
||||||
|
def friend_second_besties(individual, bestie_dict):
    """generate the sorted list of (strictly) second-order friends for
    `individual`, based on the contents of `bestie_dict`; returns an
    empty list if `individual` is not in `bestie_dict`"""

    # someone outside the network has no friends of friends; returning
    # early also avoids a KeyError on the lookup below
    if individual not in bestie_dict:
        return []
    besties = bestie_dict[individual]

    # extract out friends of friends by finding the friends of each
    # direct friend (skipping friends with no entry of their own)
    second_besties = set()
    for bestie in besties:
        if bestie in bestie_dict:
            second_besties.update(bestie_dict[bestie])

    # remove anyone who is a direct friend or the individual themself,
    # so that the degree of separation is strictly 2
    second_besties.difference_update(besties)
    second_besties.discard(individual)
    return sorted(second_besties)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
# Part 4
|
||||||
|
|
||||||
|
def besties_coverage(individuals, bestie_dict, relationship_list):
    """calculate what proportion of the people in `bestie_dict` are either
    in `individuals` or connected to one of them via a relationship in
    `relationship_list`

    each relationship is a function called as
    `relationship(individual, bestie_dict)` that returns an iterable of
    names; individuals outside the network are ignored, and an empty
    network gives 0.0"""

    # nobody to cover in an empty network; also avoids dividing by zero
    if not bestie_dict:
        return 0.0

    # get the set of everyone connected to each individual in
    # `individuals` via the relations in `relationship_list`
    # (including the individuals themselves)
    connected = set()
    for individual in individuals:

        # only consider individuals who are in the network
        if individual not in bestie_dict:
            continue
        connected.add(individual)

        for relationship in relationship_list:
            connected.update(relationship(individual, bestie_dict))

    return len(connected) / len(bestie_dict)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
# Part 5
|
||||||
|
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
def predict_attribute(friends, feat_dict, feature):
    """return the sorted list of the most frequent value(s) of `feature`
    among `friends`, using the attributes recorded in `feat_dict`;
    empty list if no friend has a value for `feature`"""

    # tally one vote per friend who has the relevant attribute
    tally = {}
    for friend in friends:
        if friend not in feat_dict or feature not in feat_dict[friend]:
            continue
        value = feat_dict[friend][feature]
        tally[value] = tally.get(value, 0) + 1

    # no users with relevant attribute, so no prediction to be made
    if not tally:
        return []

    # keep every attribute tied for the highest frequency
    best = max(tally.values())
    return sorted(value for value, votes in tally.items() if votes == best)
|
||||||
|
|
||||||
|
def friendly_prediction(unknown_user, features, bestie_dict, feat_dict):
    """return a dictionary of lists holding the predicted attributes of
    `unknown_user` for each feature in `features`, derived from the social
    network in `bestie_dict` and the user attribute data in `feat_dict`"""

    predictions = {}
    for feature in features:
        # try the besties first; `or` only evaluates the second-besties
        # prediction when the besties yield an empty list
        predictions[feature] = (
            predict_attribute(friend_besties(unknown_user, bestie_dict),
                              feat_dict, feature)
            or predict_attribute(
                friend_second_besties(unknown_user, bestie_dict),
                feat_dict, feature))

    return predictions
|
Loading…
Reference in a new issue