135 lines
4.9 KiB
Python
135 lines
4.9 KiB
Python
|
# ------------------------------------------------------------------------------
|
||
|
# Part 1
|
||
|
|
||
|
def add_friend(individual, friend, friend_dict):
    """Record `friend` as a direct friend of `individual` in `friend_dict`.

    `friend_dict` maps each individual to the set of their friends and is
    updated in place; an empty set is created for `individual` first if
    they have no entry yet. Nothing is returned.
    """
    friend_dict.setdefault(individual, set()).add(friend)
|
||
|
|
||
|
def get_friendly_dict(friend_list):
    """Build a dictionary of direct friends from `friend_list`.

    `friend_list` is a list of undirected friendship links, each a pair
    of users. The result maps every user that appears in a link to the
    set of users they are directly linked with.
    """
    network = {}
    for first, second in friend_list:
        # a link is undirected, so register it from both ends
        for person, other in ((first, second), (second, first)):
            add_friend(person, other, network)
    return network
|
||
|
|
||
|
# ------------------------------------------------------------------------------
|
||
|
# Part 2
|
||
|
|
||
|
def friend_besties(individual, bestie_dict):
    """Return the first-order friends of `individual` as a sorted list.

    Friends are looked up in `bestie_dict`; an individual with no entry
    there yields an empty list.
    """
    return sorted(bestie_dict.get(individual, ()))
|
||
|
|
||
|
# ------------------------------------------------------------------------------
|
||
|
# Part 3
|
||
|
|
||
|
def friend_second_besties(individual, bestie_dict):
    """Return the strictly second-order friends of `individual`, sorted.

    A second-order friend is a friend of one of `individual`'s direct
    friends who is neither `individual` themself nor one of their direct
    friends. All look-ups are made in `bestie_dict`, which is not
    modified. An individual with no entry in `bestie_dict` yields an
    empty list.
    """
    besties = bestie_dict.get(individual)
    if besties is None:
        return []

    # pool the friends of every direct friend, growing one set in place
    # rather than building a fresh union on each iteration
    second_besties = set()
    for bestie in besties:
        second_besties.update(bestie_dict.get(bestie, ()))

    # remove anyone who is a direct friend or the individual themself,
    # so that the degree of separation is strictly 2
    second_besties.difference_update(besties)
    second_besties.discard(individual)
    return sorted(second_besties)
|
||
|
|
||
|
# ------------------------------------------------------------------------------
|
||
|
# Part 4
|
||
|
|
||
|
def besties_coverage(individuals, bestie_dict, relationship_list):
    """Return the proportion of the network covered by `individuals`.

    The covered set is made up of each member of `individuals` who has an
    entry in `bestie_dict`, plus everyone returned by calling each
    function in `relationship_list` as `relationship(individual,
    bestie_dict)` for each individual. The result is the size of that
    set divided by the number of entries in `bestie_dict`; an empty
    `bestie_dict` gives 0.0 rather than a division by zero.
    """
    # an empty network has nobody to cover
    if not bestie_dict:
        return 0.0

    connected = set()
    for individual in individuals:
        # an individual only counts themself if they are in the network
        if individual in bestie_dict:
            connected.add(individual)

        # add everyone reachable through each supplied relationship
        for relationship in relationship_list:
            connected.update(relationship(individual, bestie_dict))

    return len(connected) / len(bestie_dict)
|
||
|
|
||
|
# ------------------------------------------------------------------------------
|
||
|
# Part 5
|
||
|
|
||
|
from collections import defaultdict
|
||
|
|
||
|
def predict_attribute(friends, feat_dict, feature):
    """Predict the value of `feature` by majority vote among `friends`.

    Each friend who has an entry in `feat_dict` containing `feature`
    casts one vote for their own value. The value or values with the
    most votes are returned as a sorted list; if no friend has the
    feature, the list is empty.
    """
    # tally one vote per friend with a recorded value for the feature
    votes = {}
    for person in friends:
        profile = feat_dict.get(person, {})
        if feature in profile:
            value = profile[feature]
            votes[value] = votes.get(value, 0) + 1

    # nobody voted, so there is no prediction to make
    if not votes:
        return []

    # keep every value tied for the highest vote count
    top = max(votes.values())
    return sorted(value for value, tally in votes.items() if tally == top)
|
||
|
|
||
|
def friendly_prediction(unknown_user, features, bestie_dict, feat_dict):
    """Predict the attributes of `unknown_user` for each of `features`.

    For every feature, the prediction is taken from the user's direct
    friends in `bestie_dict`; if that yields nothing, it is taken from
    their second-order friends instead. Attribute data comes from
    `feat_dict`. The result is a dictionary mapping each feature to its
    list of predicted values.
    """
    predictions = {}
    for feature in features:
        # direct friends get the first say
        guess = predict_attribute(
            friend_besties(unknown_user, bestie_dict), feat_dict, feature)

        # nothing from direct friends: fall back on second-order friends
        if not guess:
            guess = predict_attribute(
                friend_second_besties(unknown_user, bestie_dict),
                feat_dict, feature)

        predictions[feature] = guess
    return predictions
|