In [1]:
import bqplot
import ipywidgets
import numpy as np
import pandas as pd

# probably don't need:
#%matplotlib inline

### Note: more examples available here - https://github.com/jupyter-widgets/ipyleaflet/tree/master/examples

# Networked data - Simple example


In [2]:
# Let's start with some very basic node data: one dict per node.
# ** copy/paste this list into the chat **
node_data = [
    dict(label="Luke Skywalker", media="Star Wars", shape="rect"),
    dict(label="Jean-Luc Picard", media="Star Trek", shape="rect"),
    dict(label="Doctor Who", media="Doctor Who", shape="rect"),
    dict(label="Pikachu", media="Detective Pikachu", shape="circle"),
]

# bqplot.Graph is the mark that draws these; every node gets the same colour
graph = bqplot.Graph(
    node_data=node_data,
    colors=["red"] * len(node_data),
)

fig = bqplot.Figure(marks=[graph])
fig

# Note that the nodes can be picked up and moved around, but nothing
# connects them to each other yet -- the next cell adds some links.

Figure(fig_margin={'top': 60, 'bottom': 60, 'left': 60, 'right': 60}, marks=[Graph(colors=['red', 'red', 'red'…

In [3]:
# the same four characters as in the previous cell, rebuilt here
node_data = [
    {"label": name, "media": media, "shape": shape}
    for name, media, shape in (
        ("Luke Skywalker", "Star Wars", "rect"),
        ("Jean-Luc Picard", "Star Trek", "rect"),
        ("Doctor Who", "Doctor Who", "rect"),
        ("Pikachu", "Detective Pikachu", "circle"),
    )
]

# Links name nodes by their position in node_data: entry 0 (Luke Skywalker)
# is linked to entry 1 (Jean-Luc Picard) and to entry 3 (Pikachu).
link_data = [{'source': 0, 'target': other} for other in (1, 3)]

graph = bqplot.Graph(
    node_data=node_data,
    link_data=link_data,
    colors=["red"] * len(node_data),
    # (2) springiness of the links: a positive charge makes the nodes want
    #     to overlap and is, in general, a lot of fun.
    #     NOTE(review): an earlier comment called -300 the default, but the
    #     widget repr shown under this cell lists charge=-300, which suggests
    #     it is not -- confirm against the bqplot Graph docs.
    charge=-300,
    # (3) link type: 'arc' (default), 'line' or 'slant_line'
    link_type='line',
    # (4) highlight link direction, or not
    directed=False,
)

fig = bqplot.Figure(marks=[graph])
fig

Figure(fig_margin={'top': 60, 'bottom': 60, 'left': 60, 'right': 60}, marks=[Graph(charge=-300, colors=['red',…

In [4]:
# Everything we did with the earlier map plots works for graphs as well.
# (1) For example, a tooltip that shows each node's "media" field:
tooltip = bqplot.Tooltip(fields=["media"])
graph = bqplot.Graph(
    node_data=node_data,
    link_data=link_data,
    colors=["red"] * 4,
    tooltip=tooltip,
)

# Labels can be made interactive too: this (initially empty) one is filled in
# by the click handler defined below.
label = ipywidgets.Label()
# note here that the calling sequence 
# is a little different - instead 
# of "change" we have "obj" and 
# "element"
def printstuff(obj, element):
    """Write the clicked node's "media" field into the module-level ``label``.

    obj     -- first argument handed to the callback; not used here.
    element -- dict describing the click; only element['data']['media'] is read.
    """
    clicked_node = element['data']
    label.value = 'Media = ' + clicked_node['media']
    
# register printstuff as the graph's element-click callback
graph.on_element_click(printstuff)

# stack the label above the figure; the VBox is what this cell displays
fig = bqplot.Figure(marks=[graph])
ipywidgets.VBox(children=[label, fig])

VBox(children=(Label(value=''), Figure(fig_margin={'top': 60, 'bottom': 60, 'left': 60, 'right': 60}, marks=[G…

# Activity #2: Network data - subset of facebook friends dataset
* from: https://snap.stanford.edu/data/egonets-Facebook.html
* dataset of friends lists

#### Info about this dataset:
* the original file you can read in has about 80,000 different connections
* it is ordered by the most connected person (person 0) at the top
* because this network would be computationally slow and just a hairball - we're going to be working with downsampled data
* for example, a file tagged "000090_000010" starts with the 10th most connected person and only includes connections up to the 90th most connected person
* It's worth noting that this dataset (linked here and on the webpage) also includes feature data like gender, last name, school, etc. - however, it is too sparse to be of visualization use to us

Check out the other social network links at the SNAP data webpage!

In [6]:
# Pick which downsampled edge list to load; swap the active line to compare.

# start here
filename = 'facebook_combined_sm000090_000010.txt'

# then this one -- note how different the topologies are
#filename = 'facebook_combined_sm000030_000000.txt'

# from 10 to 150 connections, a few large nodes
#filename = 'facebook_combined_sm000150_000010.txt'

# this might be too large: one large node, up to 100 connections
#filename='facebook_combined_sm000100.txt'

# Each line of the file is read as two space-separated columns, ind1 and ind2.
# NOTE: the directory below is an absolute path on the author's machine --
# point it at wherever you saved the file.
# drop_duplicates is extra cleaning: repeated (ind1, ind2) rows are removed.
network = pd.read_csv(
    '/Users/jnaiman/Downloads/' + filename,
    sep=' ',
    names=['ind1', 'ind2'],
).drop_duplicates()

network

Unnamed: 0,ind1,ind2
0,10,67
2,13,21
3,13,26
4,13,56
5,13,59
...,...,...
113,73,88
114,75,85
115,80,88
116,82,84


In [7]:
# build the network
node_data = []
link_data = []
color_data = [] # all same color

# add nodes:
import numpy as np

# how many unique nodes do we have?
u_nodes = np.unique( np.append(network['ind1'], network['ind2']) )

for un in u_nodes:
    node_data.append({'label':str(un), "shape":"circle"})
    
for iu,un in enumerate(u_nodes):
    target_ids = network.loc[network['ind1'] == un]
    tnodes = np.unique(target_ids['ind2'].values)
    for t in tnodes: # find index associated with this un
        tind = np.where(t == u_nodes)[0][0]
        link_data.append({'source':iu, 'target':tind})
    color_data.append('blue')
                      
#link_data,node_data
#color_data

In [8]:
# plot the network built in the previous cell

graph = bqplot.Graph(
    node_data=node_data,
    link_data=link_data,
    colors=color_data,
    # play with these three for different-looking graphs
    charge=-100,
    link_type='line',
    link_distance=50,
    # there is no direction to links
    directed=False,
)

fig = bqplot.Figure(marks=[graph])
# note: I think the size has to be set through the layout for this to look right
fig.layout.min_height = '900px'
fig.layout.min_width = '1000px'
fig

# In theory we could colour this network by, say, which school people are in.
# The dataset does contain some of those features, but the answer rate is too
# sparse for our subset here.

Figure(fig_margin={'top': 60, 'bottom': 60, 'left': 60, 'right': 60}, layout=Layout(min_height='900px', min_wi…

# Note: the below is just prep if you want to make your own subset datasets

In [None]:
# Prep fb data by downsampling: keep only the edges whose two endpoints both
# have ids in [minCon, maxCon], then write them out as a smaller edge list.
minCon = 0
maxCon = 30
G = pd.read_csv('/Users/jnaiman/Downloads/facebook_combined.txt',sep=' ', names=['ind1', 'ind2'])

# A single boolean mask replaces the earlier row-by-row loop, which
#   * called xrange, which does not exist on Python 3,
#   * seeded the output with the first ind1 == minCon row without range-checking
#     it, and then appended that same row again inside the loop,
#   * re-copied the whole result on every np.vstack.
# Series.between is inclusive at both ends, matching the old <= / >= tests.
# The old loop began scanning at the first row with ind1 == minCon; here every
# row is tested, so earlier rows are kept only if they pass the same range test.
in_range = G['ind1'].between(minCon, maxCon) & G['ind2'].between(minCon, maxCon)
Gnew = G.loc[in_range, ['ind1', 'ind2']].values

# file name encodes the range as <maxCon>_<minCon>, each zero-padded to 6 digits
np.savetxt('/Users/jnaiman/spring2019online/week09/data/facebook_combined_sm' +
           str(maxCon).zfill(6) + '_' + str(minCon).zfill(6) + '.txt', Gnew,fmt='%i')

In [None]:
# display the current link_distance of `graph` as this cell's output
graph.link_distance