# Activity #1: Heat maps
* we'll start with building up a heat map based on some small, randomly generated data
* we'll use this methodology to make our plot interactive & then move on to using "real" data

In [1]:
# lets import our usual stuff
import pandas as pd
import bqplot
import numpy as np
import traitlets
import ipywidgets
%matplotlib inline

In [2]:
# To get a feel for heat maps we first need something to plot:
#  draw a 10 x 10 grid of random numbers from numpy
data = np.random.random(size=(10, 10))
# echo the array so we can see the values that will be color-coded
data

array([[0.93289185, 0.46156884, 0.87425892, 0.66938746, 0.27068634,
        0.06696743, 0.5865793 , 0.22894174, 0.16024406, 0.65099904],
       [0.74417381, 0.45556694, 0.60693891, 0.64119046, 0.54655343,
        0.29196572, 0.61330689, 0.72266469, 0.13855186, 0.86269556],
       [0.18028588, 0.15720023, 0.52441539, 0.35196979, 0.04505631,
        0.87362272, 0.70615044, 0.09532859, 0.27358622, 0.70170921],
       [0.20436217, 0.81067885, 0.38796407, 0.06419703, 0.6786254 ,
        0.9743041 , 0.45256919, 0.35970903, 0.62058446, 0.04616012],
       [0.8757509 , 0.20339671, 0.88189425, 0.62582643, 0.31997731,
        0.68381107, 0.65419485, 0.37235433, 0.5389935 , 0.08171061],
       [0.00173915, 0.51845021, 0.64614285, 0.19992176, 0.71833796,
        0.77696622, 0.78062745, 0.70608081, 0.54507027, 0.19998451],
       [0.27217371, 0.74291131, 0.0229821 , 0.53295556, 0.91509542,
        0.60055314, 0.24356407, 0.30216177, 0.25100141, 0.68214449],
       [0.19502   , 0.09796333, 0.5914884

In [3]:
# Build a quick heat map in three passes.  Every pass rebuilds the
#  figure with one more ingredient; only the final `fig` is displayed.

# (1) bare-bones version: here the only scale we hand to the
#  GridHeatMap mark is a color scale
col_sc = bqplot.ColorScale()
heat_map = bqplot.GridHeatMap(color=data, scales={'color': col_sc})
# a figure is just a container for our marks
fig = bqplot.Figure(marks=[heat_map])

# (2) add a reference for the colors (a color bar) and switch
#  to a different color scheme
col_sc = bqplot.ColorScale(scheme="Reds")
# the color bar is itself an axis: drawn vertically,
#  on the right-hand side of the heat map
c_ax = bqplot.ColorAxis(scale=col_sc, orientation='vertical', side='right')
heat_map = bqplot.GridHeatMap(color=data, scales={'color': col_sc})
fig = bqplot.Figure(marks=[heat_map], axes=[c_ax])

# (3) finally, label the x & y directions.
#  The axes need scales first; ordinal scales just count up
#  the boxes in the horizontal & vertical direction
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()
x_ax = bqplot.Axis(scale=x_sc)
y_ax = bqplot.Axis(scale=y_sc, orientation='vertical')
heat_map = bqplot.GridHeatMap(
    color=data,
    scales={'color': col_sc, 'row': y_sc, 'column': x_sc},
)
fig = bqplot.Figure(marks=[heat_map], axes=[c_ax, y_ax, x_ax])

# show the final version
fig

Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale(scheme='Reds'), side='right'), Axis(orientatio…

In [4]:
# The heat map above is lovely but static - boo to that!
#  Rebuild it so grid cells can be selected with a mouse click.
#  (the random `data` array from before is reused as-is)

# scales: one for color plus ordinal scales along x & y
col_sc = bqplot.ColorScale(scheme="Reds")
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()

# axes: the color bar on the right, then the x & y axes
c_ax = bqplot.ColorAxis(scale=col_sc, orientation='vertical', side='right')
x_ax = bqplot.Axis(scale=x_sc)
y_ax = bqplot.Axis(scale=y_sc, orientation='vertical')

# same heat map as before, now with a click-to-select interaction
#  plus styles for the clicked / selected / unselected cells
heat_map = bqplot.GridHeatMap(
    color=data,
    scales={'color': col_sc, 'row': y_sc, 'column': x_sc},
    interactions={'click': 'select'},
    anchor_style={'fill': 'blue'},  # to make our selection blue
    selected_style={'opacity': 1.0},
    unselected_style={'opacity': 0.8},
)

# stir and combine into 1 figure, then display it
fig = bqplot.Figure(marks=[heat_map], axes=[c_ax, y_ax, x_ax])
fig


Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale(scheme='Reds'), side='right'), Axis(orientatio…

In [5]:
#  Ok fine, but our selection isn't linked to anything yet!
#  Lets check out what the heat map's `selected` attribute holds
#  (click on a box in the figure above before running this cell)
heat_map.selected
#  note: if I select a different box & re-run this cell,
#  I get out different values


In [6]:
# Link the selected heat map cell to its data value and show
#  that value in a little ipywidgets label.
mySelectedLabel = ipywidgets.Label()

# (1)
# there are a few ways to write the linking function,
#  here is a simple way first: read the selection straight
#  off the global heat_map object
def get_data_value(change):
    i, j = heat_map.selected[0]
    v = data[i, j]  # grab data value
    mySelectedLabel.value = str(v)  # set our label

# (2) this is maybe in-elegant as we are explicitly calling
#  our original heat map!  So, lets instead remind ourselves
#  what "change" is by printing it
def get_data_value(change):
    print(change)
    i, j = heat_map.selected[0]
    v = data[i, j]  # grab data value
    mySelectedLabel.value = str(v)  # set our label
# when we click we get back a whole dictionary of information -
#  if we recall, "owner" here is our heat_map which "owns" this change.
#  If we want to be able to apply our function to this or any other
#  heatmap figure we generate, we can re-write the function as follows:

# (3) this final definition replaces (1) and (2) and is the
#  one that actually gets registered below
def get_data_value(change):
    """Show the data value of the clicked heat map cell in the label.

    `change` is the dictionary handed to observers; its 'owner' entry
    is the heat map whose `selected` attribute changed.
    """
    selection = change['owner'].selected
    # nothing to look up when the selection is None or empty;
    #  without this guard `selection[0]` raises inside the callback
    if selection is None or len(selection) == 0:
        mySelectedLabel.value = ''
        return
    i, j = selection[0]
    v = data[i, j]  # grab data value
    mySelectedLabel.value = str(v)  # set our label
# so, this now is applied to any map that we choose to input

# regenerate our heatmap to use in our fig canvas
heat_map = bqplot.GridHeatMap(color = data,
                              scales = {'color': col_sc,
                                        'row': y_sc,
                                        'column': x_sc},
                              interactions = {'click': 'select'},
                              anchor_style = {'fill':'blue'},
                              selected_style = {'opacity': 1.0},
                              unselected_style = {'opacity': 0.8})

# make sure our function is called whenever `selected` changes
heat_map.observe(get_data_value, 'selected')
fig = bqplot.Figure(marks = [heat_map],
                    axes = [c_ax, y_ax, x_ax])

# label on top, figure below
ipywidgets.VBox([mySelectedLabel, fig])

VBox(children=(Label(value=''), Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale(scheme='Reds')…

# Activity #2: Preliminary dashboarding
* we'll use a random dataset to explore how to make dashboard-like plots that change when things are updated

In [7]:
# now lets move on to making a preliminary
#  dashboard for multi-dimensional datasets;
#  lets first start with some randomly generated data again

In [8]:
# 3-d random data: a 10 x 10 grid with 20 values in every grid cell
data = np.random.random((10, 10, 20))
data

data.shape

data[0, 0, :]
# we can see that now, instead of 1 value, each "i,j" component
#  has an array of values

# lets start building up linked plots
#  first, lets re-do our plot above with our label printing
#  out the sum along the 3rd axis (the 20 values of a cell)

# now add scales - colors, x & y
col_sc = bqplot.ColorScale(scheme = "Reds")
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()

# create axis - for colors, x & y
c_ax = bqplot.ColorAxis(scale = col_sc,
                        orientation = 'vertical',
                        side = 'right')
x_ax = bqplot.Axis(scale = x_sc)
y_ax = bqplot.Axis(scale = y_sc,
                   orientation = 'vertical')

# create label again
mySelectedLabel = ipywidgets.Label()

# (1) label-only version of the callback.  It is kept for reference
#  but NOT registered: get_data_value2 below does the same label
#  update, and registering both would set the label twice per click.
def get_data_value(change):
    """Show the sum over the 3rd axis of the selected cell in the label."""
    selection = change['owner'].selected
    # nothing to look up when the selection is None or empty
    if selection is None or len(selection) == 0:
        mySelectedLabel.value = ''
        return
    i, j = selection[0]
    # data[i,j] alone would give the 20 elements, so sum instead
    v = data[i, j].sum()  # grab data value
    mySelectedLabel.value = str(v)  # set our label

# regenerate our heatmap to use in our fig canvas
# we want to plot the sum along our 3rd axis as well,
#  so, lets do this with "np.sum" along our 3rd axis
heat_map = bqplot.GridHeatMap(color = np.sum(data, axis=2),
                              scales = {'color': col_sc,
                                        'row': y_sc,
                                        'column': x_sc},
                              interactions = {'click': 'select'},
                              anchor_style = {'fill':'blue'},
                              selected_style = {'opacity': 1.0},
                              unselected_style = {'opacity': 0.8})

fig = bqplot.Figure(marks = [heat_map], axes = [c_ax, y_ax, x_ax])

# (2)
# now, lets generate another figure that plots the histogram
#  of the values along our 3rd axis for one grid cell
x_sch = bqplot.LinearScale()
y_sch = bqplot.LinearScale()
# the histogram shows the individual values, not their sum,
#  so label the x axis accordingly
x_axh = bqplot.Axis(scale = x_sch, label = 'Values along 3rd axis')
y_axh = bqplot.Axis(scale = y_sch,
                    orientation = 'vertical',
                    label = 'Frequency')

# start with the (0, 0) cell until something is selected
hist = bqplot.Hist(sample = data[0, 0, :],
                   opacity = 0.1,
                   normalized = False,  # normalized=False means we get counts in each bin
                   scales = {'sample': x_sch, 'count': y_sch},
                   bins = 5)
figh = bqplot.Figure(marks = [hist], axes = [x_axh, y_axh])
# ok, so side by side plots, but nothing updates yet!

# (3) so, we have to give the heatmap's callback access to both
#  the label *AND* the histogram's data
def get_data_value2(change):
    """Update the label and the histogram for the selected cell."""
    selection = change['owner'].selected
    # nothing to look up when the selection is None or empty;
    #  clear the label and leave the histogram as it is
    if selection is None or len(selection) == 0:
        mySelectedLabel.value = ''
        return
    i, j = selection[0]
    v = data[i, j].sum()  # grab data value
    mySelectedLabel.value = str(v)  # set our label
    hist.sample = data[i, j]  # re-draw histogram with this cell's values

# register a single observer that handles both updates
heat_map.observe(get_data_value2, 'selected')
# note here now the heat_map is in a sense "driving" our
# changes.
# *** DO EXAMPLE OF BACK AND FORTH ***

# label on top, heat map and histogram side by side below
ipywidgets.VBox([mySelectedLabel, ipywidgets.HBox([fig, figh])])


VBox(children=(Label(value=''), HBox(children=(Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale…

# Activity #3: Dashboarding with "real" data
* now we'll move onto the UFO dataset and start messing around with creating a dashboard for this dataset

In [11]:
# Load the UFO sightings csv into a DataFrame.
#  Column names are supplied explicitly through `names`, and the two
#  date columns are parsed into datetimes while reading.
ufo_column_names = [
    "date", "city", "state", "country",
    "shape", "duration_seconds", "duration",
    "comment", "report_date",
    "latitude", "longitude",
]
# NOTE(review): absolute, machine-specific path - adjust to wherever
#  the csv lives on your machine
ufos = pd.read_csv(
    "/Users/jnaiman/Downloads/ufo-scrubbed-geocoded-time-standardized-00.csv",
    names=ufo_column_names,
    parse_dates=["date", "report_date"],
)

## Aside: downsampling
* some folks reported having a tough time with interactivity of scatter plots with the UFO dataset
* here we'll quickly go over some methods of downsampling that can be applied to decrease the size of our dataset

In [12]:
# you'll see the above takes a good long time to load on my computer;
# the dataset is quite large - count its rows:
len(ufos)

80332

In [13]:
# 80,000!  So, to speed up our interactivity, we can
#  randomly sample this dataset for plotting purposes
# lets down sample to 1000 samples:
nsamples = 1000
#nsamples = 5000
# np.random.randint excludes its upper bound, so we pass len(ufos)
#  (not len(ufos)-1) - otherwise the last row could never be drawn
downSampleMask = np.random.randint(0, len(ufos), nsamples)
downSampleMask
# so, downsample mask is now an array of random row positions for
#  the UFO dataset (repeats are possible with this call)

array([26630, 67544, 65407, 39870, 74090, 43185, 17727, 49990,  3695,
       42047, 14437, 18563, 79817, 25058, 24942, 10267, 15931, 55560,
       43046, 12038, 29690, 68222,  8013, 45464, 22992, 34964, 12094,
        3423, 41797, 45188, 71080, 16179, 76659, 72330, 32398, 30002,
       15269, 56214, 51901, 68819, 24765,  6654, 58830, 24703, 38276,
       34664, 44961, 47215, 45129, 57428, 24583, 39703, 48931, 16202,
       45931, 10964, 52392, 38136, 46470, 24627, 41496, 18393, 58068,
       45202, 29621, 31102, 16580, 13788, 37524, 30597, 72031,  4229,
       75315, 30737, 28577, 75341, 35019, 70910, 11497, 49741,  7748,
       78354, 62885, 47918, 46372, 11327,  6292, 63927, 49756, 80232,
       19481, 26086, 67547, 11365, 37939,  8584, 15921, 45063, 77280,
       52161, 48067, 46947, 28414, 39140, 39099, 68281,  4264, 70569,
        4777, 58959, 77518, 30075,  3166,  6077,  1211, 55729, 15703,
        8319, 15170, 80005, 46843, 11879, 38085, 21870, 67089, 15595,
       23175,  6652,

In [14]:
# the above doesn't exclude repeats, but we can take
#  care of this with a different call:
# passing the integer len(ufos) samples from 0 .. len(ufos)-1, so
#  every row (including the last one) is eligible, and
#  replace=False guarantees no row position is drawn twice
downSampleMask = np.random.choice(len(ufos),
                                  nsamples, replace=False)

In [15]:
# lets update:
# downSampleMask holds row *positions*, so select with .iloc;
#  .loc looks rows up by index label and only gives the same rows
#  when the index happens to be the default 0..N-1 range
ufosDS = ufos.iloc[downSampleMask]
len(ufosDS)
# so much shorter

1000

In [16]:
# we can also see that the down-sampled data is still a dataframe
#  (the row labels shown are those of the sampled rows):
ufosDS

Unnamed: 0,date,city,state,country,shape,duration_seconds,duration,comment,report_date,latitude,longitude
77130,1998-09-23 21:11:00,everett,wa,us,fireball,2.0,2 seconds,Bright Green blazing ball passing from Northea...,1998-11-01,47.979167,-122.200833
8193,1998-11-01 23:59:00,west bend,wi,,light,600.0,10min.,the night before art bell left the airwaves wh...,2002-09-06,43.797762,-88.003208
22320,2008-01-24 18:10:00,hayward,ca,us,oval,180.0,3 minutes,Unknown Red Oval in the sky over the San Franc...,2008-02-14,37.668889,-122.079722
78264,2010-09-28 22:00:00,yakima,wa,us,chevron,5.0,5 seconds,Chevron shape of pale white/yellow lights flyi...,2010-11-21,46.602222,-120.504722
52480,2008-06-29 19:00:00,cape girardeau,mo,us,oval,900.0,15 minutes,5 silver&#44 oval shaped objects moving slowly...,2008-07-05,37.305833,-89.518056
...,...,...,...,...,...,...,...,...,...,...,...
61355,2010-07-03 02:33:00,columbia,sc,us,formation,5.0,5 secs,On July 3&#44 2010 at 2:33 am I was traveling ...,2010-08-30,34.000556,-81.035000
18000,2012-12-13 19:15:00,homewood,al,us,light,3.0,3 seconds,Unexplained single bright light passes overhea...,2012-12-20,33.471667,-86.800833
53707,2010-06-05 01:57:00,wilmore,ky,us,light,2.0,2 seconds,Bright white light flying over wilmore Kentucky,2010-06-10,37.861944,-84.661667
19541,1999-12-02 17:10:00,seattle (univ of wa),wa,us,light,30.0,30 sec,Two lights moving eratically over Lake Washing...,1999-12-16,47.606389,-122.330833


In [17]:
# A super quick scatter plot of the down-sampled sightings,
#  to remind ourselves what this data looks like.

# linear scales + labelled axes for longitude (x) & latitude (y)
x_sc = bqplot.LinearScale()
y_sc = bqplot.LinearScale()
x_ax = bqplot.Axis(scale=x_sc, label='Longitude')
y_ax = bqplot.Axis(scale=y_sc, orientation='vertical', label='Latitude')

# How we got here:
#  (1) the plainest version is a Scatter of longitude vs latitude
#      using only the x & y scales
#  (2) we can also color the points by duration_seconds by adding a
#      color scale (and a ColorAxis labelled 'Duration in sec')
#  (3) there is a large range in durations, so coloring by the raw
#      values gives a muted color pattern - we color by the log10
#      of the duration instead, which is what is done below
c_sc = bqplot.ColorScale()
c_ax = bqplot.ColorAxis(scale=c_sc, label='log(sec)',
                        orientation='vertical', side='right')
scatters = bqplot.Scatter(
    x=ufosDS['longitude'],
    y=ufosDS['latitude'],
    color=np.log10(ufosDS['duration_seconds']),
    scales={'x': x_sc, 'y': y_sc, 'color': c_sc},
)

# put marks & axes together and display
fig = bqplot.Figure(marks=[scatters], axes=[x_ax, y_ax, c_ax])
fig

Figure(axes=[Axis(label='Longitude', scale=LinearScale()), Axis(label='Latitude', orientation='vertical', scal…

In [18]:
# now we are going to use our heatmap idea to plot this data again
#  note this will shmear out a lot of the nice map stuff we see above
#  don't worry!  We'll talk about making maps in the next class or so

# what should we color by?  lets do by duration

# to get this to work with our heatmap, we're going
#  to have to do some rebinning:
#  right now, our data is all in 1 long list, and we need to
#  rebin things in a 2d histogram where the x axis is long & y is lat

# ***START WITH 10 EACH**
nlong = 20
nlat = 20

# (1) weighting each sighting by its duration gives the
#  TOTAL duration of ufo events in each bin
hist2d, long_edges, lat_edges = np.histogram2d(ufos['longitude'],
                                               ufos['latitude'],
                                               weights=ufos['duration_seconds'],
                                               bins=[nlong, nlat])
hist2d

# (2) the AVERAGE duration in each bin is the total duration divided
#  by the number of sightings in that bin.
#  (a normalization flag on histogram2d would give a density instead
#   of an average, and the old `normed` keyword is no longer accepted
#   by current numpy)
counts2d = np.histogram2d(ufos['longitude'],
                          ufos['latitude'],
                          bins=[nlong, nlat])[0]
# empty bins are 0/0: silence the warning and let them become NaN
with np.errstate(divide='ignore', invalid='ignore'):
    hist2d = hist2d / counts2d
hist2d

# (3) ok, lets go back to total duration
hist2d, long_edges, lat_edges = np.histogram2d(ufos['longitude'],
                                               ufos['latitude'],
                                               weights=ufos['duration_seconds'],
                                               bins=[nlong, nlat])

# note that the sizes of the edges & the hist are different:
hist2d.shape, long_edges.shape, lat_edges.shape

# this is because the edges are bin edges, not centers
# to get bin centers we average each pair of neighboring edges:
long_centers = 0.5 * (long_edges[:-1] + long_edges[1:])
lat_centers = 0.5 * (lat_edges[:-1] + lat_edges[1:])
long_centers, lat_centers

# (4) note: we might want to control where our bins are, we can do this by
#  specifying bin edges ourselves (n bins need n+1 edges)
long_bins = np.linspace(-150, 150, nlong+1)
lat_bins = np.linspace(-40, 70, nlat+1)
long_bins, long_bins.shape
lat_bins, lat_bins.shape

hist2d, long_edges, lat_edges = np.histogram2d(ufos['longitude'],
                                               ufos['latitude'],
                                               weights=ufos['duration_seconds'],
                                               bins=[long_bins, lat_bins])

# re-compute the bin centers for our hand-picked edges
long_centers = 0.5 * (long_edges[:-1] + long_edges[1:])
lat_centers = 0.5 * (lat_edges[:-1] + lat_edges[1:])

# (5)
# again, we want to take the log scale of things
#  we're going to do this by taking the log of hist2d
#  but there are some zero values in this histogram
# if we just take the log we get -inf
np.log10(hist2d)
# this can mess up our color scheme mapping

# (6) so we are going to "trick" our color scheme like so
hist2d[hist2d <= 0] = np.nan # set zeros to NaNs
# then take log
hist2d = np.log10(hist2d)
hist2d

# (7) finally, our histogram is actually transposed - numpy puts
#  the first input (longitude) along the first axis.
# lets put the world right side up (rows = latitude) with:
hist2d = hist2d.T



In [19]:
# now that we have all that fancy binning out of the way,
#  lets proceed as normal:

# add scales - colors, x & y
# the color range is pinned to the finite values of hist2d
#  (nanmin/nanmax skip the NaNs we put in for empty bins)
col_sc = bqplot.ColorScale(scheme="RdPu",
                           min=np.nanmin(hist2d),
                           max=np.nanmax(hist2d))
x_sc = bqplot.LinearScale()
y_sc = bqplot.LinearScale()

# create axis - for colors, x & y
c_ax = bqplot.ColorAxis(scale = col_sc,
                        orientation = 'vertical',
                        side = 'right')
x_ax = bqplot.Axis(scale = x_sc, label='Longitude')
y_ax = bqplot.Axis(scale = y_sc,
                   orientation = 'vertical',
                   label = 'Latitude')

# rows are latitude bins & columns are longitude bins
heat_map = bqplot.GridHeatMap(color = hist2d,
                              row = lat_centers,
                              column = long_centers,
                              scales = {'color': col_sc,
                                        'row': y_sc,
                                        'column': x_sc},
                              interactions = {'click': 'select'},
                              anchor_style = {'fill':'blue'},
                              selected_style = {'opacity': 1.0},
                              unselected_style = {'opacity': 1.0})

#***GO BACK AND PLAY WITH BIN SIZES***

# (2) lets add a label again to print duration
# create label again
mySelectedLabel = ipywidgets.Label()
def get_data_value(change):
    """Show the log10 total duration of the selected bin in the label."""
    selection = change['owner'].selected
    # nothing to look up when the selection is None or empty;
    #  without this guard `selection[0]` raises inside the callback
    if selection is None or len(selection) == 0:
        mySelectedLabel.value = ''
        return
    i, j = selection[0]
    v = hist2d[i, j] # grab data value
    mySelectedLabel.value = 'Total duration in log(sec) = ' + str(v) # set our label

# make sure our function is called whenever `selected` changes
heat_map.observe(get_data_value, 'selected')


fig = bqplot.Figure(marks = [heat_map], axes = [c_ax, y_ax, x_ax])

# (1) showing just `fig` gives the heat map on its own;
# (2) here we stack the label on top of it
ipywidgets.VBox([mySelectedLabel, fig])

VBox(children=(Label(value=''), Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale(max=8.09710747…

In [20]:
# ok, now lets build up our dashboard
# again to also show how the duration of UFO sightings in each
# selected region changes with year

# we'll do this with the same methodology we applied before
#  **copy paste above***

# (1)

# (I) For the heatmap
# add scales - colors, x & y
col_sc = bqplot.ColorScale(scheme="RdPu",
                           min=np.nanmin(hist2d),
                           max=np.nanmax(hist2d))
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()

# create axis - for colors, x & y
c_ax = bqplot.ColorAxis(scale = col_sc,
                        orientation = 'vertical',
                        side = 'right')
x_ax = bqplot.Axis(scale = x_sc, label='Longitude')
y_ax = bqplot.Axis(scale = y_sc,
                   orientation = 'vertical',
                   label = 'Latitude')

heat_map = bqplot.GridHeatMap(color = hist2d,
                              row = lat_centers,
                              column = long_centers,
                              scales = {'color': col_sc,
                                        'row': y_sc,
                                        'column': x_sc},
                              interactions = {'click': 'select'},
                              anchor_style = {'fill':'blue'},
                              selected_style = {'opacity': 1.0},
                              unselected_style = {'opacity': 1.0})

fig = bqplot.Figure(marks = [heat_map], axes = [c_ax, y_ax, x_ax])

# (II) Scatter plot
# scales & ax in usual way
import datetime as dt
x_scl = bqplot.DateScale(min=dt.datetime(1950,1,1),max=dt.datetime(2020,1,1)) # note: for dates on x-axis
y_scl = bqplot.LogScale()
ax_xcl = bqplot.Axis(label='Date', scale=x_scl)
ax_ycl = bqplot.Axis(label='Duration in Sec', scale=y_scl,
                    orientation='vertical', side='left')
# for the plot of duration in a region as a function of date
# lets start with a default region: the first longitude bin
#  and the first latitude bin
i,j = 0,0
longs = [long_edges[i], long_edges[i+1]]
lats = [lat_edges[j],lat_edges[j+1]]
region_mask = ( (ufos['latitude'] >= lats[0]) & (ufos['latitude']<=lats[1]) &\
                (ufos['longitude'] >= longs[0]) & (ufos['longitude']<=longs[1]) )

# check which sightings this default region picks out
lats, longs, ufos['latitude'][region_mask]

# lets plot the durations as a function of date there
duration_scatt = bqplot.Scatter(x = ufos['date'][region_mask],
                               y = ufos['duration_seconds'][region_mask],
                              scales={'x':x_scl, 'y':y_scl})

fig_dur = bqplot.Figure(marks = [duration_scatt], axes = [ax_xcl, ax_ycl])

# create label again
mySelectedLabel = ipywidgets.Label()
# label-only callback, kept for reference; it is not registered
#  because get_data_value2 below also updates the scatter plot
def get_data_value(change):
    """Show the log10 total duration of the selected bin in the label."""
    selection = change['owner'].selected
    # nothing to look up when the selection is None or empty
    if selection is None or len(selection) == 0:
        mySelectedLabel.value = ''
        return
    i, j = selection[0]
    v = hist2d[i, j] # grab data value
    mySelectedLabel.value = 'Total duration in log(sec) = ' + str(v) # set our label

# (2) now again, we want our scatter plot to react to changes
#  to what we've selected so:
def get_data_value2(change):
    """Update the label and the duration scatter plot for the selected bin."""
    selection = change['owner'].selected
    # nothing to look up when the selection is None or empty;
    #  clear the label and leave the scatter plot as it is
    if selection is None or len(selection) == 0:
        mySelectedLabel.value = ''
        return
    i, j = selection[0]
    v = hist2d[i, j] # grab data value
    mySelectedLabel.value = 'Total duration in log(sec) = ' + str(v) # set our label
    # note!! hist2d was transposed, so the selected row i is a
    #  latitude bin and the selected column j is a longitude bin
    longs = [long_edges[j], long_edges[j+1]]
    lats = [lat_edges[i], lat_edges[i+1]]
    region_mask = ( (ufos['latitude'] >= lats[0]) & (ufos['latitude']<=lats[1]) &\
                (ufos['longitude'] >= longs[0]) & (ufos['longitude']<=longs[1]) )
    duration_scatt.x = ufos['date'][region_mask]
    duration_scatt.y = ufos['duration_seconds'][region_mask]
# make sure we connect to heatmap
heat_map.observe(get_data_value2, 'selected')


ipywidgets.VBox([mySelectedLabel, ipywidgets.HBox([fig,fig_dur])])
# note that when I select a deep purple place, my scatter plot is
#  very laggy, this makes me think we should do this with a
#  histogram/bar type plot


VBox(children=(Label(value=''), HBox(children=(Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale…

In [21]:
# Same dashboard as before, but the per-region panel is now a
#  bar plot of total duration per block of years (less laggy
#  than a scatter plot of every sighting).

# (I) For the heatmap
# add scales - colors, x & y
col_sc = bqplot.ColorScale(scheme="RdPu",
                           min=np.nanmin(hist2d),
                           max=np.nanmax(hist2d))
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()

# create axis - for colors, x & y
c_ax = bqplot.ColorAxis(scale = col_sc,
                        orientation = 'vertical',
                        side = 'right')
x_ax = bqplot.Axis(scale = x_sc, label='Longitude')
y_ax = bqplot.Axis(scale = y_sc,
                   orientation = 'vertical',
                   label = 'Latitude')

heat_map = bqplot.GridHeatMap(color = hist2d,
                              row = lat_centers,
                              column = long_centers,
                              scales = {'color': col_sc,
                                        'row': y_sc,
                                        'column': x_sc},
                              interactions = {'click': 'select'},
                              anchor_style = {'fill':'blue'},
                              selected_style = {'opacity': 1.0},
                              unselected_style = {'opacity': 1.0})

fig = bqplot.Figure(marks = [heat_map], axes = [c_ax, y_ax, x_ax])

# (II) Bar plot
# scales & ax in usual way
x_scl = bqplot.LinearScale() # note we are back to linears
y_scl = bqplot.LinearScale()
ax_xcl = bqplot.Axis(label='Date', scale=x_scl)
ax_ycl = bqplot.Axis(label='Total duration in Sec', scale=y_scl,
                    orientation='vertical', side='left')
# for the plot of duration in a region as a function of year
# lets start with a default region: the first longitude bin
#  and the first latitude bin
i,j = 0,0
longs = [long_edges[i], long_edges[i+1]]
lats = [lat_edges[j],lat_edges[j+1]]
region_mask = ( (ufos['latitude'] >= lats[0]) & (ufos['latitude']<=lats[1]) &\
                (ufos['longitude'] >= longs[0]) & (ufos['longitude']<=longs[1]) )

# check which sightings this default region picks out
lats, longs, ufos['latitude'][region_mask]

# lets plot the durations as a function of year there
ufos['year'] = ufos['date'].dt.year
dur, dur_edges = np.histogram(ufos['year'][region_mask],
                              weights=ufos['duration_seconds'][region_mask],
                              bins=10)
# like before with our histograms: centers from neighboring edges
dur_centers = 0.5 * (dur_edges[:-1] + dur_edges[1:])
# make histogram by hand, weighting by duration
duration_hist = bqplot.Bars(x=dur_centers, y=dur,
                          scales={'x':x_scl, 'y':y_scl})
fig_dur = bqplot.Figure(marks = [duration_hist], axes = [ax_xcl, ax_ycl])

# create the label for this dashboard (as in the cells above),
#  so this cell does not depend on a label made elsewhere
mySelectedLabel = ipywidgets.Label()

# we want our bar plot to react to what we've selected so:
def get_data_value(change):
    """Update the label and the duration bar plot for the selected bin."""
    selection = change['owner'].selected
    # nothing to look up when the selection is None or empty;
    #  clear the label and leave the bar plot as it is
    if selection is None or len(selection) == 0:
        mySelectedLabel.value = ''
        return
    i, j = selection[0]
    v = hist2d[i, j] # grab data value
    mySelectedLabel.value = 'Total duration in log(sec) = ' + str(v) # set our label
    # note!! hist2d was transposed, so the selected row i is a
    #  latitude bin and the selected column j is a longitude bin
    longs = [long_edges[j], long_edges[j+1]]
    lats = [lat_edges[i], lat_edges[i+1]]
    region_mask = ( (ufos['latitude'] >= lats[0]) & (ufos['latitude']<=lats[1]) &\
                (ufos['longitude'] >= longs[0]) & (ufos['longitude']<=longs[1]) )
    # re-bin this region's durations by year
    dur, dur_edges = np.histogram(ufos['year'][region_mask],
                                  weights=ufos['duration_seconds'][region_mask],
                                  bins=10)
    duration_hist.x = 0.5 * (dur_edges[:-1] + dur_edges[1:])
    duration_hist.y = dur
# make sure we connect to heatmap
heat_map.observe(get_data_value, 'selected')


ipywidgets.VBox([mySelectedLabel, ipywidgets.HBox([fig,fig_dur])])

VBox(children=(Label(value=''), HBox(children=(Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale…

## Might not get to this...

In [22]:
# Full dashboard: the heat map drives two linked panels - a bar plot
#  of total duration through the years and a bar plot of reported
#  shapes for the selected region.

# (I) For the heatmap
col_sc = bqplot.ColorScale(scheme="RdPu",
                           min=np.nanmin(hist2d),
                           max=np.nanmax(hist2d))
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()

# create axis - for colors, x & y
c_ax = bqplot.ColorAxis(scale = col_sc,
                        orientation = 'vertical',
                        side = 'right')
x_ax = bqplot.Axis(scale = x_sc, label='Longitude')
y_ax = bqplot.Axis(scale = y_sc,
                   orientation = 'vertical',
                   label = 'Latitude')

heat_map = bqplot.GridHeatMap(color = hist2d,
                              row = lat_centers,
                              column = long_centers,
                              scales = {'color': col_sc,
                                        'row': y_sc,
                                        'column': x_sc},
                              interactions = {'click': 'select'},
                              anchor_style = {'fill':'blue'},
                              selected_style = {'opacity': 1.0},
                              unselected_style = {'opacity': 1.0})

fig = bqplot.Figure(marks = [heat_map], axes = [c_ax, y_ax, x_ax])

# (II) Bar plot for durations through the years
# scales & ax in usual way
x_scl = bqplot.LinearScale() # note we are back to linears
y_scl = bqplot.LinearScale()
ax_xcl = bqplot.Axis(label='Date', scale=x_scl)
ax_ycl = bqplot.Axis(label='Total duration in Sec', scale=y_scl,
                    orientation='vertical', side='left')
# for the plot of duration in a region as a function of year
# lets start with a default region: the first longitude bin
#  and the first latitude bin
i,j = 0,0
longs = [long_edges[i], long_edges[i+1]]
lats = [lat_edges[j],lat_edges[j+1]]
region_mask = ( (ufos['latitude'] >= lats[0]) & (ufos['latitude']<=lats[1]) &\
                (ufos['longitude'] >= longs[0]) & (ufos['longitude']<=longs[1]) )

# check which sightings this default region picks out
lats, longs, ufos['latitude'][region_mask]

# lets plot the durations as a function of year there
ufos['year'] = ufos['date'].dt.year
dur, dur_edges = np.histogram(ufos['year'][region_mask],
                              weights=ufos['duration_seconds'][region_mask],
                              bins=10)
# like before with our histograms: centers from neighboring edges
dur_centers = 0.5 * (dur_edges[:-1] + dur_edges[1:])
# make histogram by hand, weighting by duration
duration_hist = bqplot.Bars(x=dur_centers, y=dur,
                          scales={'x':x_scl, 'y':y_scl})
fig_dur = bqplot.Figure(marks = [duration_hist], axes = [ax_xcl, ax_ycl])

# (III) histogram for shape
x_ord = bqplot.OrdinalScale()
y_ord = bqplot.LinearScale()
ax_xord = bqplot.Axis(label='Shape', scale=x_ord)
ax_yord = bqplot.Axis(label='Freq', scale=y_ord,
                     orientation='vertical',
                     side='left')

# histogram using pandas.
# Take BOTH the shape names and their counts from the same
#  value_counts() result so they stay aligned: unique() lists shapes
#  in order of appearance (and keeps NaN), while value_counts() is
#  ordered by count (and drops NaN), so mixing the two mislabels bars
shape_counts = ufos['shape'][region_mask].value_counts()
hist_ord = bqplot.Bars(x=shape_counts.index.values,
                       y=shape_counts.values,
                     scales={'x':x_ord, 'y':y_ord})
fig_shape = bqplot.Figure(marks=[hist_ord], axes=[ax_xord,ax_yord])

# create the label for this dashboard (as in the cells above),
#  so this cell does not depend on a label made elsewhere
mySelectedLabel = ipywidgets.Label()

# we want both bar plots to react to what we've selected so:
def get_data_value(change):
    """Update the label and both bar plots for the selected bin."""
    selection = change['owner'].selected
    # nothing to look up when the selection is None or empty;
    #  clear the label and leave the bar plots as they are
    if selection is None or len(selection) == 0:
        mySelectedLabel.value = ''
        return
    i, j = selection[0]
    v = hist2d[i, j] # grab data value
    mySelectedLabel.value = 'Total duration in log(sec) = ' + str(v) # set our label
    # note!! hist2d was transposed, so the selected row i is a
    #  latitude bin and the selected column j is a longitude bin
    longs = [long_edges[j], long_edges[j+1]]
    lats = [lat_edges[i], lat_edges[i+1]]
    region_mask = ( (ufos['latitude'] >= lats[0]) & (ufos['latitude']<=lats[1]) &\
                (ufos['longitude'] >= longs[0]) & (ufos['longitude']<=longs[1]) )
    # re-bin this region's durations by year
    dur, dur_edges = np.histogram(ufos['year'][region_mask],
                                  weights=ufos['duration_seconds'][region_mask],
                                  bins=10)
    duration_hist.x = 0.5 * (dur_edges[:-1] + dur_edges[1:])
    duration_hist.y = dur
    # also update shapes - names & counts from one value_counts()
    #  call so each bar keeps its own label
    shape_counts = ufos['shape'][region_mask].value_counts()
    hist_ord.x = shape_counts.index.values
    hist_ord.y = shape_counts.values
# make sure we connect to heatmap
heat_map.observe(get_data_value, 'selected')

# lets make all the sizes look nice
fig_dur.layout.max_width = '400px'
fig_dur.layout.max_height= '300px'
fig_shape.layout.max_width = '400px'
fig_shape.layout.max_height= '300px'
fig.layout.min_width = '800px' # add to both

# layout: label on top, the two bar plots side by side,
#  and the heat map underneath
ipywidgets.VBox([mySelectedLabel,
                ipywidgets.HBox([fig_shape,fig_dur]),
                fig])

VBox(children=(Label(value=''), HBox(children=(Figure(axes=[Axis(label='Shape', scale=OrdinalScale()), Axis(la…