# Heat maps in general

In [1]:
import pandas as pd
import bqplot
import numpy as np
import traitlets
import ipywidgets
%matplotlib inline

In [2]:
# Demo input for the heat map: a 10x10 grid of random floats drawn from [0, 1).
grid_shape = (10, 10)
data = np.random.random(grid_shape)
data

array([[0.23092037, 0.51070375, 0.14764475, 0.15717594, 0.81102341,
        0.54247886, 0.48342613, 0.57650189, 0.89794356, 0.62724028],
       [0.43179843, 0.52949779, 0.09160056, 0.22295566, 0.21432446,
        0.6280286 , 0.96302195, 0.91642855, 0.10334595, 0.28056707],
       [0.25498167, 0.45035291, 0.11763966, 0.56070143, 0.82339358,
        0.22771962, 0.00168879, 0.53503713, 0.10801506, 0.4750034 ],
       [0.56640445, 0.81418503, 0.53080142, 0.06487343, 0.66616992,
        0.15091289, 0.66515747, 0.62994161, 0.60454975, 0.08120314],
       [0.92989601, 0.21264162, 0.49969073, 0.48059324, 0.58078759,
        0.34760288, 0.10932379, 0.52237911, 0.39963938, 0.36748485],
       [0.09945786, 0.00839209, 0.03534934, 0.77675107, 0.48589062,
        0.26428261, 0.77959407, 0.54975223, 0.02571956, 0.9132695 ],
       [0.22419661, 0.92033569, 0.03124625, 0.7842493 , 0.8121055 ,
        0.40504285, 0.24324852, 0.04721563, 0.75476485, 0.74076596],
       [0.01701034, 0.28934682, 0.1524402

In [16]:
# --- scales and axes ---------------------------------------------------
# colour scale for the cell values, plus the colour bar that displays it
col_sc = bqplot.ColorScale(scheme="Reds")
c_ax = bqplot.ColorAxis(scale=col_sc, orientation='vertical', side='right')

# ordinal x/y scales with one axis each
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()
x_ax = bqplot.Axis(scale=x_sc)
y_ax = bqplot.Axis(scale=y_sc, orientation='vertical')

# --- heat map ----------------------------------------------------------
# Rows go on the vertical (y) scale and columns on the horizontal (x) scale,
# so each axis annotates the scale the mark uses in that direction.
heat_map = bqplot.GridHeatMap(color=data,
                              scales={'color': col_sc,
                                      'row': y_sc,
                                      'column': x_sc},
                              interactions={'click': 'select'},
                              anchor_style={'fill': 'blue'},
                              selected_style={'opacity': 1.0},
                              unselected_style={'opacity': 0.7})

# label widget that reports the value of the clicked cell
mySelectedLabel = ipywidgets.Label()


def get_data_value(change):
    """Show the data value of the first selected heat-map cell in the label.

    `change` is the change dict delivered by `observe`; the current
    selection is read from `change['owner'].selected`. When nothing is
    selected (None or empty) the label is cleared instead of raising
    inside the widget callback.
    """
    selected = change['owner'].selected
    if selected is None or len(selected) == 0:
        mySelectedLabel.value = ''
        return
    # only the first selected (i, j) index pair is reported
    i, j = selected[0]
    v = data[i, j]
    mySelectedLabel.value = 'Data value = ' + str(v)


# run get_data_value every time the heat map's `selected` trait changes
heat_map.observe(get_data_value, 'selected')

fig = bqplot.Figure(marks=[heat_map], axes=[c_ax, x_ax, y_ax])
ipywidgets.VBox([mySelectedLabel, fig])

VkJveChjaGlsZHJlbj0oTGFiZWwodmFsdWU9dScnKSwgRmlndXJlKGF4ZXM9W0NvbG9yQXhpcyhvcmllbnRhdGlvbj0ndmVydGljYWwnLCBzY2FsZT1Db2xvclNjYWxlKHNjaGVtZT11J1JlZHPigKY=


In [13]:
# inspect the heat map's current selection (cells are selected by clicking them)
heat_map.selected

[[6, 5]]

# Preliminary Dashboard

In [18]:
# Replace the demo data with a 3-D random cube: a 10x10 grid whose cells each
# hold 20 values along the third axis.
cube_shape = (10, 10, 20)
data = np.random.random(cube_shape)
data.shape  # not echoed: only the last expression of a cell is displayed
data[0, 0]  # the 20 values stored at grid position (0, 0)

array([0.39703929, 0.98590824, 0.88535143, 0.38633475, 0.3845142 ,
       0.99129723, 0.29209551, 0.17787804, 0.09238084, 0.7027257 ,
       0.8666513 , 0.62930709, 0.95876609, 0.24454906, 0.52001605,
       0.41127743, 0.67008351, 0.85898524, 0.38772933, 0.49966386])

In [26]:
# --- (I) heat map of the data summed along its 3rd axis ----------------
# scales
col_sc = bqplot.ColorScale(scheme='Reds')
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()

# axes
c_ax = bqplot.ColorAxis(scale=col_sc, orientation='vertical', side='right')
x_ax = bqplot.Axis(scale=x_sc)
y_ax = bqplot.Axis(scale=y_sc, orientation='vertical')

# label showing information about the selected cell
mySelectedLabel = ipywidgets.Label()


def get_data_value(change):
    """Label-only callback: write the 3rd-axis sum of the selected cell.

    Not registered with `observe` in this cell; `get_data_value2` below is
    the callback that is actually attached to the heat map. An empty or
    None selection clears the label instead of raising.
    """
    selected = change['owner'].selected
    if selected is None or len(selected) == 0:
        mySelectedLabel.value = ''
        return
    i, j = selected[0]
    v = data[i, j].sum()
    mySelectedLabel.value = str(v)


# Rows go on the vertical (y) scale and columns on the horizontal (x) scale,
# so each axis annotates the scale the mark uses in that direction.
heat_map = bqplot.GridHeatMap(color=np.sum(data, axis=2),
                              scales={'color': col_sc,
                                      'row': y_sc,
                                      'column': x_sc},
                              interactions={'click': 'select'},
                              anchor_style={'fill': 'blue'},
                              selected_style={'opacity': 1.0},
                              unselected_style={'opacity': 0.7})

fig = bqplot.Figure(marks=[heat_map], axes=[c_ax, y_ax, x_ax])

# --- (II) histogram of the 3rd-axis values of one cell -----------------
# scales and axes
x_sch = bqplot.LinearScale()
y_sch = bqplot.LinearScale()
x_axh = bqplot.Axis(scale=x_sch, label='3rd Axis Values')
y_axh = bqplot.Axis(scale=y_sch, label='Frequency', orientation='vertical')

# start with the values of cell (0, 0); the callback below swaps the sample
hist = bqplot.Hist(sample=data[0, 0, :],
                   opacity=0.1,
                   normalized=False,
                   scales={'sample': x_sch, 'count': y_sch},
                   bins=5)

figh = bqplot.Figure(marks=[hist], axes=[x_axh, y_axh])


# --- (III) link the selection to the label and the histogram -----------
def get_data_value2(change):
    """Update the label and the histogram for the first selected cell.

    The label receives the sum along the 3rd axis and the histogram's
    sample is replaced with that cell's values. With an empty or None
    selection the label is cleared and the histogram is left unchanged.
    """
    selected = change['owner'].selected
    if selected is None or len(selected) == 0:
        mySelectedLabel.value = ''
        return
    i, j = selected[0]
    v = data[i, j].sum()
    mySelectedLabel.value = 'Sum along 3rd axis = ' + str(v)
    hist.sample = data[i, j]


# run get_data_value2 every time the heat map's `selected` trait changes
heat_map.observe(get_data_value2, 'selected')

ipywidgets.VBox([mySelectedLabel, ipywidgets.HBox([fig, figh])])


VkJveChjaGlsZHJlbj0oTGFiZWwodmFsdWU9dScnKSwgSEJveChjaGlsZHJlbj0oRmlndXJlKGF4ZXM9W0NvbG9yQXhpcyhvcmllbnRhdGlvbj0ndmVydGljYWwnLCBzY2FsZT1Db2xvclNjYWzigKY=


In [27]:
# Load the UFO sightings table. Column names are passed explicitly through
# `names`, and the two date columns are parsed while reading.
# NOTE(review): the path below is absolute and machine-specific; point it at
# your local copy of the CSV before running.
ufo_columns = [
    "date", "city", "state", "country",
    "shape", "duration_seconds", "duration",
    "comment", "report_date",
    "latitude", "longitude",
]
ufos = pd.read_csv(
    "/Users/jillnaiman1/Downloads/ufo-scrubbed-geocoded-time-standardized-00.csv",
    names=ufo_columns,
    parse_dates=["date", "report_date"],
)

## Aside: downsampling

In [28]:
# number of rows loaded into the ufos DataFrame
len(ufos)

80332

In [37]:
# number of rows to draw for the down-sampled subset built in the next cells
nsamples = 100

In [38]:
# Draw nsamples random row positions (repeats are possible with randint).
# The upper bound of randint is exclusive, so pass len(ufos) itself; using
# len(ufos)-1 would make the last row impossible to select.
downSampleMask = np.random.randint(0, len(ufos), nsamples)
downSampleMask

array([31780, 62315, 53122, 79590,  3115, 58193,  7437,  6151, 27323,
       43153, 46737, 47988, 75678, 35987, 40361, 53417, 31729, 60241,
       21036, 69539, 12960, 56204, 45528, 55003,  7802, 46603,  4870,
       55863, 66026, 43252, 21710, 13905,  5812, 24264, 72037, 22851,
       80236, 37702, 76441, 17048, 36256, 56598, 55116, 47594, 31417,
       41203, 68797,  4651, 47421, 32386, 72334, 10751, 27854, 75939,
       42513, 28183, 56509, 44599, 64341, 45042, 49922, 49586, 42493,
       56397, 34493, 24779, 50882, 75923, 74375, 58194,  6507, 26916,
       68453, 74599, 56099,  3691, 23755, 60865, 29612,  7380, 45024,
       35528, 73933, 72290, 63238, 21446, 47452, 76778, 77864, 50076,
       48345,  3374,  6990, 41243, 46064, 40473, 74927, 67684, 55111,
       50836])

In [39]:
# Draw nsamples distinct row positions (replace=False avoids repeats).
# An integer first argument makes choice sample from 0 .. len(ufos)-1, so
# every row, including the last one, can be selected.
downSampleMask = np.random.choice(len(ufos), nsamples, replace=False)

In [40]:
# downSampleMask holds integer row positions (not index labels), so select
# positionally with .iloc rather than with the label-based .loc
ufosDS = ufos.iloc[downSampleMask]
len(ufosDS)

100

In [41]:
# preview the first rows of the down-sampled table instead of dumping all of it
ufosDS.head(10)

Unnamed: 0,date,city,state,country,shape,duration_seconds,duration,comment,report_date,latitude,longitude
57070,2005-07-15 10:00:00,chippenham (uk/england),,gb,circle,7200.0,2hours 30,strange orange lights in the sky,2005-09-02,51.460000,-2.124722
61279,2005-07-03 21:00:00,clearwater,fl,us,unknown,3600.0,1 hour,Strange lights&#44 fast movements&#44 clear sky,2005-07-05,27.965556,-82.800278
29172,2007-02-03 23:00:00,exeter (uk/england),,gb,unknown,20.0,20sec,Approx. 6 red lights in what looked to be a /....,2007-02-24,50.700000,-3.533333
48858,1980-06-15 10:30:00,wapello,ia,us,circle,600.0,10min.,Craft sucking water from pond.,2005-04-16,41.181389,-91.185278
68397,2008-08-21 04:00:00,st. paul,mn,us,light,600.0,10 minutes,large white bright light moving through night ...,2009-12-12,44.944444,-93.093056
40404,2011-04-05 20:40:00,london (uk/england),,gb,light,900.0,15min,three very bright orange lights,2011-05-02,51.514125,-0.093689
6889,2003-10-07 21:35:00,san diego,ca,us,light,120.0,2 minutes,Orange light dripping flames.,2003-10-15,32.715278,-117.156389
5289,2005-10-29 19:00:00,surprise,az,us,light,60.0,1min,Immense Light Near Phoenix Northwest Valley,2005-11-08,33.630556,-112.332500
60831,2006-07-30 04:11:00,midlothian,tx,us,formation,300.0,5 minutes,Three star-like objects seen moving in shiftin...,2006-10-30,32.482222,-96.994167
65080,2005-08-01 20:00:00,bowmanville (canada),on,ca,teardrop,5.0,5 seconds,Bright green streak across the sky. ((NUFORC ...,2005-09-02,43.900000,-78.683333


In [45]:
# Scatter plot of the down-sampled sightings: longitude on x, latitude on y,
# and each point coloured by log10 of its duration in seconds.

# position scales, each with its axis
x_sc = bqplot.LinearScale()
x_ax = bqplot.Axis(scale=x_sc, label='Longitude')
y_sc = bqplot.LinearScale()
y_ax = bqplot.Axis(scale=y_sc, label='Latitude', orientation='vertical')

# colour scale with its colour bar
c_sc = bqplot.ColorScale()
c_ax = bqplot.ColorAxis(scale=c_sc, label='log(sec)', orientation='vertical', side='right')

# the mark itself
log_duration = np.log10(ufosDS['duration_seconds'])
scatters = bqplot.Scatter(x=ufosDS['longitude'],
                          y=ufosDS['latitude'],
                          color=log_duration,
                          scales={'x': x_sc, 'y': y_sc, 'color': c_sc})

fig = bqplot.Figure(marks=[scatters], axes=[x_ax, y_ax, c_ax])
fig

RmlndXJlKGF4ZXM9W0F4aXMobGFiZWw9dSdMb25naXR1ZGUnLCBzY2FsZT1MaW5lYXJTY2FsZSgpKSwgQXhpcyhsYWJlbD11J0xhdGl0dWRlJywgb3JpZW50YXRpb249J3ZlcnRpY2FsJywgc2PigKY=


In [69]:
# Re-bin the full UFO table onto a regular longitude/latitude grid for the heat map.
nlong = 20
nlat = 20

# Use explicit bin edges rather than letting numpy derive them from the data
# extent: nlong / nlat equal-width bins need nlong+1 / nlat+1 edges.
long_bins = np.linspace(-150, 150, nlong+1)
lat_bins = np.linspace(-40, 70, nlat+1)

# duration_seconds summed inside every (longitude, latitude) bin
hist2d, long_edges, lat_edges = np.histogram2d(ufos['longitude'], ufos['latitude'],
                                               weights=ufos['duration_seconds'],
                                               bins=[long_bins, lat_bins])

# bin centres are the midpoints of consecutive edges
long_centers = list(0.5 * (long_edges[:-1] + long_edges[1:]))
lat_centers = list(0.5 * (lat_edges[:-1] + lat_edges[1:]))

# Log-scale the totals. Bins whose total is <= 0 are set to NaN first so
# log10 never sees a zero (which would yield -inf and a warning).
hist2d[hist2d <= 0] = np.nan
hist2d = np.log10(hist2d)
# histogram2d puts the first sample (longitude) on axis 0; transpose so that
# rows follow latitude and columns follow longitude
hist2d = hist2d.T



In [77]:

# (I) Heatmap
# scales
col_sc = bqplot.ColorScale(scheme='RdPu')
x_sc = bqplot.LinearScale()
y_sc = bqplot.LinearScale()

# axes
c_ax = bqplot.ColorAxis(scale=col_sc, orientation='vertical', side='right')
x_ax = bqplot.Axis(scale=x_sc, label='Longitude')
y_ax = bqplot.Axis(scale=y_sc, orientation='vertical', label='Latitude')

# marks: heatmap (rows follow latitude, columns follow longitude)
heat_map = bqplot.GridHeatMap(color=hist2d, row=lat_centers, column=long_centers,
                              scales={'color': col_sc, 'row': y_sc, 'column': x_sc},
                              interactions={'click': 'select'},
                              anchor_style={'fill': 'blue'},
                              selected_style={'opacity': 1.0},
                              unselected_style={'opacity': 1.0})
fig = bqplot.Figure(marks=[heat_map], axes=[c_ax, y_ax, x_ax])

# (II) Bar plot of duration(year)
x_scl = bqplot.LinearScale()
y_scl = bqplot.LinearScale()
ax_xcl = bqplot.Axis(label='Date', scale=x_scl)
ax_ycl = bqplot.Axis(label='Total duration in sec', scale=y_scl,
                     orientation='vertical', side='left')

# seed the bar plot with the sightings inside grid cell (row 0, column 0)
i, j = 0, 0
longs = [long_edges[j], long_edges[j+1]]
lats = [lat_edges[i], lat_edges[i+1]]
region_mask = ((ufos['latitude'] >= lats[0]) & (ufos['latitude'] < lats[1]) &
               (ufos['longitude'] >= longs[0]) & (ufos['longitude'] < longs[1]))

# add the sighting year as a column of the ufos dataset
ufos['year'] = ufos['date'].dt.year

# duration_seconds summed per year bin for the selected region
dur, dur_edges = np.histogram(ufos['year'][region_mask],
                              weights=ufos['duration_seconds'][region_mask],
                              bins=10)
# centres of the year bins (index k, so the row index i is left untouched)
dur_centers = [(dur_edges[k]+dur_edges[k+1])*0.5 for k in range(len(dur_edges)-1)]

# make the histogram by hand using bqplot.Bars
duration_hist = bqplot.Bars(x=dur_centers, y=dur,
                            scales={'x': x_scl, 'y': y_scl})
fig_dur = bqplot.Figure(marks=[duration_hist], axes=[ax_xcl, ax_ycl])

# (III) Our interactions
mySelectedLabel = ipywidgets.Label()


def get_data_value(change):
    """Refresh the label and the duration bar plot for the selected cell.

    Reads the first (i, j) pair of `change['owner'].selected`, writes the
    corresponding hist2d value into the label, and recomputes the per-year
    duration histogram for the sightings inside that grid cell. With an
    empty or None selection the label is cleared and the bars are left
    unchanged instead of raising inside the widget callback.
    """
    selected = change['owner'].selected
    if selected is None or len(selected) == 0:
        mySelectedLabel.value = ''
        return
    i, j = selected[0]
    v = hist2d[i, j]
    mySelectedLabel.value = 'Total Duration in log(sec) = ' + str(v)
    # edges of the selected cell: j indexes longitude, i indexes latitude
    longs = [long_edges[j], long_edges[j+1]]
    lats = [lat_edges[i], lat_edges[i+1]]
    region_mask = ((ufos['latitude'] >= lats[0]) & (ufos['latitude'] < lats[1]) &
                   (ufos['longitude'] >= longs[0]) & (ufos['longitude'] < longs[1]))
    dur, dur_edges = np.histogram(ufos['year'][region_mask],
                                  weights=ufos['duration_seconds'][region_mask],
                                  bins=10)
    dur_centers = [(dur_edges[k]+dur_edges[k+1])*0.5 for k in range(len(dur_edges)-1)]
    duration_hist.x = dur_centers
    duration_hist.y = dur


# run get_data_value every time the heat map's `selected` trait changes
heat_map.observe(get_data_value, 'selected')

ipywidgets.VBox([mySelectedLabel, ipywidgets.HBox([fig, fig_dur])])

VkJveChjaGlsZHJlbj0oTGFiZWwodmFsdWU9dScnKSwgSEJveChjaGlsZHJlbj0oRmlndXJlKGF4ZXM9W0NvbG9yQXhpcyhvcmllbnRhdGlvbj0ndmVydGljYWwnLCBzY2FsZT1Db2xvclNjYWzigKY=


In [82]:
# (I) Heatmap
# scales
col_sc = bqplot.ColorScale(scheme='RdPu')
x_sc = bqplot.LinearScale()
y_sc = bqplot.LinearScale()

# axes
c_ax = bqplot.ColorAxis(scale=col_sc, orientation='vertical', side='right')
x_ax = bqplot.Axis(scale=x_sc, label='Longitude')
y_ax = bqplot.Axis(scale=y_sc, orientation='vertical', label='Latitude')

# marks: heatmap (rows follow latitude, columns follow longitude)
heat_map = bqplot.GridHeatMap(color=hist2d, row=lat_centers, column=long_centers,
                              scales={'color': col_sc, 'row': y_sc, 'column': x_sc},
                              interactions={'click': 'select'},
                              anchor_style={'fill': 'blue'},
                              selected_style={'opacity': 1.0},
                              unselected_style={'opacity': 1.0})
# Create this cell's figure before any layout tweak so the sizing below is
# applied to it and not to a figure left over from an earlier cell.
fig = bqplot.Figure(marks=[heat_map], axes=[c_ax, y_ax, x_ax])

# (II) Bar plot of duration(year)
x_scl = bqplot.LinearScale()
y_scl = bqplot.LinearScale()
ax_xcl = bqplot.Axis(label='Date', scale=x_scl)
ax_ycl = bqplot.Axis(label='Total duration in sec', scale=y_scl,
                     orientation='vertical', side='left')

# seed both bar plots with the sightings inside grid cell (row 0, column 0)
i, j = 0, 0
longs = [long_edges[j], long_edges[j+1]]
lats = [lat_edges[i], lat_edges[i+1]]
region_mask = ((ufos['latitude'] >= lats[0]) & (ufos['latitude'] < lats[1]) &
               (ufos['longitude'] >= longs[0]) & (ufos['longitude'] < longs[1]))

# add the sighting year as a column of the ufos dataset
ufos['year'] = ufos['date'].dt.year

# duration_seconds summed per year bin for the selected region
dur, dur_edges = np.histogram(ufos['year'][region_mask],
                              weights=ufos['duration_seconds'][region_mask],
                              bins=10)
# centres of the year bins (index k, so the row index i is left untouched)
dur_centers = [(dur_edges[k]+dur_edges[k+1])*0.5 for k in range(len(dur_edges)-1)]

# make the histogram by hand using bqplot.Bars
duration_hist = bqplot.Bars(x=dur_centers, y=dur,
                            scales={'x': x_scl, 'y': y_scl})
fig_dur = bqplot.Figure(marks=[duration_hist], axes=[ax_xcl, ax_ycl])

# (III) Bargraph for UFO shapes
x_ord = bqplot.OrdinalScale()
y_ord = bqplot.LinearScale()
ax_xord = bqplot.Axis(label='Shape', scale=x_ord)
ax_yord = bqplot.Axis(label='Frequency', scale=y_ord,
                      orientation='vertical',
                      side='left')

# Take labels and counts from the SAME value_counts() result. unique() lists
# shapes in order of appearance (NaN included) while value_counts() orders by
# frequency (NaN dropped), so pairing the two mislabels the bars.
shape_counts = ufos['shape'][region_mask].value_counts()
hist_ord = bqplot.Bars(x=shape_counts.index.values,
                       y=shape_counts.values,
                       scales={'x': x_ord, 'y': y_ord})
fig_shape = bqplot.Figure(marks=[hist_ord], axes=[ax_xord, ax_yord])

# (IV) Our interactions
mySelectedLabel = ipywidgets.Label()


def get_data_value(change):
    """Refresh the label and both bar plots for the selected heat-map cell.

    Reads the first (i, j) pair of `change['owner'].selected`, writes the
    corresponding hist2d value into the label, then recomputes the per-year
    duration histogram and the shape counts for the sightings inside that
    grid cell. With an empty or None selection the label is cleared and the
    plots are left unchanged instead of raising inside the widget callback.
    """
    selected = change['owner'].selected
    if selected is None or len(selected) == 0:
        mySelectedLabel.value = ''
        return
    i, j = selected[0]

    # label
    v = hist2d[i, j]
    mySelectedLabel.value = 'Total Duration in log(sec) = ' + str(v)

    # sightings inside the selected cell: j indexes longitude, i indexes latitude
    longs = [long_edges[j], long_edges[j+1]]
    lats = [lat_edges[i], lat_edges[i+1]]
    region_mask = ((ufos['latitude'] >= lats[0]) & (ufos['latitude'] < lats[1]) &
                   (ufos['longitude'] >= longs[0]) & (ufos['longitude'] < longs[1]))

    # duration histogram
    dur, dur_edges = np.histogram(ufos['year'][region_mask],
                                  weights=ufos['duration_seconds'][region_mask],
                                  bins=10)
    dur_centers = [(dur_edges[k]+dur_edges[k+1])*0.5 for k in range(len(dur_edges)-1)]
    duration_hist.x = dur_centers
    duration_hist.y = dur

    # shape histogram: labels and counts come from one value_counts() call so
    # they stay aligned
    shape_counts = ufos['shape'][region_mask].value_counts()
    hist_ord.x = shape_counts.index.values
    hist_ord.y = shape_counts.values


# run get_data_value every time the heat map's `selected` trait changes
heat_map.observe(get_data_value, 'selected')

# lets size our figures nicely
fig_dur.layout.max_width = '300px'
fig_dur.layout.max_height = '200px'
fig_shape.layout.max_width = '300px'
fig_shape.layout.max_height = '200px'
fig.layout.min_width = '600px'

ipywidgets.VBox([mySelectedLabel,
                 ipywidgets.HBox([fig_shape, fig_dur]),
                 fig])

VkJveChjaGlsZHJlbj0oTGFiZWwodmFsdWU9dScnKSwgSEJveChjaGlsZHJlbj0oRmlndXJlKGF4ZXM9W0F4aXMobGFiZWw9dSdTaGFwZScsIHNjYWxlPU9yZGluYWxTY2FsZSgpKSwgQXhpcyjigKY=


In [78]:
# list the distinct values of the 'shape' column across the full dataset
# (the saved output below includes a nan entry)
ufos['shape'].unique()

array(['cylinder', 'light', 'circle', 'sphere', 'disk', 'fireball',
       'unknown', 'oval', 'other', 'cigar', 'rectangle', 'chevron',
       'triangle', 'formation', nan, 'delta', 'changing', 'egg',
       'diamond', 'flash', 'teardrop', 'cone', 'cross', 'pyramid',
       'round', 'crescent', 'flare', 'hexagon', 'dome', 'changed'],
      dtype=object)