
## We can also use python to make diagrams
* This uses matplotlib.patches

In [None]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
matplotlib.rcParams["font.family"] = "sans-serif" # note: other generic families such as "serif" or "monospace" work here too, just google "matplotlib font family"

# lets also import numpy
import numpy as np

# ----- ACTIVITY #1: Diagrams ------

In [None]:
# Re-draw the diagram of the angular distribution of human vision that was in
# the slides from last lecture (we focus on the large-FOV image).
# ***** GO TO LECTURE SLIDES FROM LAST TIME ****
# NOTE: do plt.show after each step!!  The (n) labels below give the order in
# which the pieces are added during the activity.

# Colors are given as standard hex codes; we'll talk more about these later,
# but you can also just google "hex color codes", e.g. https://www.color-hex.com/
edgecolor = "#000000" # black outline for all of our patches
facecolor_totalFOV = "#1f77b4" # blueish, like in the slide
facecolor_bincFOV = "#ff7f0e" # orangish, like in the slide
facecolor_arrow = "#aaaaaa" # gray

# Both wedges sit on the origin and open symmetrically about "straight up"
# (90 degrees); the spans are the full opening angles, in degrees.
up_angle = 90
total_span = 210
binoc_span = 114

# (1) the total field of view
totalFOV = matplotlib.patches.Wedge(
    [0.0, 0.0], 1.0,
    up_angle - (total_span/2.0), up_angle + (total_span/2.0), # span of the wedge
    lw=2.0,
    facecolor=facecolor_totalFOV,
    edgecolor=edgecolor,
)

# (3) the binocular field of view, defined next to totalFOV for consistency;
#     width=0.25 is there so that it doesn't overlap totally with the total FOV
binoc = matplotlib.patches.Wedge(
    [0.0, 0.0], 1.0,
    up_angle - (binoc_span/2.0), up_angle + (binoc_span/2.0),
    width=0.25,
    lw=2.0,
    facecolor=facecolor_bincFOV,
    edgecolor=edgecolor,
)

# (5) the original figure also had an arrow indicating the forward direction
arrow = matplotlib.patches.Arrow(
    -1.10, 0.0, 0.0, 0.75,
    width=0.25,
    edgecolor=edgecolor,
    facecolor=facecolor_arrow,
)

# (1) set up the figure and draw the total FOV
fig, ax = plt.subplots(figsize=(10, 7), dpi=300)
ax.add_patch(totalFOV)
ax.set_xlim(-1.25, 1.25)
ax.set_ylim(-0.5, 1.25)

# (3) patches are drawn in the order they are added, so binoc lands on top
ax.add_patch(binoc)

# (5)
ax.add_patch(arrow)

# (6) finally, overplot the arrow's annotation
ax.text(-1.22, 0.35, "Forward", rotation=90, fontsize="xx-large")

# (4) a legend to remind us what is what
ax.legend([totalFOV, binoc], ["Total FOV", "Binocular FOV"], fontsize="x-large")

# (2) make the axis ticks and the frame disappear
ax.set_xticks([])
ax.set_yticks([])
for side in ("right", "left", "top", "bottom"):
    ax.spines[side].set_visible(False)

plt.show()

## Take away
* So, that was (maybe) a lot of effort to make a diagram, *but* we can now go back and change things very easily.
* For example, we can change all the colors (**do this**), or we can change the size of the wedge.
* The takeaway is that Python not only makes graphs, but can also be used to make diagrams.

# ------ ACTIVITY #2: Manipulating Images -------

## Python can also be used to manipulate images
* lets try with our stitch image

In [None]:
# Pillow provides the PIL package; if the import below fails, install it with
#   pip:      pip3 install pillow   (might have to use sudo)
#   anaconda: conda install -c anaconda pillow
# (I think there is also a way to do it using the Anaconda GUI, but I've never used it to install things)
from PIL import Image

# Image.open takes a file name or file object, not a URL, so the image has to be
# downloaded first.  NOTE(review): a stitch image is hosted at
# https://uiuc-ischool-dataviz.github.io/spring2019online/week01/images/stitch_nobg_tilted.png
# but its file name differs from the local copy below - confirm it is the same picture.
stitch_path = "/Users/jnaiman/Downloads/stitch_reworked.png"
im = Image.open(stitch_path, mode="r")

# turn the image into a numpy array so that we can work with the pixel values
data = np.array(im)

In [None]:
np.unique(data[:,:,0])
# this lists the distinct values in the first channel (red, assuming an RGB/RGBA image)
# so we see there are only 3 colors

In [None]:
# lets remind ourselves a bit of what this looks like
fig,ax = plt.subplots(figsize=(5,5))
ax.imshow(im)

In [None]:
# Last time we talked a bit about how to use this visualization to quantify
# the goodness or badness of stitch; let's play with this idea a bit more.

# Use pixel filling to determine the volumetric good and bad levels: look at
# the first channel only and count the pixels carrying each marker value.
first_channel = data[:, :, 0]
ngood = (first_channel == 255).sum() # number of "good" pixels
nbad = (first_channel == 126).sum() # number of "bad" pixels
total = ngood + nbad

# fractions of the counted pixels
goodness = ngood / total
badness = nbad / total
print(badness, goodness)

# so, looks like ~77% bad and 23% good
# does that match up with what you'd think from looking at the above figure?

In [None]:
# Now let's plot this on a little stacked bar graph: the "bad" fraction sits at
# the bottom (in red) and the "good" fraction is stacked on top of it.
bar_x = [1]
bar_width = [0.5]
p1 = plt.bar(bar_x, badness, width=bar_width, color='#991620')
p2 = plt.bar(bar_x, goodness, width=bar_width, bottom=badness)
plt.xlim(0.0, 2.0)

In [None]:
# What if we just counted pixels by eye from our figure above?
image_top = 50       # image top is at ~50 pixels
good_to_bad = 150    # looks like good changes to badness at ~150
image_bottom = 450   # image bottom is at ~450 pixels

# so: the "good" part as a fraction of the full image height
goodness_apparent = (image_top - good_to_bad) / (image_top - image_bottom)

print(goodness_apparent)



In [None]:
# what is badness, apparent
1.0-goodness_apparent

In [None]:
plt.pie([badness,goodness]) # can also do a pie chart if we want I suppose :D
# note: this uses wedges!!

# ------ ACTIVITY #3: Python tips and tricks! ------
(note: this will be put up as a "cheat sheet" for students if we are running short on time)

* We've been playing around with a few complex things in Python, but lets take a step back for a moment and delve into how Python deals with data in a bit more detail

In [None]:
# initialize a
a = []

In [None]:
# take a gander at a
a
# hey look a is an empty list!

In [None]:
# we can mix types in our lists
a = [1, 2, "hey"]
# here we have a few integers and a string

In [None]:
# lets look at a again
a

In [None]:
# also, for our general purposes, we can call a string with a single or double quotes
'hey' == "hey"

In [None]:
# we can also easily add to our list with the append statement
a.append("there")
a

In [None]:
# returns an item at an index, & removes item, default is the last item
a.pop()

In [None]:
# now a is back to what we had before
a

In [None]:
# we can also grab elements of a by their indices
a[1]

In [None]:
# note that indexing starts from 0 in python
a[0]

In [None]:
# the -# can be used to grab starting from the last element of the list
a[-1]

In [None]:
# the colon means "all the things"
a[:]

In [None]:
# we can also take subsets easily, for example, ignoring the first element of a
# this is a way to filter data
a[1:]

In [None]:
# can also take all but the last element
a[:-1]

In [None]:
# we can also combine these two things to drop both the first and the last element,
# i.e. grab from the second to the 2nd to last element
# in this case, that leaves just the one middle element
a[1:-1]

In [None]:
# there are also some nice string manipulations we can do
#  like splitting a string into a list object
a = "this is a much longer list, where i have taken a sentence and split it based on the spaces".split()

In [None]:
a

In [None]:
# we can grab every other element in the list
a[::2]

In [None]:
# we can also reorder this list back-to-front
a[::-1]

In [None]:
# we can also update individual strings in this list
a[3] = 'sorta'

In [None]:
a

In [None]:
# now lets look quickly at some funny things about strings in Python
name = "jill"

In [None]:
name[0]

In [None]:
# strings are immutable, so assigning to a single character raises a TypeError;
# we catch it and print the message so that the error is still shown but
# "Restart & Run All" does not stop at this cell
try:
    name[0] = 'J'
except TypeError as err:
    print("TypeError:", err)

In [None]:
# have to use something like replace
name.replace("j","J")

In [None]:
# python also has stuff called dictionaries
d = {'bevier': 'building', 'green' : 'road', 'champaign': 'city'}

In [None]:
d

In [None]:
# here the "champaign" entry is of type "city"
d['champaign']

In [None]:
# its super easy to add to dictionaries, here we add an empty list
d['mylist'] = []

In [None]:
d

In [None]:
# we can add to this list in the usual way - with the above "append" function we used before
d['mylist'].append(10)

In [None]:
d

In [None]:
# there are these other cool objects called "sets"
myset = set()

In [None]:
myset

In [None]:
# lets check out some operations with sets, for example some movies I like
jill_movies = set(['last jedi', 'girls trip', 'frozen'])
# lets say we have another person named bob an these are the movies he likes
bob_movies = set(['last jedi', 'other movie'])

In [None]:
jill_movies

In [None]:
bob_movies

In [None]:
# we can create a set that is made up of my movies, but without those movies that appear in bob's movies list
jill_movies - bob_movies

In [None]:
# note we can't index a set: this raises a TypeError, which we catch and print
# so that the error is still shown but the rest of the notebook keeps running
try:
    jill_movies[0]
except TypeError as err:
    print("TypeError:", err)

In [None]:
# we can take the union of sets
jill_movies.union(bob_movies)

In [None]:
# for some final string manipulation, we can use a thing called enumerate 
# to both count in a for loop and use an element of our list directly
for i, word in enumerate(reversed(a)):
    print(i, word.upper())

In [None]:
# continue and break are flow control statements:
#  "continue" jumps straight to the next pass of the loop,
#  "break" leaves the loop altogether
sorted_words = sorted(a)
for position, word in enumerate(sorted_words):
    if word == "and":
        continue  # skip this word, but keep looping
    if word == "it":
        break  # stop here; nothing from "it" onwards is printed
    print(position, word.upper())

In [None]:
# also, we can use the "?" to figure out things we don't know, for example the reader
#  function from  the csv library
import csv

In [None]:
csv.reader?

# ------ ACTIVITY #4: CSV Read and manipulate -----

* Ok, now lets check out some building inventory documents for Illinois
* Note: normally these would be on the jupyter hub, but again, we have to download them from the website!

In [None]:
# again, note we have to supply the full path
# newline="" is what the csv module asks for when a file is handed to csv.reader,
# so that line breaks inside quoted fields are read correctly
# the handle is left open because the following cells rewind it with f.seek(0)
f = open("/Users/jnaiman/Downloads/building_inventory.csv", newline="")

In [None]:
# so here we are just going to read in our data.
# we can see its sort of in a weird format
# only the first few records are printed: that is enough to see the format,
# and printing every row of the file would flood the notebook with output
n_preview = 10
f.seek(0)  # rewind, in case the file has already been read
for i, record in enumerate(csv.reader(f)):
    if i >= n_preview:
        break
    print(record)

In [None]:
# we want to fill up a dictionary with one entry per column,
# so first we need the column names: rewind the file, make a fresh reader,
# and pull off the first row with next() - that row is the header
f.seek(0)
reader = csv.reader(f)
header = next(reader)
header


In [None]:
# one empty list per column name in the header
data = {col: [] for col in header}
data # now we have an empty dictionary with named entries ready to be filled

In [None]:
# to fill the dictionary we are going to use the function "zip",
# which steps through several lists side by side; here is a little example
a = ["hi", "there", "my", "friends"]
b = [9, 4, 1, 9]
paired = zip(a, b)
for word, num in paired:
    print(word, num)
#  you can think of it kind of like "enumerate" that we
# used above, but it's iterating over 2 lists here instead of a number and a list


In [None]:
# ok, so we are going to use this function zip to fill our dictionary

# first, rewind the file and pull off the header row like before
f.seek(0)
reader = csv.reader(f)
header = next(reader)

# the column names become the dictionary keys, each starting with an empty list
data = {column_name: [] for column_name in header}

# now walk the remaining rows; zip pairs each value with the name of its column,
# and the value is appended to that column's list
for row in reader:
    for col, val in zip(header, row):
        column_values = data[col]
        column_values.append(val)

In [None]:
len(data['Zip code'])

In [None]:
len(data['Agency Name'])

In [None]:
# we can also use keys() to list our dictionary names
data.keys()

In [None]:
# we can do fun things with this dataset, like look at the square footage of a
# particular building: find the rows whose address contains the text below and
# print the square footage stored at the same position
target_address = '905 S Goodwin Ave'
square_footage = data['Square Footage']
for row_index, location in enumerate(data['Address']):
    if target_address in location:
        print(square_footage[row_index])

In [None]:
#####len(set(data['Agency Name']))

In [None]:
# we will import collections which is a library that provides other data structures
# in which to store our data
# its sort of like "numpy" for arrays in this sense
import collections

In [None]:
# for example, here we can create a counter for how many entries have particular agency names
collections.Counter(data['Agency Name'])
#data['Agency Name']

In [None]:
# lets pick out data particularly for UIUC
# start from a dictionary with all the right headers, each holding an empty list
uiuc = {col: [] for col in header}

uiuc

In [None]:
# fill this subset from data: for every row whose agency matches,
# copy that row's value from each column of data into uiuc
# NOTE(review): the pandas section further down filters on "University of Illinois";
# confirm which spelling the CSV actually uses, otherwise this subset stays empty
target_agency = 'University of Illinois Urbana-Champaign'
for row_index, agency in enumerate(data['Agency Name']):
    if agency != target_agency:
        continue
    for key, column in data.items():
        uiuc[key].append(column[row_index])
uiuc

In [None]:
collections.Counter(uiuc['Agency Name'])

In [None]:
max(uiuc['Square Footage'])

In [None]:
# note that the above is a string
type(uiuc['Square Footage'][0])

In [None]:
# this can present an issue if we want to compare things that are really numbers:
# strings are compared character by character, not by numeric value
'9' > '150000'

# --------- ACTIVITY #5: Using pandas to read and look at data -------

In [None]:
# we can also do a lot of these things with the pandas library
# again, this is something you can pip or anaconda install if you need to
import pandas as pd

In [None]:
buildings = pd.read_csv('/Users/jnaiman/Downloads/building_inventory.csv')

In [None]:
buildings
# formatting here is sort of nice

In [None]:
# how many entries are there? as an iterable
buildings.index

In [None]:
buildings.loc[0:3] #easy to grab subsets - here by label
#buildings.loc? #easy to grab subsets - here by label
#buildings.columns

In [None]:
buildings.iloc[0:3] # look up by integer position; unlike a label-based .loc slice, the end of an .iloc slice is excluded, so this gives rows 0, 1 and 2 only

In [None]:
# .loc slices by label and includes BOTH endpoints, so this grabs the rows labelled
# 0 through 10 (11 entries) and shows the Agency Name of each; picking the rows and
# the column in a single .loc call avoids chained indexing
buildings.loc[0:10, "Agency Name"]

In [None]:
buildings["Agency Name"].nunique() # how many unique agencies

In [None]:
# if you are used to R at all, this is sort of like "summary" function, but basically giving some 
# summary statistics for the numerical data in our dataset
buildings.describe()

In [None]:
buildings.loc[buildings["Square Footage"] == 0] # we can subset easily - here, for buildings without any sq footage!

In [None]:
buildings["Bldg Status"].unique() 

In [None]:
buildings.loc[buildings["Bldg Status"] == "In Progress"] # who is being built now?

In [None]:
buildings["Square Footage"].plot() # easy plots with pandas dataframes

In [None]:
# another way to slice things up: groupby hands back one sub-table per
# building status, and we print how many rows each of them has
for status, group in buildings.groupby("Bldg Status"):
    print(status, len(group))

In [None]:
buildings.max()

In [None]:
pd.read_csv?

In [None]:
# specify what to do with incomplete entries: for each of the columns listed here,
#  a value of 0 is treated as a NaN or not-a-number
zero_means_missing = ['Square Footage', 'Year Acquired', 'Year Constructed']
b = pd.read_csv(
    "/Users/jnaiman/Downloads/building_inventory.csv",
    na_values={column: 0 for column in zero_means_missing},
)

In [None]:
b["Square Footage"].min()

In [None]:
b["Year Constructed"].min()

In [None]:
b["Year Acquired"].min()

In [None]:
b.loc[b["Year Acquired"] < 1800]

In [None]:
b2 = b.sort_values("Year Constructed")

In [None]:
b2.iloc[0]

In [None]:
# sort by construction year; re-binding b to the sorted copy (rather than
# inplace=True) avoids mutating a table in place across cells
b = b.sort_values("Year Constructed")

In [None]:
b.groupby("Year Acquired")["Square Footage"].sum()

In [None]:
b.loc[b["Agency Name"] == "University of Illinois"].groupby("Year Acquired")["Square Footage"].sum()