-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathzip_code_rate_mapper.py
163 lines (140 loc) · 9.13 KB
/
zip_code_rate_mapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#######################################
# Mapping NYC COVID-19 Zip Code Data
#######################################
#
#
import numpy as np
import matplotlib.pyplot as plt
import requests,datetime,os
from mpl_toolkits.basemap import Basemap
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
from osgeo import ogr
plt.style.use('ggplot') # ggplot formatting
def basemapper(): #geographic plotting routine
fig,ax = plt.subplots(figsize=(14,10))
m = Basemap(llcrnrlon=bbox[0],llcrnrlat=bbox[1],urcrnrlon=bbox[2],
urcrnrlat=bbox[3],resolution='h', projection='cyl') # cylindrical projection ('merc' also works here)
shpe = m.readshapefile(shapefile.replace('.shp',''),'curr_shapefile')
m.drawmapboundary(fill_color='#bdd5d5') # map color
m.fillcontinents(color=plt.cm.tab20c(19)) # continent color
m.drawcounties(color='k',zorder=999)
parallels = np.linspace(bbox[1],bbox[3],5) # latitudes
m.drawparallels(parallels,labels=[True,False,False,False],fontsize=12,linewidth=0.25)
meridians = np.linspace(bbox[0],bbox[2],5) # longitudes
m.drawmeridians(meridians,labels=[False,False,False,True],fontsize=12,linewidth=0.25)
return fig,ax,m
# COVID-19 Datasets
github_url = 'https://raw.githubusercontent.com/nychealth/coronavirus-data/master/' # nyc data repository
data_file_urls = ['boro.csv','by-age.csv','by-sex.csv','case-hosp-death.csv',
'summary.csv','tests-by-zcta.csv'] # the .csv files to read where data exists
# shapefiles and geographic information
# nyc zip code shapefile: https://data.cityofnewyork.us/Business/Zip-Code-Boundaries/i8iw-xf4u
shapefile_folder = './ZIP_CODE_040114/' # location of city shapefile (locally)
shapefile = shapefile_folder+os.listdir(shapefile_folder)[0].split('_correct_CRS')[0]+'_correct_CRS.shp'
drv = ogr.GetDriverByName('ESRI Shapefile') # define shapefile driver
ds_in = drv.Open(shapefile,0) # open shapefile
lyr_in = ds_in.GetLayer() # grab layer
shp = lyr_in.GetExtent() # shapefile boundary
zoom = 0.01 # zooming out or in of the shapefile bounds
bbox = [shp[0]-zoom,shp[2]-zoom,shp[1]+zoom,shp[3]+zoom] # bounding box for plotting
fig,ax,m = basemapper() #handler for plotting geographic data
fig.canvas.draw() # draw to get transform data
transf = ax.transData.inverted() # for labeling
shape_areas = [ii['AREA'] for ii in m.curr_shapefile_info] # for shrinking text size in zip codes
# read borough data file first and plot
r = requests.get(github_url+data_file_urls[5]) # request the zipcode data
txt = r.content.decode('utf-8-sig').split('\r\n') # this vector contains all the data
header = txt[0].split(',') # get file header info
zipcodes = [data_row.split(',')[0] for data_row in txt[3:]] # zip codes
zipcode_pos = [float(data_row.split(',')[1]) for data_row in txt[3:]] # positive cases
zipcode_tot = [float(data_row.split(',')[2]) for data_row in txt[3:]] # total tested
zipcode_perc = [float(data_row.split(',')[3]) for data_row in txt[3:]] # percent (division of the two above * 100)
data_array = [zipcode_pos,zipcode_tot,zipcode_perc]
data_titles = ['Positive Cases','Total Tested','Percent Testing Positive']
data_indx = 2 # select which data to plot
data_range = [np.min(data_array[data_indx]),np.max(data_array[data_indx])] # for colormapping
# color schemes
cmap_sel = plt.cm.BuPu # selected colormap for visualizations
txt_color = 'w' # color of zipcode text
shape_edge_colors = 'k' # color of edges of shapes
NA_color = 'w' # color of zip codes without data
match_array,patches,patches_NA,color_array,txt_array = [],[],[],[],[]
for info,shape in zip(m.curr_shapefile_info,m.curr_shapefile):
if info['POPULATION']==0.0: # if no population, color the shape white
patches_NA.append(Polygon(np.array(shape), True, color=NA_color,label='')) # coloring the boroughs
continue
if info['ZIPCODE'] in zipcodes:
# first find where the data matches with shapefile
zip_indx = [ii for ii in range(0,len(zipcodes)) if info['ZIPCODE']==zipcodes[ii]][0]
zip_data = data_array[data_indx][zip_indx]
color_val = np.interp(zip_data,data_range,[0.0,1.0]) # data value mapped to unique color in colormap
c_map = cmap_sel(color_val) # colormap to use
if info['ZIPCODE'] in match_array:
# check if we've already labelled this zip code (some repeat for islands, parks, etc.)
patches.append(Polygon(np.array(shape), True, color=c_map))
else:
# this is the big data + annotation loop
match_array.append(info['ZIPCODE'])
patches.append(Polygon(np.array(shape), True, color=c_map)) # coloring the boroughs to colormap
x_pts = [ii[0] for ii in shape] # get the x pts from the shapefile
y_pts = [ii[1] for ii in shape] # get the y pts from the shapefile
x_pts_centroid = [np.min(x_pts),np.min(x_pts),np.max(x_pts),np.max(x_pts)] # x-centroid for label
y_pts_centroid = [np.min(y_pts),np.max(y_pts),np.min(y_pts),np.max(y_pts)] # y-centroid for label
# calculate eigenvectors from covariance to get rough rotation of each zip code
evals,evecs = np.linalg.eigh(np.cov(x_pts,y_pts))
rot_vec = np.matmul(evecs.T,[x_pts,y_pts]) # multiply by inverse of cov matrix
angle = (np.arctan(evecs[0][1]/evecs[0][0])*180.0)/np.pi # calculate angle using arctan
angle-=90.0
# make sure the rotation of the text isn't upside-down
if angle<-90.0:
angle+=180.0
elif angle>90.0:
angle-=180.0
if angle<-45.0:
angle+=90.0
elif angle>45.0:
angle-=90.0
# this is the zipcode label, shrunken based on the area of the zip code shape
txtbox = ax.text(np.mean(x_pts_centroid),np.mean(y_pts_centroid), info['ZIPCODE'], ha='center',
va = 'center',fontsize=np.interp(info['AREA'],np.sort(shape_areas)[::int(len(np.sort(shape_areas))/4.0)],
[1.0,2.0,4.0,5.0]),
rotation=angle, rotation_mode='anchor',color=txt_color,bbox=dict(boxstyle="round",ec=c_map,fc=c_map))
# this bit ensures no labels overlap or obscure other labels
trans_bounds = (txtbox.get_window_extent(renderer = fig.canvas.renderer)).transformed(transf)
for tbox in txt_array:
tbounds = (tbox.get_window_extent(renderer = fig.canvas.renderer)).transformed(transf)
loops = 0
while trans_bounds.contains(tbounds.x0,tbounds.y0) or trans_bounds.contains(tbounds.x1,tbounds.y1) or\
tbounds.contains(trans_bounds.x0,trans_bounds.y0) or tbounds.contains(trans_bounds.x1,trans_bounds.y1) or\
trans_bounds.contains(tbounds.x0+((tbounds.x0+tbounds.x1)/2.0),tbounds.y0+((tbounds.y0+tbounds.y1)/2.0)) or \
trans_bounds.contains(tbounds.x0-((tbounds.x0+tbounds.x1)/2.0),tbounds.y0-((tbounds.y0+tbounds.y1)/2.0)) or \
trans_bounds.contains(tbounds.x0+((tbounds.x0+tbounds.x1)/2.0),tbounds.y0-((tbounds.y0+tbounds.y1)/2.0)) or\
trans_bounds.contains(tbounds.x0-((tbounds.x0+tbounds.x1)/2.0),tbounds.y0+((tbounds.y0+tbounds.y1)/2.0)):
txtbox.set_size(txtbox.get_size()-1.0)
tbox.set_size(tbox.get_size()-1.0)
trans_bounds = (txtbox.get_window_extent(renderer = fig.canvas.renderer)).transformed(transf)
tbounds = (tbox.get_window_extent(renderer = fig.canvas.renderer)).transformed(transf)
loops+=1
if loops>10:
break
txt_array.append(txtbox)
if len(txt_array) % 10 == 0:
print('{0:2.0f} % Finished'.format(100.0*(len(txt_array)/len(zipcodes))))
else:
patches_NA.append(Polygon(np.array(shape), True, color=NA_color,label='')) # coloring the shapes that don't have data white (parks, etc.)
# adding the shapes and labels to the figure
pc = PatchCollection(patches, match_original=True, edgecolor=shape_edge_colors, linewidths=1., zorder=2,cmap=cmap_sel)
pc_NA = PatchCollection(patches_NA, match_original=True, edgecolor=shape_edge_colors, linewidths=1., zorder=2)
ax.add_collection(pc)
ax.add_collection(pc_NA)
pc.set_clim(data_range) # set the colorbar bounds
cb = plt.colorbar(pc,shrink=0.75) # shrink the colorbar a little
cb.set_label(data_titles[data_indx],fontsize=18,labelpad=10) # set the label
leg1 = ax.legend(handles=[patches_NA[-1]],title='No Data',
fontsize=16,framealpha=0.95,loc='upper left') # add legend to describe NA-values
# textbox showing the date the data was processed
txtbox = ax.text(0.0, 0.025, 'Computed on '+datetime.datetime.now().strftime('%b %d, %Y %H:%M'), transform=ax.transAxes, fontsize=14,
verticalalignment='center', bbox=dict(boxstyle='round', facecolor='w',alpha=0.5))
txtbox.set_x(1.1-(txtbox.figure.bbox.bounds[2]-(txtbox.clipbox.bounds[2]-txtbox.clipbox.bounds[0]))/txtbox.figure.bbox.bounds[2])
plt.show()