Skip to content

Commit 81b1bbb

Browse files
committed
Updating to python3
1 parent ecb69f4 commit 81b1bbb

File tree

1 file changed

+56
-32
lines changed

1 file changed

+56
-32
lines changed
+56-32
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,100 @@
11
#!/usr/bin/python
22

33
import sys
4-
import urllib2
4+
import urllib.request
55
import json
66
import os
77
import base64, uuid
88
import re
9+
import logging
910

1011
# Directory (relative to the current working directory) holding cached JSON responses.
CACHEDIR = "cache"
# Create it up front; exist_ok avoids the check-then-create race of
# testing os.path.exists() before calling os.makedirs().
os.makedirs(CACHEDIR, exist_ok=True)
1314

1415
def cacheFilename(url):
    """Return the cache file name used for *url*.

    The URL is UTF-8 encoded and base64 encoded; trailing ``=`` padding is
    stripped and ``/`` is replaced by ``_`` so the result contains no path
    separator. ``.json`` is appended.
    """
    # b64encode needs bytes on Python 3 and returns bytes, hence the
    # encode/decode round trip.
    encoded = base64.b64encode(url.encode("utf-8")).decode("utf-8")
    return encoded.rstrip('=\n').replace('/', '_') + '.json'
1718

1819
def fetch(url, retry=0):
    """Return the parsed JSON document at *url*, using an on-disk cache.

    If a cache file for *url* exists under ``CACHEDIR`` (named by
    ``cacheFilename``), its parsed content is returned without any network
    access. Otherwise the URL is requested, the response is parsed as JSON,
    written to the cache and returned.

    Credentials embedded in the URL (``scheme://user:password@host/...``) are
    removed from the requested URL and sent as an HTTP Basic ``Authorization``
    header instead.

    Args:
        url: URL to download; may contain ``user:password@`` credentials.
        retry: Number of attempts already made; used to bound retries.

    Returns:
        The decoded JSON value, or ``None`` if the request failed. A request
        answered with HTTP 500 is retried while ``retry < 5``.
    """
    # Imported here so the function does not rely on urllib.request
    # re-exporting these submodules.
    import urllib.error
    import urllib.parse

    cached = os.path.join(CACHEDIR, cacheFilename(url))
    if os.path.exists(cached):
        logging.debug('Getting %s from cache: %s', url, cached)
        with open(cached, encoding='utf-8') as fh:
            return json.load(fh)

    # Only the network-location part of the URL is inspected for credentials,
    # so an '@' in the path or query string is left untouched.
    parts = urllib.parse.urlsplit(url)
    credentials, has_credentials, host = parts.netloc.rpartition('@')
    request_url = url
    if has_credentials:
        request_url = urllib.parse.urlunsplit(parts._replace(netloc=host))

    request = urllib.request.Request(request_url)
    if has_credentials:
        # b64encode works on bytes; decode so the header is not "Basic b'...'".
        token = base64.b64encode(credentials.encode('utf-8')).decode('ascii')
        request.add_header("Authorization", "Basic %s" % token)

    try:
        # The context manager closes the response even if parsing fails.
        with urllib.request.urlopen(request) as response:
            data = json.loads(response.read())
    except urllib.error.HTTPError as error:
        # Callers treat None as "nothing available" for this URL.
        logging.error("Getting %s failed due to %s: %s (Retry: %s)", request_url, error.code, error.reason, retry)
        if error.code == 500 and retry < 5:
            # Retry with the caller's URL so credentials and cache key are kept.
            return fetch(url, retry + 1)
        return None
    except urllib.error.URLError as error:
        # No HTTP response at all (e.g. connection refused, unknown host).
        logging.error("Failed to get %s due to %s. Do you have the correct URL for SAS and is it running?", request_url, error)
        return None

    with open(cached, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

    return data
4755
if __name__ == "__main__":
    # Command line:
    #   downloadAnnotationListsByCanvas.py manifest sas_endpoint output_dir [nlw]
    # For every canvas in the manifest's first sequence, the annotations are
    # requested from the SAS endpoint and written as an sc:AnnotationList
    # JSON file into output_dir.
    logging.basicConfig(encoding='utf-8', level=logging.ERROR)
    if len(sys.argv) < 4:
        print("Usage:\n\tdownloadAnnotationListsByCanvas.py [manifest] [sas_endpoint] [output_dir] [optional outputfilename proc]")
        print("Arg no = %s" % len(sys.argv))
        sys.exit(0)

    print("Downloading manifest: {}".format(sys.argv[1]))
    manifest = fetch(sys.argv[1])
    if not manifest:
        print('Failed to load manifest')
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist (e.g. under "python -S").
        sys.exit(-1)

    sasEndpoint = sys.argv[2]
    if sasEndpoint.endswith('/'):
        # remove last slash so the URL built below has no double slash
        sasEndpoint = sasEndpoint[:-1]

    outputDirectory = sys.argv[3]
    useCanvasName = len(sys.argv) > 4 and sys.argv[4] == 'nlw'

    # count is 1-based and advances for every canvas, including skipped ones,
    # so "pageN.json" always matches the canvas position in the manifest.
    for count, canvas in enumerate(manifest["sequences"][0]["canvases"], start=1):
        annoListData = fetch("%s/annotation/search?uri=%s" % (sasEndpoint, canvas["@id"]))
        if not annoListData:
            # Failed request or empty result: nothing is written for this canvas.
            continue

        print("Downloaded annotations for canvas: {} ".format(canvas["@id"]))
        # add list to resource
        annoList = {
            "@type": "sc:AnnotationList",
            "context": "http://iiif.io/api/presentation/2/context.json",
            "resources": annoListData
        }
        if useCanvasName:
            # 'nlw' mode: name the file after the last path segment of the canvas id
            filename = canvas["@id"].split('/')[-1]
        else:
            filename = "page%s.json" % count

        # Created lazily, only once there is something to write.
        os.makedirs(outputDirectory, exist_ok=True)
        outFilename = os.path.join(outputDirectory, filename)

        with open(outFilename, 'w') as outfile:
            json.dump(annoList, outfile, indent=4)
        print('Saved file: {}'.format(outFilename))

0 commit comments

Comments
 (0)