Commit 2248333b authored by Mark Jordan
Browse files

All basic functionality now works.

parent 1523ae8b
......@@ -4,10 +4,17 @@
site_base_url = 'http://digital.lib.sfu.ca/'
import sys
import os
import urllib
import urllib2
import json
import pprint
# Make UTF-8 the interpreter-wide default string encoding (Python 2 only;
# reload(sys) is called first, presumably so setdefaultencoding is available).
reload(sys)
sys.setdefaultencoding('utf8')
# For debugging purposes.
import pprint
pp = pprint.PrettyPrinter(indent=4)
# Content model value interpolated into the collections Solr query below.
model = 'islandora:collectionCModel'
......@@ -16,8 +23,17 @@ collections_query = 'RELS_EXT_hasModel_uri_t:"' + model + '"?sort=fgs_label_s+as
# Root of the site's REST endpoint (islandora/rest/v1/ under the base URL).
site_rest_url = site_base_url + 'islandora/rest/v1/'
# Full Solr request URL for the collection-listing query.
collections_request_url = site_rest_url + 'solr/' + collections_query
# Fetch the raw response body for the collections query.
collections_response_body = urllib2.urlopen(collections_request_url).read()
def writeContentFile(path, content):
    """Write ``content`` to ``path``, replacing any existing file.

    Args:
        path: Filesystem path of the file to create or overwrite.
        content: String data to write to the file.
    """
    # The context manager closes the handle even if write() raises, and the
    # local name no longer shadows the Python 2 builtin ``file``.
    with open(path, 'w') as handle:
        handle.write(content)
def writeManifestFile(path, content):
    """Append ``content`` as one line to the manifest file at ``path``.

    The file is created if it does not exist; existing lines are preserved.

    Args:
        path: Filesystem path of the manifest file.
        content: Text of the entry; a trailing newline is added here.
    """
    # Append mode keeps earlier entries; the context manager closes the
    # handle even if write() raises.
    with open(path, 'a') as handle:
        handle.write(content + '\n')
# Fetch the raw response body for the collections query.
collections_response_body = urllib2.urlopen(collections_request_url).read()
# Decode the JSON reply; the matching records are read from response -> docs.
collections_response_body_dict = json.loads(collections_response_body)
collections = collections_response_body_dict['response']['docs']
......@@ -28,18 +44,22 @@ for collection in collections:
collections_menu[index] = collection
print str(index) + ' - ' + collection['fgs_label_t']
# NOTE(review): this view appears to interleave the pre- and post-commit lines
# of the diff; `choice` looks like the old name and `collection_index` the new one.
# Ask the user for a menu number and convert it to an int.
# NOTE(review): int() raises ValueError on non-numeric input (see the @todo below).
choice = raw_input("Enter the number of the collection you want to download: ")
choice = int(choice)
collection_index = raw_input("Enter the number of the collection you want to download: ")
collection_index = int(collection_index)
# The valid selections are the keys of collections_menu.
indexes = list(collections_menu.keys())
if choice in indexes:
print collections_menu[choice]['fgs_label_t']
# @todo: Validate user input to be an integer.
# Echo the label of the selected collection, or report the bad selection and exit.
if collection_index in indexes:
print collections_menu[collection_index]['fgs_label_t']
else:
print "That isn't a valid choice. Bye for now."
print "That isn't a valid collection_index. Bye for now."
exit()
# Ask where to save the download and create that directory if it is missing.
output_directory = raw_input("Enter the output directory where you want the collection saved: ")
if not os.path.exists(output_directory):
os.makedirs(output_directory)
# PID of the selected collection.
# NOTE(review): the two assignments below look like the old/new pair from the diff.
collection_pid = collections_menu[choice]['PID'];
collection_pid = collections_menu[collection_index]['PID'];
# Solr query for the collection's members: sorted by label, returning PID,
# label and content model, with rows=1000000.
objects_query = 'RELS_EXT_isMemberOfCollection_uri_t:"' + collection_pid + '"?sort=fgs_label_s+asc&fl=PID,fgs_label_t,RELS_EXT_hasModel_uri_t&rows=1000000'
objects_request_url = site_rest_url + 'solr/' + objects_query
# Fetch the raw response body for the member-objects query.
objects_response_body = urllib2.urlopen(objects_request_url).read()
......@@ -47,6 +67,16 @@ objects_response_body_dict = json.loads(objects_response_body)
# The member object records are read from response -> docs of the decoded reply.
objects = objects_response_body_dict['response']['docs']
# For each object: record a manifest row, create a per-object directory,
# and save its DC datastream there.
for object in objects:
# pp.pprint(object)
# NOTE(review): the print below and manifest_entry build the same tab-separated
# string (PID, content model, label); this looks like the old/new pair from the diff.
print object['PID'] + '\t' + object['RELS_EXT_hasModel_uri_t'] + '\t' + object['fgs_label_t']
manifest_entry = object['PID'] + '\t' + object['RELS_EXT_hasModel_uri_t'] + '\t' + object['fgs_label_t']
# Append the row to manifest.tsv in the chosen output directory.
writeManifestFile(os.path.join(output_directory, 'manifest.tsv'), manifest_entry)
print("Retrieving content from " + site_base_url + "islandora/object/" + object['PID'])
# URL-quote the PID and use the result as the per-object directory name.
escaped_pid = urllib.quote_plus(object['PID'])
object_output_directory = os.path.join(output_directory, escaped_pid)
# NOTE(review): os.makedirs raises if the directory already exists, so a re-run
# into the same output directory would presumably fail here - confirm intended.
os.makedirs(object_output_directory)
# Download the object's DC datastream and store it as DC.xml in that directory.
dc_url = site_base_url + 'islandora/object/' + object['PID'] + '/datastream/DC/download'
dc_xml = urllib2.urlopen(dc_url).read()
dc_xml_path = os.path.join(object_output_directory, 'DC.xml')
writeContentFile(dc_xml_path, dc_xml)
# Report the absolute path of the output directory.
print("Output is in " + os.path.abspath(output_directory))
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment