Commit dd8e345f authored by Mark Jordan's avatar Mark Jordan
Browse files

OBJ files now written out for single-file content models.

parent 2248333b
......@@ -9,9 +9,11 @@ import os
import urllib
import urllib2
import json
import mimetypes
# Python 2 only: force the interpreter-wide default string encoding to UTF-8.
# sys.setdefaultencoding() is normally unavailable after interpreter startup,
# which is why sys is reloaded immediately before the call.
reload(sys)
sys.setdefaultencoding('utf8')
# Initialise the mimetypes module's type maps; mimetypes.guess_extension() is
# used further down to choose a file extension from a Content-Type header.
mimetypes.init()
# For debugging purposes.
import pprint
......@@ -54,7 +56,7 @@ if collection_index in indexes:
else:
print "That isn't a valid collection_index. Bye for now."
exit()
# Ask the user where the collection should be saved, and create that
# directory (including any missing parents) when it is not there yet.
output_directory = raw_input("Enter the output directory where you want the collection saved: ")
output_directory_missing = not os.path.exists(output_directory)
if output_directory_missing:
    os.makedirs(output_directory)
......@@ -69,14 +71,34 @@ objects = objects_response_body_dict['response']['docs']
# We will want to be able to control on a per-cmodel basis which datastreams
# get downloaded, e.g. for large image, it's the JPG, not the OBJ.
# Defined once, ahead of the loop, because the list is the same for every
# object.
single_file_cmodels = [
    'info:fedora/islandora:sp_basic_image',
    'info:fedora/islandora:sp_large_image_cmodel',
    'info:fedora/islandora:sp_pdf',
    'info:fedora/islandora:sp-audioCModel',
    'info:fedora/islandora:sp_videoCModel'
]

# For every object in the response: append a row to the manifest, create a
# per-object directory, and download the object's DC datastream (plus its OBJ
# datastream when the content model is a single-file one) into that directory.
for object_doc in objects:
    pid = object_doc['PID']
    cmodel = object_doc['RELS_EXT_hasModel_uri_t']

    manifest_entry = pid + '\t' + cmodel + '\t' + object_doc['fgs_label_t']
    writeManifestFile(os.path.join(output_directory, 'manifest.tsv'), manifest_entry)

    object_url = site_base_url + 'islandora/object/' + pid
    print("Retrieving content from " + object_url)

    # The URL-quoted PID is used as the name of the per-object directory.
    escaped_pid = urllib.quote_plus(pid)
    object_output_directory = os.path.join(output_directory, escaped_pid)
    # os.makedirs() raises OSError if the directory already exists, which
    # would abort a re-run into the same output directory.
    if not os.path.exists(object_output_directory):
        os.makedirs(object_output_directory)

    # DC
    dc_response = urllib2.urlopen(object_url + '/datastream/DC/download')
    try:
        dc_xml = dc_response.read()
    finally:
        # Always release the HTTP connection, even if the read fails.
        dc_response.close()
    writeContentFile(os.path.join(object_output_directory, 'DC.xml'), dc_xml)

    # OBJ: only fetched for content models that hold a single file.
    if cmodel in single_file_cmodels:
        obj_response = urllib2.urlopen(object_url + '/datastream/OBJ/download')
        try:
            obj_content = obj_response.read()
            content_type = obj_response.info().getheader('Content-Type')
        finally:
            obj_response.close()

        # The header may be absent or carry parameters such as
        # "; charset=utf-8"; guess_extension() only accepts the bare type.
        mimetype = (content_type or '').split(';')[0].strip()
        # guess_extension() returns None for types it does not know, which
        # cannot be concatenated onto the file name; fall back to ".bin".
        extension = mimetypes.guess_extension(mimetype) or '.bin'
        obj_content_path = os.path.join(object_output_directory, 'OBJ' + extension)
        writeContentFile(obj_content_path, obj_content)

print("Output is in " + os.path.abspath(output_directory))
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment