diff --git a/extras/scripts/create_structure_files.php b/extras/scripts/create_structure_files.php
new file mode 100644
index 0000000000000000000000000000000000000000..045e4106c19e565535e0bfd2316995f97af2b995
--- /dev/null
+++ b/extras/scripts/create_structure_files.php
@@ -0,0 +1,159 @@
+<?php
+/**
+ * Adds a structure file to the root of each compound object.
+ *
+ * Expected layout of the input directory:
+ *
+ * path_to_directory_containing_compound_objects\
+ *   compound_object_1\
+ *   compound_object_2\
+ *   compound_object_3\
+ *   .
+ *   .
+ *   .
+ *
+ * Usage:
+ *
+ * > php create_structure_files.php path_to_directory_containing_compound_objects
+ */
+
+// Path of the compound object currently being processed. Read by
+// get_dir_name(), which is called from inside the XSLT stylesheet.
+$compound_obj_path = '';
+
+// A missing argument becomes an empty path, which fails the is_dir() check.
+$target_directory = isset($argv[1]) ? trim($argv[1]) : '';
+
+if (!is_dir($target_directory)) {
+  exit("Please check that you have provided a full path to a directory as the input argument." . PHP_EOL);
+}
+
+// Locate the stylesheet next to this script so the working directory is irrelevant.
+$path_to_xsl = dirname(__FILE__) . DIRECTORY_SEPARATOR . 'tree_to_compound_object.xsl';
+
+scanWrapperDirectory($target_directory, 'structure', $path_to_xsl);
+
+/**
+ * Writes a structure file into every subdirectory of the wrapper directory.
+ *
+ * Each immediate subdirectory is treated as one compound object.
+ *
+ * @param string $target_directory
+ *   Directory whose subdirectories are the compound objects.
+ * @param string $structurefilename
+ *   Basename, without the '.xml' extension, of the file to write.
+ * @param string $path_to_xsl
+ *   Path to the stylesheet applied to each directory listing.
+ */
+function scanWrapperDirectory($target_directory, $structurefilename = 'structure', $path_to_xsl = 'tree_to_compound_object.xsl') {
+  global $compound_obj_path;
+
+  // basenames to exclude.
+  $exclude_array = array('..', '.DS_Store', 'Thumbs.db', '.');
+
+  $stuffinwrapperdirectory = scandir($target_directory);
+  foreach ($stuffinwrapperdirectory as $compoundObjectOrFile) {
+    $objpath = $target_directory . DIRECTORY_SEPARATOR . $compoundObjectOrFile;
+    if (in_array($compoundObjectOrFile, $exclude_array) || !is_dir($objpath)) {
+      continue;
+    }
+
+    // get_dir_name() reads this while the stylesheet is being applied.
+    $compound_obj_path = $objpath;
+
+    // Subdirectories of the wrapper directory are compound objects;
+    // create a structure file for each.
+    $structure_xml = compoundObjectStructureXML($objpath);
+
+    // Apply XSLT
+    $structure_xml = treeToCompound($path_to_xsl, $structure_xml);
+    $structure_xml_output_file_path = $objpath . DIRECTORY_SEPARATOR
+      . $structurefilename . '.xml';
+    file_put_contents($structure_xml_output_file_path, $structure_xml);
+  }
+}
+
+/**
+ * Transforms a directory-listing XML string with the given stylesheet.
+ *
+ * @param string $path_to_xsl
+ *   Path to the XSLT stylesheet.
+ * @param string $tree_output_xml
+ *   XML string to transform.
+ *
+ * @return string
+ *   Result of XSLTProcessor::transformToXML().
+ */
+function treeToCompound($path_to_xsl, $tree_output_xml) {
+  $xsl_doc = new DOMDocument();
+  $xsl_doc->load($path_to_xsl);
+
+  $xml_doc = new DOMDocument();
+  $xml_doc->loadXML($tree_output_xml);
+
+  $xslt_proc = new XSLTProcessor();
+  $xslt_proc->importStylesheet($xsl_doc);
+  // Lets the stylesheet call get_dir_name() through php:function().
+  $xslt_proc->registerPHPFunctions();
+
+  return $xslt_proc->transformToXML($xml_doc);
+}
+
+/**
+ * Returns the last segment of the current compound object's path.
+ *
+ * Called from within the XSLT stylesheet.
+ */
+function get_dir_name() {
+  global $compound_obj_path;
+  // basename() keeps dots that are part of the directory name and does not
+  // build a regular expression from DIRECTORY_SEPARATOR.
+  return basename($compound_obj_path);
+}
+
+/**
+ * Recursively creates an XML string of a directory structure.
+ *
+ * Based on pseudo-code from http://stackoverflow.com/a/15096721/850828
+ */
+function directoryXML($directory_path) {
+  // basenames to exclude.
+  $exclude_array = array('..', '.DS_Store', 'Thumbs.db', '.');
+
+  // Names are escaped so quotes or ampersands cannot break the markup.
+  $xml = "<directory name='" . htmlspecialchars(basename($directory_path), ENT_QUOTES, 'UTF-8') . "'>";
+
+  foreach (scandir($directory_path) as $subdirOrfile) {
+    if (in_array($subdirOrfile, $exclude_array)) {
+      continue;
+    }
+
+    $subdirOrfilepath = $directory_path . DIRECTORY_SEPARATOR . $subdirOrfile;
+
+    if (is_file($subdirOrfilepath)) {
+      $xml .= "<file name='" . htmlspecialchars($subdirOrfile, ENT_QUOTES, 'UTF-8') . "' />";
+    }
+    elseif (is_dir($subdirOrfilepath)) {
+      $xml .= directoryXML($subdirOrfilepath);
+    }
+  }
+
+  $xml .= "</directory>";
+  return $xml;
+}
+
+/**
+ * Wraps the directory listing of a compound object in a 'tree' element.
+ *
+ * @return string
+ *   Formatted XML document describing $dir_path and its contents.
+ */
+function compoundObjectStructureXML($dir_path) {
+  $xmlstring = "<tree>";
+  $xmlstring .= directoryXML($dir_path);
+  $xmlstring .= "</tree>";
+  $xml = new DOMDocument( "1.0");
+  $xml->loadXML($xmlstring);
+  $xml->formatOutput = true;
+  return $xml->saveXML();
+}
diff --git a/extras/scripts/tree_to_compound_object.xsl b/extras/scripts/tree_to_compound_object.xsl
new file mode 100644
index 0000000000000000000000000000000000000000..b046a98806afe11fe6c92071daf445bb38f17b23
--- /dev/null
+++ b/extras/scripts/tree_to_compound_object.xsl
@@ -0,0 +1,43 @@
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xmlns:php="http://php.net/xsl" xsl:extension-element-prefixes="php" exclude-result-prefixes="xsl xsi php">
+
+    <!-- XSLT stylesheet to convert the output of the 'tree' utility into XML that represents the structure
+         of an Islandora compound object. Called by the generate_compound_structure_file.php post-write hook
+         script. Used by the MIK CSV compound toolchain. -->
+
+    <xsl:output method="xml" encoding="utf-8" indent="yes"/>
+    <xsl:strip-space elements="*"/>
+
+    <xsl:template match="tree/directory">
+        <xsl:comment>Islandora compound structure file generated by MIK. On batch ingest, 'parent' elements
+            will become compound objects, and 'child' elements will become their children. Files in
+            directories named in child elements' 'content' attribute will be added as their datastreams.
+            If 'parent' elements do not contain a MODS.xml file, the value of the 'parent' 'title'
+            attribute will be used as the parent's title/label.</xsl:comment>
+
+        <islandora_compound_object >
+            <xsl:attribute name="title">
+                <xsl:value-of select="php:function('get_dir_name')" />
+            </xsl:attribute>
+            <xsl:apply-templates/>
+        </islandora_compound_object>
+    </xsl:template>
+
+    <!-- We aren't interested in these nodes, so apply an empty template to them. -->
+    <xsl:template match="report|directories|files"/>
+
+    <xsl:template match="directory">
+        <xsl:choose>
+            <xsl:when test="count(file) > 1">
+                <child content="{./@name}"/>
+                <xsl:apply-templates/>
+            </xsl:when>
+            <xsl:otherwise>
+                <parent title="{./@name}">
+                    <xsl:apply-templates/>
+                </parent>
+            </xsl:otherwise>
+        </xsl:choose>
+    </xsl:template>
+
+</xsl:stylesheet>
diff --git a/includes/preprocessor.inc b/includes/preprocessor.inc
index bdde9d15430feee0e33aef7c9fb9004800238f1b..a4f00ed7240b0ed607d7fa4f61671d21aa830fb6 100644
--- a/includes/preprocessor.inc
+++ b/includes/preprocessor.inc
@@ -10,14 +10,14 @@
  */
 class IslandoraCompoundBatchPreprocessor extends IslandoraBatchPreprocessor {
   // prefixed flag for outputing echo or print statements
-  protected $icbp_verbose = false;
+  protected $icbp_verbose = FALSE;
 
   /**
    * Function to get the OBJ XML files from the input directory.
    */
   public function preprocess() {
     if (strtolower($this->parameters['icbp_verbose']) == 'true') {
-      $this->icbp_verbose = true;
+      $this->icbp_verbose = TRUE;
     }
 
     //$input_path = $this->parameters['target'] . DIRECTORY_SEPARATOR . '*.xml';
@@ -39,6 +39,7 @@ class IslandoraCompoundBatchPreprocessor extends IslandoraBatchPreprocessor {
     foreach ($iterator as $fileinfo) {
       $filePath = $fileinfo->getPathname();
       $fileExtension = pathinfo($filePath, PATHINFO_EXTENSION);
+      $fileNameWithExtension = basename($filePath);
       $isObjFile = $cbUtilities->extInContentModelMap($fileExtension);
       if ($fileinfo->isFile() && $isObjFile) {
         $batch_object = new IslandoraCompoundBatchObject($this->connection, $filePath, $this->parameters);
@@ -49,8 +50,8 @@ class IslandoraCompoundBatchPreprocessor extends IslandoraBatchPreprocessor {
         //$files[] = $filePath;
         $added[]= $batch_object;
       }
-      if ($fileExtension == 'cpd') {
-      
+      if ($fileNameWithExtension == 'structure.xml' /*$fileExtension == 'cpd'*/) {
+
         // The structure file will be in the root directory of the compound object.
         // We need to create a parent wrapper object to associate the OBJ child objects too.
         // Along with the structure file in the root directory of the compound object
@@ -70,9 +71,9 @@ class IslandoraCompoundBatchPreprocessor extends IslandoraBatchPreprocessor {
 
         $compound_object_pid = $this->addToDatabase($batch_object);
         //echo "Return Value: $returnvalue" . PHP_EOL;
-        $this->addCompoundStrcuture($filePath, $compound_object_pid);
+        //$this->addCompoundStrcuture($filePath, $compound_object_pid);
         //$this->addCompoundStrcuture($filePath);
-        
+        $this->addStrcutureData($filePath, $compound_object_pid);
       }
 
 
@@ -85,7 +86,7 @@ class IslandoraCompoundBatchPreprocessor extends IslandoraBatchPreprocessor {
    * Echos statement if icbp_verbose property is true (set using --icbp_verbose drush option)
    */
   public function icbpecho($string) {
-    if ($this->icbp_verbose == true){
+    if ($this->icbp_verbose == TRUE) {
       echo $string;
     }
   }
@@ -148,5 +149,61 @@ class IslandoraCompoundBatchPreprocessor extends IslandoraBatchPreprocessor {
     }
 
   } // End of addCompoundStrcuture
+
+  /**
+   * Parses a structure.xml file and records its 'child' elements.
+   *
+   * One row is added to the islandora_compound_batch database table for each
+   * 'child' element found in the file.
+   *
+   * @param string $structure_file_path
+   *   Path to the structure.xml file to parse.
+   * @param mixed $compound_object_pid
+   *   Value stored in the 'parent_pid' column of every inserted row.
+   */
+  public function addStrcutureData($structure_file_path, $compound_object_pid) {
+    static $object_num = 0;
+
+    $batch_id = $this->getSetId();
+
+    // Object ID is the batch set ID concatenated with the object number.
+    $object_id = $batch_id . $object_num;
+    $object_num++;
+
+    $doc = new DOMDocument();
+    $doc->load($structure_file_path);
+
+    /*
+    <islandora_compound_object title="895">
+      <child content="893"/>
+      <child content="894"/>
+    </islandora_compound_object>
+    */
+    foreach ($doc->getElementsByTagName('child') as $node) {
+      $this->icbpecho("Node name:\t" . $node->nodeName . "\n");
+
+      // A 'child' without a 'content' attribute is stored with a NULL pointer.
+      $pointerValue = $node->hasAttribute('content') ? $node->getAttribute('content') : NULL;
+      $this->icbpecho("Page pointer:\t" . $pointerValue . "\n");
+
+      $path = $node->getNodePath();
+      $this->icbpecho("Path:\t\t$path\n");
+
+      $parent_path = $node->parentNode->getNodePath();
+      $this->icbpecho("Parent path:\t" . $parent_path . "\n\n");
+
+      $insert_result = db_insert('islandora_compound_batch')
+        ->fields(array(
+          'object_id' => $object_id,
+          'object_xpath' => $path,
+          'parent_xpath' => trim($parent_path),
+          'pageptr' => $pointerValue,
+          'parent_pid' => $compound_object_pid,
+          'batch_id' => $batch_id,
+        ))->execute();
+
+      $this->icbpecho("Insert Result: $insert_result" . PHP_EOL);
+    }
+  } // End of addStrcutureData
 
 }
\ No newline at end of file
diff --git a/includes/utilities.inc b/includes/utilities.inc
index 041757e0442d1a6035ec935504d8ff8ca7101efc..6c8cba1533a603a22d4193a2d301ae6fe3f6adee 100644
--- a/includes/utilities.inc
+++ b/includes/utilities.inc
@@ -103,5 +103,58 @@ class Utilies {
       );
     }
   }
+
+  /**
+   * Recursively creates an XML string of a directory structure.
+   *
+   * Based on pseudo-code from http://stackoverflow.com/a/15096721/850828
+   */
+  private function directoryXML($directory_path) {
+    // basenames to exclude.
+    $exclude_array = array('..', '.DS_Store', 'Thumbs.db', '.');
+
+    // Names are escaped so quotes or ampersands cannot break the markup.
+    $xml = "<directory name='" . htmlspecialchars(basename($directory_path), ENT_QUOTES, 'UTF-8') . "'>";
+
+    foreach (scandir($directory_path) as $subdirOrfile) {
+      if (in_array($subdirOrfile, $exclude_array)) {
+        continue;
+      }
+
+      $subdirOrfilepath = $directory_path . DIRECTORY_SEPARATOR . $subdirOrfile;
+
+      if (is_file($subdirOrfilepath)) {
+        $xml .= "<file name='" . htmlspecialchars($subdirOrfile, ENT_QUOTES, 'UTF-8') . "' />";
+      }
+      elseif (is_dir($subdirOrfilepath)) {
+        // This is a method, so the recursive call has to go through $this.
+        $xml .= $this->directoryXML($subdirOrfilepath);
+      }
+    }
+
+    $xml .= "</directory>";
+
+    return $xml;
+  }
+
+  /**
+   * Builds a formatted XML document listing a directory tree.
+   *
+   * @param string $target_directory
+   *   Directory to describe.
+   *
+   * @return string
+   *   XML with a 'tree' root wrapping nested 'directory' and 'file' elements.
+   */
+  public function createStructureXML($target_directory) {
+    $xmlstring = "<tree>";
+    $xmlstring .= $this->directoryXML($target_directory);
+    $xmlstring .= "</tree>";
+    $xml = new DOMDocument( "1.0");
+    $xml->loadXML($xmlstring);
+    $xml->formatOutput = TRUE;
+    return $xml->saveXML();
+  }
+
 
 }
\ No newline at end of file