Index: METSManifest.java =================================================================== --- METSManifest.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) +++ METSManifest.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) @@ -53,16 +53,17 @@ import org.dspace.authorize.AuthorizeException; import org.dspace.content.Bitstream; import org.dspace.content.DSpaceObject; -import org.dspace.content.Item; import org.dspace.content.crosswalk.CrosswalkException; import org.dspace.content.crosswalk.CrosswalkObjectNotSupported; import org.dspace.content.crosswalk.MetadataValidationException; import org.dspace.content.crosswalk.IngestionCrosswalk; +import org.dspace.content.crosswalk.StreamIngestionCrosswalk; import org.dspace.core.ConfigurationManager; import org.dspace.core.Constants; import org.dspace.core.Context; import org.dspace.core.PluginManager; import org.jdom.Document; +import org.jdom.Content; import org.jdom.Element; import org.jdom.JDOMException; import org.jdom.Namespace; @@ -142,7 +143,8 @@ * @throws AuthorizeException if it is returned by services called by this method. */ public InputStream getInputStream(Element mdRef) - throws MetadataValidationException, IOException, SQLException, AuthorizeException; + throws MetadataValidationException, PackageValidationException, + IOException, SQLException, AuthorizeException; } /** log4j category */ @@ -154,10 +156,10 @@ /** Prefix of DSpace configuration lines that map METS metadata type to * crosswalk plugin names. 
*/ - private final static String CONFIG_METADATA_PREFIX = "mets.submission.crosswalk."; + public final static String CONFIG_METS_PREFIX = "mets."; /** prefix of config lines identifying local XML Schema (XSD) files */ - private final static String CONFIG_XSD_PREFIX = "mets.xsd."; + private final static String CONFIG_XSD_PREFIX = CONFIG_METS_PREFIX+"xsd."; /** Dublin core element namespace */ private static Namespace dcNS = Namespace @@ -172,7 +174,7 @@ .getNamespace("mets", "http://www.loc.gov/METS/"); /** XLink namespace -- includes "xlink" prefix prefix for use in XPaths */ - private static Namespace xlinkNS = Namespace + public static Namespace xlinkNS = Namespace .getNamespace("xlink", "http://www.w3.org/1999/xlink"); /** root element of the current METS manifest. */ @@ -187,6 +189,9 @@ /** builder to use for mdRef streams, inherited from create() */ private SAXBuilder parser = null; + /** name of packager who created this manifest object, for looking up configuration entries. */ + private String configName; + // Create list of local schemas at load time, since it depends only // on the DSpace configuration. private static String localSchemas; @@ -237,6 +242,7 @@ } } localSchemas = result.toString(); + if (log.isDebugEnabled()) log.debug("Got local schemas = \""+localSchemas+"\""); } @@ -245,11 +251,12 @@ * @param builder XML parser (for parsing mdRef'd files and binData) * @param mets parsed METS document */ - private METSManifest(SAXBuilder builder, Element mets) + private METSManifest(SAXBuilder builder, Element mets, String configName) { super(); this.mets = mets; parser = builder; + this.configName = configName; } /** @@ -262,12 +269,14 @@ * or validating the METS. * @return new METSManifest object. 
*/ - public static METSManifest create(InputStream is, boolean validate) + public static METSManifest create(InputStream is, boolean validate, String configName) throws IOException, MetadataValidationException { SAXBuilder builder = new SAXBuilder(validate); + builder.setIgnoringElementContentWhitespace(true); + // Set validation feature if (validate) builder.setFeature("http://apache.org/xml/features/validation/schema", @@ -287,12 +296,13 @@ { metsDocument = builder.build(is); - // XXX for temporary debugging - /* - XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat()); - log.debug("Got METS DOCUMENT:"); - log.debug(outputPretty.outputString(metsDocument)); - */ + /*** XXX leave commented out except if needed for + *** viewing the METS document that actually gets read. + * + * XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat()); + * log.debug("Got METS DOCUMENT:"); + * log.debug(outputPretty.outputString(metsDocument)); + ****/ } catch (JDOMException je) { @@ -300,7 +310,7 @@ + is.toString(), je); } - return new METSManifest(builder, metsDocument.getRootElement()); + return new METSManifest(builder, metsDocument.getRootElement(), configName); } /** @@ -313,6 +323,17 @@ } /** + * Return the OBJID attribute of the METS manifest. + * This is where the Handle URI/URN of the object can be found. + * + * @return OBJID attribute of METS manifest + */ + public String getObjID() + { + return mets.getAttributeValue("OBJID"); + } + + /** * Gets all file elements which make up * the item's content. * @return a List of Elements. @@ -379,9 +400,9 @@ * attribute is peculiar to the DSpace METS SIP profile, and may not be * generally useful with other sorts of METS documents. * @param file METS file element of derived file - * @return file Element of original or null if none found. + * @return file path of original or null if none found. 
*/ - public Element getOriginalFile(Element file) + public String getOriginalFilePath(Element file) { String groupID = file.getAttributeValue("GROUPID"); if (groupID == null || groupID.equals("")) @@ -395,10 +416,12 @@ List oFiles = xpath.selectNodes(mets); if (oFiles.size() > 0) { - log.debug("Got ORIGINAL file for derived="+file.toString()); - return (Element)oFiles.get(0); + if (log.isDebugEnabled()) + log.debug("Got ORIGINAL file for derived="+file.toString()); + Element flocat = ((Element)oFiles.get(0)).getChild("FLocat", metsNS); + if (flocat != null) + return flocat.getAttributeValue("href", xlinkNS); } - else return null; } catch (JDOMException je) @@ -481,11 +504,11 @@ * * @return file element of Item's primary bitstream, or null if there is none. */ - public Element getPrimaryBitstream() + public Element getPrimaryOrLogoBitstream() throws MetadataValidationException { - Element firstDiv = getFirstDiv(); - Element fptr = firstDiv.getChild("fptr", metsNS); + Element objDiv = getObjStructDiv(); + Element fptr = objDiv.getChild("fptr", metsNS); if (fptr == null) return null; String id = fptr.getAttributeValue("FILEID"); @@ -497,7 +520,8 @@ return result; } - /** Get the metadata type from within a *mdSec element. + /** + * Get the metadata type from within a *mdSec element. * @return metadata type name. */ public String getMdType(Element mdSec) @@ -545,10 +569,27 @@ * @throws MetadataValidationException if METS is invalid, or there is an error parsing the XML. */ public List getMdContentAsXml(Element mdSec, Mdref callback) - throws MetadataValidationException, IOException, SQLException, AuthorizeException + throws MetadataValidationException, PackageValidationException, + IOException, SQLException, AuthorizeException { try { + // XXX sanity check: if this has more than one child, consider it + // an error since we cannot deal with more than one mdRef|mdWrap + // child. 
This may be considered a bug and need to be fixed, + // so it's best to bring it to the attention of users. + List mdc = mdSec.getChildren(); + if (mdc.size() > 1) + { + // XXX scaffolding for debugging diagnosis; at least one + // XML parser stupidly includes newlines in prettyprinting + // as text content objects.. + String id = mdSec.getAttributeValue("ID"); + StringBuffer sb = new StringBuffer(); + for (Iterator mi = mdc.iterator(); mi.hasNext();) + sb.append(", ").append(((Content)mi.next()).toString()); + throw new MetadataValidationException("Cannot parse METS with "+mdSec.getQualifiedName()+" element that contains more than one child, size="+String.valueOf(mdc.size())+", ID="+id+"Kids="+sb.toString()); + } Element mdRef = null; Element mdWrap = mdSec.getChild("mdWrap", metsNS); if (mdWrap != null) @@ -618,7 +659,8 @@ * @throws MetadataValidationException if METS format does not contain any metadata. */ public InputStream getMdContentAsStream(Element mdSec, Mdref callback) - throws MetadataValidationException, IOException, SQLException, AuthorizeException + throws MetadataValidationException, PackageValidationException, + IOException, SQLException, AuthorizeException { Element mdRef = null; Element mdWrap = mdSec.getChild("mdWrap", metsNS); @@ -653,45 +695,147 @@ } - // special call to crosswalk the guts of a metadata *Sec (dmdSec, amdSec) - // because mdRef and mdWrap have to be handled differently. - // It's a lot like getMdContentAsXml but cannot use that because xwalk - // should be called with root element OR list depending on what was given. - private void crosswalkMdContent(Element mdSec, Mdref callback, - IngestionCrosswalk xwalk, Context context, DSpaceObject dso) - throws CrosswalkException, IOException, SQLException, AuthorizeException - { - List xml = getMdContentAsXml(mdSec,callback); - - // if we get inappropriate metadata, e.g. PREMIS for Item, let it go. 
- try - { - xwalk.ingest(context, dso, xml); - } - catch (CrosswalkObjectNotSupported e) - { - log.warn("Skipping metadata for inappropriate type of object: Object="+dso.toString()+", error="+e.toString()); - } - } - - // return first
of first ; - // in DSpace profile, this is where item-wide dmd and other metadata - // lives as IDrefs. - private Element getFirstDiv() + /** + * Return the
div which describes this DSpace Object (and its contents) + * from the structMap. In all cases, this is the first div
in the first + * structMap. + * + * @return Element which is the DSpace Object Contents div
+ * @throws MetadataValidationException + */ + public Element getObjStructDiv() throws MetadataValidationException { + //get first Element sm = mets.getChild("structMap", metsNS); if (sm == null) throw new MetadataValidationException("METS document is missing the required structMap element."); + //get first
Element result = sm.getChild("div", metsNS); if (result == null) throw new MetadataValidationException("METS document is missing the required first div element in first structMap."); - log.debug("Got firstDiv result="+result.toString()); + if (log.isDebugEnabled()) + log.debug("Got getObjStructDiv result="+result.toString()); + return (Element)result; } + /** + * Get an array of child object
div elements from the METS Manifest. + * These
div elements reference the location of any child objects' METS manifests + * + * @return a List of Elements, each a div
. May be empty but NOT null + * @throws MetadataValidationException + */ + public List getChildObjDivs() + throws MetadataValidationException + { + //get the
in which describes the current object's contents + Element objDiv = getObjStructDiv(); + + //get the child
's -- these should reference the child METS manifest + return objDiv.getChildren("div", metsNS); + } + + /** + * Retrieve the file paths for the children objects' METS Manifest files. + * These file paths are located in the where @LOCTYPE=URL + * + * @return a list of Strings, corresponding to relative file paths of children METS manifests + * @throws MetadataValidationException + */ + public String[] getChildMetsFilePaths() + throws MetadataValidationException + { + //get our child object
's + List childObjDivs = getChildObjDivs(); + + List childPathList = new ArrayList(); + + if(childObjDivs != null && !childObjDivs.isEmpty()) + { + Iterator childIterator = childObjDivs.iterator(); + //For each Div, we want to find the underlying with @LOCTYPE=URL + while(childIterator.hasNext()) + { + Element childDiv = (Element) childIterator.next(); + //get all child 's + List childMptrs = childDiv.getChildren("mptr", metsNS); + + if(childMptrs!=null && !childMptrs.isEmpty()) + { + Iterator mptrIterator = childMptrs.iterator(); + //For each mptr, we want to find the one with @LOCTYPE=URL + while(mptrIterator.hasNext()) + { + Element mptr = (Element) mptrIterator.next(); + String locType = mptr.getAttributeValue("LOCTYPE"); + //if @LOCTYPE=URL, then capture @xlink:href as the METS Manifest file path + if (locType!=null && locType.equals("URL")) + { + String filePath = mptr.getAttributeValue("href", xlinkNS); + if(filePath!=null && filePath.length()>0) + childPathList.add(filePath); + } + }//end loop + }//end if 's exist + }//end child
loop + }//end if child
's exist + + String[] childPaths = new String[childPathList.size()]; + childPaths = (String[]) childPathList.toArray(childPaths); + return childPaths; + } + + /** + * Return the reference to the Parent Object from the "Parent" . + * This parent object is the owner of current object. + * + * @return Link to the Parent Object (this is the Handle of that Parent) + * @throws MetadataValidationException + */ + public String getParentOwnerLink() + throws MetadataValidationException + { + + //get a list of our structMaps + List childStructMaps = mets.getChildren("structMap", metsNS); + Element parentStructMap = null; + + // find the + if(!childStructMaps.isEmpty()) + { + for (Element structMap : childStructMaps) + { + String label = structMap.getAttributeValue("LABEL"); + if(label!=null && label.equalsIgnoreCase("Parent")) + { + parentStructMap = structMap; + break; + } + } + } + + if (parentStructMap == null) + throw new MetadataValidationException("METS document is missing the required structMap[@LABEL='Parent'] element."); + + //get first
+ Element linkDiv = parentStructMap.getChild("div", metsNS); + if (linkDiv == null) + throw new MetadataValidationException("METS document is missing the required first div element in structMap[@LABEL='Parent']."); + + //the link is in the in the @xlink:href attribute + Element mptr = linkDiv.getChild("mptr", metsNS); + if (mptr != null) + return mptr.getAttributeValue("href", xlinkNS); + + //return null if we couldn't find the link + return null; + } + + // return a single Element node found by one-off path. // use only when path varies each time you call it. private Element getElementByXPath(String path, boolean nullOk) @@ -717,18 +861,26 @@ } // Find crosswalk for the indicated metadata type (e.g. "DC", "MODS") - // The crosswalk plugin name MAY be indirected in config file, - // through an entry like - // mets.submission.crosswalk.{mdType} = {pluginName} - // e.g. - // mets.submission.crosswalk.DC = mysite-QDC - private IngestionCrosswalk getCrosswalk(String type) + private Object getCrosswalk(String type, Class clazz) { - String xwalkName = ConfigurationManager.getProperty(CONFIG_METADATA_PREFIX + type); + /** + * Allow DSpace Config to map the metadata type to a + * different crosswalk name either per-packager or for METS + * in general. 
First, look for config key like: + * mets..ingest.crosswalk.MDNAME = XWALKNAME + * then try + * mets.default.ingest.crosswalk.MDNAME = XWALKNAME + */ + String xwalkName = ConfigurationManager.getProperty( + CONFIG_METS_PREFIX+configName+".ingest.crosswalk."+type); if (xwalkName == null) - xwalkName = type; - return (IngestionCrosswalk) - PluginManager.getNamedPlugin(IngestionCrosswalk.class, xwalkName); + { + xwalkName = ConfigurationManager.getProperty( + CONFIG_METS_PREFIX+"default.ingest.crosswalk."+type); + if (xwalkName == null) + xwalkName = type; + } + return PluginManager.getNamedPlugin(clazz, xwalkName); } /** @@ -742,8 +894,8 @@ throws MetadataValidationException { // div@DMDID is actually IDREFS, a space-separated list of IDs: - Element firstDiv = getFirstDiv(); - String dmds = firstDiv.getAttributeValue("DMDID"); + Element objDiv = getObjStructDiv(); + String dmds = objDiv.getAttributeValue("DMDID"); if (dmds == null) throw new MetadataValidationException("Invalid METS: Missing reference to Item descriptive metadata, first div on first structmap must have a DMDID attribute."); String dmdID[] = dmds.split("\\s+"); @@ -763,11 +915,12 @@ throws MetadataValidationException { // div@ADMID is actually IDREFS, a space-separated list of IDs: - Element firstDiv = getFirstDiv(); - String amds = firstDiv.getAttributeValue("ADMID"); + Element objDiv = getObjStructDiv(); + String amds = objDiv.getAttributeValue("ADMID"); if (amds == null) { - log.debug("getItemRightsMD: No ADMID references found."); + if (log.isDebugEnabled()) + log.debug("getItemRightsMD: No ADMID references found."); return new Element[0]; } String amdID[] = amds.split("\\s+"); @@ -785,17 +938,150 @@ /** * Invokes appropriate crosswalks on Item-wide descriptive metadata. 
*/ - public void crosswalkItem(Context context, Item item, Element dmd, Mdref callback) - throws MetadataValidationException, + public void crosswalkItemDmd(Context context, DSpaceObject dso, + Element dmdSec, Mdref callback) + throws MetadataValidationException, PackageValidationException, CrosswalkException, IOException, SQLException, AuthorizeException { - String type = getMdType(dmd); - IngestionCrosswalk xwalk = getCrosswalk(type); + crosswalkXmd(context, dso, dmdSec, callback); + } - if (xwalk == null) + /** + * Crosswalk all technical and source metadata sections that belong + * to the whole object. + * @throws MetadataValidationException if METS is invalid, e.g. referenced amdSec is missing. + */ + public void crosswalkObjectOtherAdminMD(Context context, DSpaceObject dso, + Mdref callback) + throws MetadataValidationException, PackageValidationException, + CrosswalkException, IOException, SQLException, AuthorizeException + { + for (String amdID : getAmdIDs()) + { + Element amdSec = getElementByXPath("mets:amdSec[@ID=\""+amdID+"\"]", false); + for (Iterator ti = amdSec.getChildren("techMD", metsNS).iterator(); ti.hasNext();) + crosswalkXmd(context, dso, (Element)ti.next(), callback); + for (Iterator ti = amdSec.getChildren("digiprovMD", metsNS).iterator(); ti.hasNext();) + crosswalkXmd(context, dso, (Element)ti.next(), callback); + for (Iterator ti = amdSec.getChildren("rightsMD", metsNS).iterator(); ti.hasNext();) + crosswalkXmd(context, dso, (Element)ti.next(), callback); + } + } + + /** + * Just crosswalk the sourceMD sections; used to set the handle and parent of AIP. 
+ * @return true if any metadata section was actually crosswalked, false otherwise + */ + public boolean crosswalkObjectSourceMD(Context context, DSpaceObject dso, + Mdref callback) + throws MetadataValidationException, PackageValidationException, + CrosswalkException, IOException, SQLException, AuthorizeException + { + boolean result = false; + + for (String amdID : getAmdIDs()) + { + Element amdSec = getElementByXPath("mets:amdSec[@ID=\""+amdID+"\"]", false); + for (Iterator ti = amdSec.getChildren("sourceMD", metsNS).iterator(); ti.hasNext();) + { + crosswalkXmd(context, dso, (Element)ti.next(), callback); + result = true; + } + } + return result; + } + + /** + * Get an aray of all AMDID values for this object + * + * @return + * @throws MetadataValidationException + */ + private String[] getAmdIDs() + throws MetadataValidationException + { + // div@ADMID is actually IDREFS, a space-separated list of IDs: + Element objDiv = getObjStructDiv(); + String amds = objDiv.getAttributeValue("ADMID"); + if (amds == null) + { + if (log.isDebugEnabled()) + log.debug("crosswalkObjectTechMD: No ADMID references found."); + return new String[0]; + } + return amds.split("\\s+"); + } + + // Crosswalk *any* kind of metadata section - techMD, rightsMD, etc. + private void crosswalkXmd(Context context, DSpaceObject dso, + Element xmd, Mdref callback) + throws MetadataValidationException, PackageValidationException, + CrosswalkException, IOException, SQLException, AuthorizeException + { + String type = getMdType(xmd); + IngestionCrosswalk xwalk = (IngestionCrosswalk)getCrosswalk(type, IngestionCrosswalk.class); + + // If metadata is not simply applicable to object, + // let it go with a warning. 
+ try + { + // xwalk the DOM-model + if (xwalk != null) + xwalk.ingest(context, dso, getMdContentAsXml(xmd,callback)); + + // try stream-based xwalk + else + { + StreamIngestionCrosswalk sxwalk = + (StreamIngestionCrosswalk)getCrosswalk(type, StreamIngestionCrosswalk.class); + if (sxwalk != null) + { + Element mdRef = xmd.getChild("mdRef", metsNS); + if (mdRef != null) + { + InputStream in = null; + try + { + in = callback.getInputStream(mdRef); + sxwalk.ingest(context, dso, in, + mdRef.getAttributeValue("MIMETYPE")); + } + finally + { + if (in != null) + in.close(); + } + } + else + { + Element mdWrap = xmd.getChild("mdWrap", metsNS); + if (mdWrap != null) + { + Element bin = mdWrap.getChild("binData", metsNS); + if (bin == null) + throw new MetadataValidationException("Invalid METS Manifest: mdWrap element for streaming crosswalk without binData child."); + else + { + byte value[] = Base64.decodeBase64(bin.getText().getBytes()); + sxwalk.ingest(context, dso, + new ByteArrayInputStream(value), + mdWrap.getAttributeValue("MIMETYPE")); + } + } + else throw new MetadataValidationException("Cannot process METS Manifest: "+ - "No crosswalk found for MDTYPE="+type); - crosswalkMdContent(dmd, callback, xwalk, context, item); + "Metadata of type="+type+" requires a reference to a stream (mdRef), which was not found in "+xmd.getName()); + } + } + else + throw new MetadataValidationException("Cannot process METS Manifest: "+ + "No crosswalk found for contents of "+xmd.getName()+" element, MDTYPE="+type); + } + } + catch (CrosswalkObjectNotSupported e) + { + log.warn("Skipping metadata section "+xmd.getName()+", type="+type+" inappropriate for this type of object: Object="+dso.toString()+", error="+e.toString()); + } } /** @@ -809,7 +1095,7 @@ */ public void crosswalkBitstream(Context context, Bitstream bitstream, String fileId, Mdref callback) - throws MetadataValidationException, + throws MetadataValidationException, PackageValidationException, CrosswalkException, 
IOException, SQLException, AuthorizeException { Element file = getElementByXPath("descendant::mets:file[@ID=\""+fileId+"\"]", false); @@ -827,47 +1113,32 @@ String amdID[] = amds.split("\\s+"); for (int i = 0; i < amdID.length; ++i) { - List techMDs = getElementByXPath("mets:amdSec[@ID=\""+amdID[i]+"\"]", false). - getChildren("techMD", metsNS); - Iterator ti = techMDs.iterator(); - while (ti.hasNext()) - { - Element techMD = (Element)ti.next(); - if (techMD != null) - { - String type = getMdType(techMD); - IngestionCrosswalk xwalk = getCrosswalk(type); - log.debug("Got bitstream techMD of type="+type+", for file ID="+fileId); - - if (xwalk == null) - throw new MetadataValidationException("Cannot process METS Manifest: "+ - "No crosswalk found for techMD MDTYPE="+type); - crosswalkMdContent(techMD, callback, xwalk, context, bitstream); - } - } + Element amdSec = getElementByXPath("mets:amdSec[@ID=\""+amdID[i]+"\"]", false); + for (Iterator ti = amdSec.getChildren("techMD", metsNS).iterator(); ti.hasNext();) + crosswalkXmd(context, bitstream, (Element)ti.next(), callback); + for (Iterator ti = amdSec.getChildren("sourceMD", metsNS).iterator(); ti.hasNext();) + crosswalkXmd(context, bitstream, (Element)ti.next(), callback); } } /** - * Find Handle (if any) identifier labelling this manifest. - * @return handle (never null) - * @throws MetadataValidationException if no handle available. + * @return root element of METS document. */ - public String getHandle() - throws MetadataValidationException + public Element getMets() { - // TODO: XXX Make configurable? Handle optionally passed in? 
- // FIXME: Not sure if OBJID is really the right place + return mets; + } - String handle = mets.getAttributeValue("OBJID"); + /** + * Return entire METS document as an inputStream + * + * @return entire METS document as a stream + */ + public InputStream getMetsAsStream() + { + XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat()); - if (handle != null && handle.startsWith("hdl:")) - { - return handle.substring(4); - } - else - { - throw new MetadataValidationException("Item has no valid Handle (OBJID)"); - } + return new ByteArrayInputStream( + outputPretty.outputString(mets).getBytes()); } } Index: AbstractPackageDisseminator.java =================================================================== --- AbstractPackageDisseminator.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 0) +++ AbstractPackageDisseminator.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) @@ -0,0 +1,211 @@ +/** + * AbstractPackageDisseminator.java + * + * Version: $Revision$ + * + * Date: $Date$ + * + * Copyright (c) 2010, DuraSpace. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of DuraSpace nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ +package org.dspace.content.packager; + +import org.apache.log4j.Logger; + +import java.io.File; +import java.io.IOException; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; +import org.dspace.authorize.AuthorizeException; +import org.dspace.content.Collection; +import org.dspace.content.Community; +import org.dspace.content.DSpaceObject; +import org.dspace.content.Item; +import org.dspace.content.ItemIterator; +import org.dspace.content.crosswalk.CrosswalkException; +import org.dspace.core.Constants; +import org.dspace.core.Context; + +/** + * An abstract implementation of a DSpace Package Disseminator, which + * implements a few helper/utility methods that most (all?) PackageDisseminators + * may find useful. + *

+ * First, implements recursive functionality in the disseminateAll() + * method of the PackageDisseminator interface. This method is set up to + * recursively call the disseminate() method. + *

+ * All Package disseminators should either extend this abstract class + * or implement PackageDisseminator to better suit their needs. + * + * @author Tim Donohue + * @see PackageDisseminator + */ +public abstract class AbstractPackageDisseminator + implements PackageDisseminator +{ + /** log4j category */ + private static Logger log = Logger.getLogger(AbstractPackageDisseminator.class); + + /** List of all successfully disseminated package files */ + private List packageFileList = new ArrayList(); + + /** + * Recursively export one or more DSpace Objects as a series of packages. + * This method will export the given DSpace Object as well as all referenced + * DSpaceObjects (e.g. child objects) into a series of packages. The + * initial object is exported to the location specified by the OutputStream. + * All other packages are exported to the same directory location. + *

+ * Package is any serialized representation of the item, at the discretion + * of the implementing class. It does not have to include content bitstreams. + *
+ * Use the params parameter list to adjust the way the + * package is made, e.g. including a "metadataOnly" + * parameter might make the package a bare manifest in XML + * instead of a Zip file including manifest and contents. + *
+ * Throws an exception of the initial object is not acceptable or there is + * a failure creating the package. + * + * @param context DSpace context. + * @param dso initial DSpace object + * @param params Properties-style list of options specific to this packager + * @param pkgFile File where initial package should be written. All other + * packages will be written to the same directory as this File. + * @throws PackageValidationException if package cannot be created or there is + * a fatal error in creating it. + */ + public List disseminateAll(Context context, DSpaceObject dso, + PackageParameters params, File pkgFile) + throws PackageException, CrosswalkException, + AuthorizeException, SQLException, IOException + { + //If unset, make sure the Parameters specifies this is a recursive dissemination + if(!params.recursiveModeEnabled()) params.setRecursiveModeEnabled(true); + + //try to disseminate the first object using provided PackageDisseminator + disseminate(context, dso, params, pkgFile); + + //add to list of successfully disseminated packages + addToPackageList(pkgFile); + + //We can only recursively disseminate non-Items + //(NOTE: Items have no children, as Bitstreams/Bundles are created from Item packages) + if(dso.getType()!=Constants.ITEM) + { + //Determine where first file package was disseminated to, as all + //others will be written to same directory + String pkgDirectory = pkgFile.getParentFile().getCanonicalPath(); + if(!pkgDirectory.endsWith(File.separator)) pkgDirectory += File.separator; + String fileExtension = PackageUtils.getFileExtension(pkgFile.getName()); + + //recursively disseminate content, based on object type + switch (dso.getType()) + { + case Constants.COLLECTION : + //Also find all Items in this Collection and disseminate + Collection collection = (Collection) dso; + ItemIterator iterator = collection.getAllItems(); + while(iterator.hasNext()) + { + Item item = iterator.next(); + + //disseminate all items (recursively!) 
+ String childFileName = pkgDirectory + PackageUtils.getPackageName(item, fileExtension); + disseminateAll(context, item, params, new File(childFileName)); + } + + break; + case Constants.COMMUNITY : + //Also find all SubCommunities in this Community and disseminate + Community community = (Community) dso; + Community[] subcommunities = community.getSubcommunities(); + for(int i=0; i + * This list can be useful in reporting back to the user what content has + * been disseminated as packages. It's used by disseminateAll() to report + * what packages were created. + * + * @return List of Files which correspond to the disseminated packages + */ + protected List getPackageList() + { + return packageFileList; + } +} Index: PackageConsumer.java =================================================================== --- PackageConsumer.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 0) +++ PackageConsumer.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) @@ -0,0 +1,153 @@ +/** + * PackageConsumer.java + * + * Version: $Revision$ + * + * Date: $Date$ + * + * Copyright (c) 2010, The DSpace Foundation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the DSpace Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +package org.dspace.content.packager; + +import java.io.File; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; + +import org.dspace.content.DSpaceObject; +import org.dspace.core.ConfigurationManager; +import org.dspace.core.Constants; +import org.dspace.core.Context; +import org.dspace.core.PluginManager; +import org.dspace.event.Consumer; +import org.dspace.event.Event; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * An Event Consumer which creates AIPs as the DSpace model changes. This is + * done by invoking the packager utility in subprocesses, one per affected + * object. Any "console" output from the packager is logged. + *

+ * If a single operation (from the user's point of view) causes multiple + * update()s of a given object, then some objects may be exported more than once + * per user operation, but no object should be exported more than once per + * update(). See the object implementations to understand how this happens. In + * other words, over time the number of exports is not minimal, but I think it + * is as close as we can get. + *

+ * It is mandatory to configure {@code packageConsumer.workingDirectory} when + * employing this class; otherwise {@code initialize} will throw a + * NullPointerException. + * + * @author Mark Wood + */ +public class PackageConsumer implements Consumer +{ + /** Log file access */ + private static final Logger log = LoggerFactory + .getLogger(PackageConsumer.class); + + /** Configuration property: working directory. REQUIRED. */ + private static final String WORKING_DIRECTORY = "packageConsumer.workingDirectory"; + + /** Accumulator for unique objects to be exported */ + private Map objects; + + /** Configured working directory path */ + private String workingDirectory; + + /** Configured disseminator for AIPs */ + private PackageDisseminator disseminator; + + public void initialize() throws Exception + { + objects = new HashMap(); + + workingDirectory = ConfigurationManager.getProperty(WORKING_DIRECTORY); + if (null == workingDirectory) + throw new NullPointerException(WORKING_DIRECTORY + + " not configured"); + + disseminator = (PackageDisseminator) PluginManager + .getNamedPlugin(PackageDisseminator.class, "AIP"); + } + + public void consume(Context ctx, Event event) throws Exception + { + // TODO what event.getEventType() values are interesting? 
+/* + if (event.getEventType() & (Event.)) + return; +*/ + + int type = event.getObjectType(); + switch(type) + { + case Constants.COMMUNITY: + case Constants.COLLECTION: + case Constants.ITEM: + DSpaceObject object = event.getObject(ctx); + String handle = object.getHandle(); + objects.put(handle, object); + break; + } + } + + public void end(Context ctx) throws Exception + { + for (Entry entry : objects.entrySet()) + { + String handle = entry.getKey(); + DSpaceObject dso = entry.getValue(); + String type = Constants.typeText[dso.getType()]; + String filePath = workingDirectory + + File.separator // name separator; File.pathSeparator is the PATH-list delimiter (':' or ';') + + type + + "@" + + handle.replace('/', '-') + + ".zip"; + File file = new File(filePath); + + if (log.isDebugEnabled()) + log.debug("Disseminating DSpace {} [ hdl={} ] to {}", + new Object[] { type, handle, filePath }); + disseminator.disseminate(ctx, dso, null, file); + } + objects.clear(); + } + + public void finish(Context ctx) throws Exception + { + // Nothing to do here + } +} Index: PDFPackager.java =================================================================== --- PDFPackager.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) +++ PDFPackager.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) @@ -38,11 +38,15 @@ package org.dspace.content.packager; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.sql.SQLException; import java.util.Calendar; +import java.util.List; import org.apache.log4j.Logger; import org.dspace.authorize.AuthorizeException; @@ -51,9 +55,9 @@ import org.dspace.content.Bundle; import org.dspace.content.Collection; import org.dspace.content.DCDate; -import org.dspace.content.DSpaceObject; import org.dspace.content.Item; import org.dspace.content.WorkspaceItem; +import org.dspace.content.DSpaceObject;
import org.dspace.content.crosswalk.CrosswalkException; import org.dspace.content.crosswalk.MetadataValidationException; import org.dspace.core.Constants; @@ -80,6 +84,8 @@ * * @author Larry Stone * @version $Revision$ + * @see PackageIngester + * @see PackageDisseminator */ public class PDFPackager extends SelfNamedPlugin @@ -124,15 +130,15 @@ *

* @param context DSpace context. * @param collection collection under which to create new item. - * @param pkg input stream containing package to ingest. + * @param pkgFile The package file to ingest * @param params package parameters (none recognized) * @param license may be null, which takes default license. * @return workspace item created by ingest. * @throws PackageException if package is unacceptable or there is * a fatal error turning it into an Item. */ - public WorkspaceItem ingest(Context context, Collection collection, - InputStream pkg, PackageParameters params, + public DSpaceObject ingest(Context context, DSpaceObject parent, + File pkgFile, PackageParameters params, String license) throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException @@ -144,41 +150,23 @@ Bitstream bs = null; WorkspaceItem wi = null; - /** XXX comment out for now - // XXX for debugging of parameter handling - if (params != null) - { - Enumeration pe = params.propertyNames(); - while (pe.hasMoreElements()) - { - String name = (String)pe.nextElement(); - String v[] = params.getProperties(name); - StringBuffer msg = new StringBuffer("PackageParam: "); - msg.append(name).append(" = "); - for (int i = 0; i < v.length; ++i) - { - if (i > 0) - msg.append(", "); - msg.append(v[i]); - } - log.debug(msg); - } - } - **/ - try { // Save the PDF in a bitstream first, since the parser // has to read it as well, and we cannot "rewind" it after that. 
- wi = WorkspaceItem.create(context, collection, false); + wi = WorkspaceItem.create(context, (Collection)parent, false); Item myitem = wi.getItem(); original = myitem.createBundle("ORIGINAL"); - bs = original.createBitstream(pkg); - pkg.close(); + + InputStream fileStream = new FileInputStream(pkgFile); + bs = original.createBitstream(fileStream); + fileStream.close(); + bs.setName("package.pdf"); setFormatToMIMEType(context, bs, "application/pdf"); bs.update(); - log.debug("Created bitstream ID="+String.valueOf(bs.getID())+", parsing..."); + if (log.isDebugEnabled()) + log.debug("Created bitstream ID="+String.valueOf(bs.getID())+", parsing..."); crosswalkPDF(context, myitem, bs.retrieve()); @@ -188,7 +176,9 @@ log.info(LogManager.getHeader(context, "ingest", "Created new Item, db ID="+String.valueOf(myitem.getID())+ ", WorkspaceItem ID="+String.valueOf(wi.getID()))); - return wi; + + myitem = PackageUtils.finishCreateItem(context, wi, null, params); + return myitem; } finally { @@ -216,23 +206,49 @@ } /** + * IngestAll() cannot be implemented for a PDF ingester, because there's only one PDF to ingest + */ + public List ingestAll(Context context, DSpaceObject parent, File pkgFile, + PackageParameters params, String license) + throws PackageException, UnsupportedOperationException, + CrosswalkException, AuthorizeException, + SQLException, IOException + { + throw new UnsupportedOperationException("PDF packager does not support the ingestAll() operation at this time."); + } + + + /** * Replace is not implemented. 
*/ - public Item replace(Context ctx, Item item, InputStream pckage, PackageParameters params) - throws PackageValidationException, CrosswalkException, - AuthorizeException, SQLException, IOException, - UnsupportedOperationException + public DSpaceObject replace(Context context, DSpaceObject dso, + File pkgFile, PackageParameters params) + throws PackageException, UnsupportedOperationException, + CrosswalkException, AuthorizeException, + SQLException, IOException { - throw new UnsupportedOperationException("The replace operation is not implemented."); + throw new UnsupportedOperationException("PDF packager does not support the replace() operation at this time."); } /** + * ReplaceAll() cannot be implemented for a PDF ingester, because there's only one PDF to ingest + */ + public List replaceAll(Context context, DSpaceObject dso, + File pkgFile, PackageParameters params) + throws PackageException, UnsupportedOperationException, + CrosswalkException, AuthorizeException, + SQLException, IOException + { + throw new UnsupportedOperationException("PDF packager does not support the replaceAll() operation at this time."); + } + + /** * VERY crude dissemination: just look for the first * bitstream with the PDF package type, and toss it out. * Works on packages importer with this packager, and maybe some others. 
*/ public void disseminate(Context context, DSpaceObject dso, - PackageParameters params, OutputStream out) + PackageParameters params, File pkgFile) throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException { @@ -249,12 +265,35 @@ Bitstream pkgBs = PackageUtils.getBitstreamByFormat(item, pdff, Constants.DEFAULT_BUNDLE_NAME); if (pkgBs == null) throw new PackageValidationException("Cannot find Bitstream with format \""+BITSTREAM_FORMAT_NAME+"\""); + + //Make sure our package file exists + if(!pkgFile.exists()) + { + PackageUtils.createFile(pkgFile); + } + + //open up output stream to copy bistream to file + FileOutputStream out = new FileOutputStream(pkgFile); Utils.copy(pkgBs.retrieve(), out); + //close output stream & save to file + out.close(); } finally {} } /** + * disseminateAll() cannot be implemented for a PDF disseminator, because there's only one PDF to disseminate + */ + public List disseminateAll(Context context, DSpaceObject dso, + PackageParameters params, File pkgFile) + throws PackageException, CrosswalkException, + AuthorizeException, SQLException, IOException + { + throw new UnsupportedOperationException("PDF packager does not support the disseminateAll() operation at this time."); + } + + + /** * Identifies the MIME-type of this package, i.e. "application/pdf". * * @return the MIME type (content-type header) of the package to be returned @@ -303,14 +342,16 @@ // sanity check: item must have a title. 
if (title == null) throw new MetadataValidationException("This PDF file is unacceptable, it does not have a value for \"Title\" in its Info dictionary."); - log.debug("PDF Info dict title=\""+title+"\""); + if (log.isDebugEnabled()) + log.debug("PDF Info dict title=\""+title+"\""); item.addDC("title", null, "en", title); String value; Calendar date; if ((value = docinfo.getAuthor()) != null) { item.addDC("contributor", "author", null, value); - log.debug("PDF Info dict author=\""+value+"\""); + if (log.isDebugEnabled()) + log.debug("PDF Info dict author=\""+value+"\""); } if ((value = docinfo.getCreator()) != null) item.addDC("description", "provenance", "en", Index: DSpaceMETSDisseminator.java =================================================================== --- DSpaceMETSDisseminator.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) +++ DSpaceMETSDisseminator.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) @@ -39,29 +39,30 @@ package org.dspace.content.packager; import java.io.IOException; -import java.io.InputStream; import java.sql.SQLException; +import java.util.Date; +import java.util.List; +import java.util.ArrayList; import org.apache.log4j.Logger; import org.dspace.authorize.AuthorizeException; import org.dspace.content.Bitstream; -import org.dspace.content.BitstreamFormat; import org.dspace.content.Bundle; +import org.dspace.content.DSpaceObject; import org.dspace.content.Item; import org.dspace.core.Constants; +import org.dspace.core.ConfigurationManager; import org.dspace.core.Context; import org.dspace.license.CreativeCommons; -import edu.harvard.hul.ois.mets.AmdSec; -import edu.harvard.hul.ois.mets.BinData; -import edu.harvard.hul.ois.mets.Loctype; -import edu.harvard.hul.ois.mets.MdRef; -import edu.harvard.hul.ois.mets.MdWrap; -import edu.harvard.hul.ois.mets.Mdtype; +import edu.harvard.hul.ois.mets.Agent; import 
edu.harvard.hul.ois.mets.Mets; -import edu.harvard.hul.ois.mets.RightsMD; -import edu.harvard.hul.ois.mets.helper.Base64; +import edu.harvard.hul.ois.mets.MetsHdr; +import edu.harvard.hul.ois.mets.Role; import edu.harvard.hul.ois.mets.helper.MetsException; +import edu.harvard.hul.ois.mets.Type; +import edu.harvard.hul.ois.mets.Name; +import edu.harvard.hul.ois.mets.helper.PCData; /** * Packager plugin to produce a @@ -95,13 +96,20 @@ private final static String PROFILE_LABEL = "DSpace METS SIP Profile 1.0"; // MDTYPE value for deposit license -- "magic string" + // NOTE: format is : private final static String DSPACE_DEPOSIT_LICENSE_MDTYPE = - "DSpace Deposit License"; + "DSpaceDepositLicense:DSPACE_DEPLICENSE"; - // MDTYPE value for CC license -- "magic string" - private final static String CREATIVE_COMMONS_LICENSE_MDTYPE = - "Creative Commons"; + // MDTYPE value for CC license in RDF -- "magic string" + // NOTE: format is : + private final static String CREATIVE_COMMONS_RDF_MDTYPE = + "CreativeCommonsRDF:DSPACE_CCRDF"; + // MDTYPE value for CC license in Text -- "magic string" + // NOTE: format is : + private final static String CREATIVE_COMMONS_TEXT_MDTYPE = + "CreativeCommonsText:DSPACE_CCTXT"; + /** * Return identifier string for the profile this produces. * @@ -129,17 +137,42 @@ } /** + * Create metsHdr element - separate so subclasses can override. + */ + public MetsHdr makeMetsHdr(Context context, DSpaceObject dso, + PackageParameters params) + { + MetsHdr metsHdr = new MetsHdr(); + metsHdr.setCREATEDATE(new Date()); // FIXME: CREATEDATE is now: + // maybe should be item create + // date? + + // Agent + Agent agent = new Agent(); + agent.setROLE(Role.CUSTODIAN); + agent.setTYPE(Type.ORGANIZATION); + Name name = new Name(); + name.getContent() + .add(new PCData(ConfigurationManager + .getProperty("dspace.name"))); + agent.getContent().add(name); + metsHdr.getContent().add(agent); + return metsHdr; + } + + + /** * Get DMD choice for Item. 
It defaults to MODS, but is overridden * by the package parameters if they contain any "dmd" keys. The * params may contain one or more values for "dmd"; each of those is * the name of a crosswalk plugin, optionally followed by colon and * its METS MDTYPE name. */ - public String [] getDmdTypes(PackageParameters params) + public String [] getDmdTypes(Context context, DSpaceObject dso, PackageParameters params) throws SQLException, IOException, AuthorizeException { - // XXX maybe let dmd choices be configured in DSpace config too? + // XXX FIXME maybe let dmd choices be configured in DSpace config? String result[] = null; if (params != null) @@ -157,144 +190,71 @@ * Default is PREMIS. This is both the name of the crosswalk plugin * and the METS MDTYPE. */ - public String getTechMdType(PackageParameters params) + public String[] getTechMdTypes(Context context, DSpaceObject dso, PackageParameters params) throws SQLException, IOException, AuthorizeException { - return "PREMIS"; - } - - /** - * Add rights MD (licenses) for DSpace item. These - * may include a deposit license, and Creative Commons. - */ - public void addRightsMd(Context context, Item item, AmdSec amdSec) - throws SQLException, IOException, AuthorizeException, MetsException + if (dso.getType() == Constants.BITSTREAM) { - addDepositLicense(context, item, amdSec); - addCreativeCommons(context, item, amdSec); + String result[] = new String[1]; + result[0] = "PREMIS"; + return result; } - - // Add deposit license, if any, as external file. - // Give it a unique name including the SID in case there are other - // deposit license artifacts in the Item. 
- private boolean addDepositLicense(Context context, Item item, AmdSec amdSec) - throws SQLException, IOException, AuthorizeException, MetsException - { - Bitstream licenseBs = findDepositLicense(context, item); - - if (licenseBs == null) - return false; else - { - String resource = "depositlicense_"+ - String.valueOf(licenseBs.getSequenceID())+".txt"; - addRightsStream(licenseBs.retrieve(), resource, "text/plain", - DSPACE_DEPOSIT_LICENSE_MDTYPE, amdSec); - return true; - } + return new String[0]; } - // if there's a CC RDF license, chuck it in external file. - private boolean addCreativeCommons(Context context, Item item, AmdSec amdSec) - throws SQLException, IOException, AuthorizeException, MetsException + public String[] getSourceMdTypes(Context context, DSpaceObject dso, PackageParameters params) + throws SQLException, IOException, AuthorizeException { - // License as base64encoded... - Bitstream cc; + return new String[0]; + } - if ((cc = CreativeCommons.getLicenseRdfBitstream(item)) != null) + public String[] getDigiprovMdTypes(Context context, DSpaceObject dso, PackageParameters params) + throws SQLException, IOException, AuthorizeException { - addRightsStream(cc.retrieve(), - (gensym("creativecommons") + ".rdf"), - "text/rdf", - CREATIVE_COMMONS_LICENSE_MDTYPE, amdSec); - } - else if ((cc = CreativeCommons.getLicenseTextBitstream(item)) != null) - { - addRightsStream(cc.retrieve(), - (gensym("creativecommons") + ".txt"), - "text/plain", - CREATIVE_COMMONS_LICENSE_MDTYPE, amdSec); - } - else - return false; - return true; + return new String[0]; } - // utility to add a stream to the METS manifest. - // use external file and mdRef if possible, wrap and binData if not. 
- private void addRightsStream(InputStream is , String resourceName, - String mimeType, String mdType, AmdSec amdSec) - throws IOException, MetsException - { - RightsMD rightsMD = new RightsMD(); - rightsMD.setID(gensym("rights")); - if (extraFiles == null) + public String makeBitstreamURL(Bitstream bitstream, PackageParameters params) { - MdWrap rightsMDWrap = new MdWrap(); - rightsMDWrap.setMIMETYPE(mimeType); - rightsMDWrap.setMDTYPE(Mdtype.OTHER); - rightsMDWrap.setOTHERMDTYPE(mdType); - BinData bin = new BinData(); - bin.getContent().add(new Base64(is)); - rightsMDWrap.getContent().add(bin); - rightsMD.getContent().add(rightsMDWrap); - } - else - { - extraFiles.put(resourceName, is); - MdRef rightsMDRef = new MdRef(); - rightsMDRef.setMIMETYPE(mimeType); - rightsMDRef.setMDTYPE(Mdtype.OTHER); - rightsMDRef.setOTHERMDTYPE(mdType); - rightsMDRef.setLOCTYPE(Loctype.URL); - rightsMDRef.setXlinkHref(resourceName); - rightsMD.getContent().add(rightsMDRef); - } - amdSec.getContent().add(rightsMD); + String base = "bitstream_"+String.valueOf(bitstream.getID()); + String ext[] = bitstream.getFormat().getExtensions(); + return (ext.length > 0) ? base+"."+ext[0] : base; } /** - * Utility to find the license bitstream from an item - * - * @param context - * DSpace context - * @param item - * the item - * @return the license bitstream or null - * - * @throws IOException - * if the license bitstream can't be read + * Add rights MD (licenses) for DSpace item. These + * may include a deposit license, and Creative Commons. 
*/ - private static Bitstream findDepositLicense(Context context, Item item) + public String[] getRightsMdTypes(Context context, DSpaceObject dso, PackageParameters params) throws SQLException, IOException, AuthorizeException { - // get license format ID - int licenseFormatId = -1; - BitstreamFormat bf = BitstreamFormat.findByShortDescription(context, - "License"); - if (bf != null) - licenseFormatId = bf.getID(); + List result = new ArrayList(); - Bundle[] bundles = item.getBundles(Constants.LICENSE_BUNDLE_NAME); - for (int i = 0; i < bundles.length; i++) + if (dso.getType() == Constants.ITEM) { - // Assume license will be in its own bundle - Bitstream[] bitstreams = bundles[i].getBitstreams(); + Item item = (Item)dso; + if (PackageUtils.findDepositLicense(context, item) != null) + result.add(DSPACE_DEPOSIT_LICENSE_MDTYPE); - if (bitstreams[0].getFormat().getID() == licenseFormatId) - { - return bitstreams[0]; + if (CreativeCommons.getLicenseRdfBitstream(item) != null) + result.add(CREATIVE_COMMONS_RDF_MDTYPE); + else if (CreativeCommons.getLicenseTextBitstream(item) != null) + result.add(CREATIVE_COMMONS_TEXT_MDTYPE); } - } - - // Oops! No license! - return null; + return result.toArray(new String[result.size()]); } // This is where we'd elaborate on the default structMap; nothing to add, yet. - public void addStructMap(Context context, Item item, + public void addStructMap(Context context, DSpaceObject dso, PackageParameters params, Mets mets) throws SQLException, IOException, AuthorizeException, MetsException { } + + // only exclude metadata bundles from package. + public boolean includeBundle(Bundle bundle) + { + return ! 
PackageUtils.isMetaInfoBundle(bundle); + } } Index: DSpaceAIPDisseminator.java =================================================================== --- DSpaceAIPDisseminator.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 0) +++ DSpaceAIPDisseminator.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) @@ -0,0 +1,450 @@ +/* + * DSpaceAIPDisseminator.java + * + * Version: $Revision: 1.1 $ + * + * Date: $Date: 2006/03/17 00:04:38 $ + * + * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts + * Institute of Technology. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Hewlett-Packard Company nor the name of the + * Massachusetts Institute of Technology nor the names of their + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +package org.dspace.content.packager; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.Date; +import java.util.List; +import java.util.ArrayList; + +import org.apache.log4j.Logger; +import org.dspace.authorize.AuthorizeException; +import org.dspace.content.Bitstream; +import org.dspace.content.Bundle; +import org.dspace.content.DSpaceObject; +import org.dspace.content.Item; +import org.dspace.content.Collection; +import org.dspace.content.Community; +import org.dspace.content.Site; +import org.dspace.core.Constants; +import org.dspace.core.ConfigurationManager; +import org.dspace.core.Context; +import org.dspace.license.CreativeCommons; + +import edu.harvard.hul.ois.mets.Agent; +import edu.harvard.hul.ois.mets.Loctype; +import edu.harvard.hul.ois.mets.Mets; +import edu.harvard.hul.ois.mets.MetsHdr; +import edu.harvard.hul.ois.mets.Name; +import edu.harvard.hul.ois.mets.Role; +import edu.harvard.hul.ois.mets.Div; +import edu.harvard.hul.ois.mets.Mptr; +import edu.harvard.hul.ois.mets.StructMap; +import edu.harvard.hul.ois.mets.Type; +import edu.harvard.hul.ois.mets.helper.MetsException; +import edu.harvard.hul.ois.mets.helper.PCData; +import java.net.URLEncoder; + +/** + * Subclass of the METS packager framework to disseminate a DSpace + * Archival Information Package (AIP). 
The AIP is intended to be, foremost, + * a _complete_ and _accurate_ representation of one object in the DSpace + * object model. An AIP contains all of the information needed to restore + * the object precisely in another DSpace archive instance. + *

+ * This disseminator supports two distinct types of AIPs: "Manifest-Only" and "External". + * The Manifest-Only AIP, which is selected by specifying a PackageParameters + * key "manifestOnly" with the value "true", refers to all its contents by + * reference only. For Community or Collection AIPs this means all references to their + * child objects are just via Handles. For Item AIPs all Bitstreams are just + * referenced by their asset store location instead of finding them in the "package". + * The Manifest-Only AIP package format is simply a METS XML document serialized into a file. + *

+ * An "external" AIP (the default), is a conventional Zip-file based package + * that includes copies of all bitstreams referenced by the object as well + * as a serialized METS XML document in the path "mets.xml". + * + * Configuration keys: + * The following take as values a space-and-or-comma-separated list + * of plugin names that name *either* a DisseminationCrosswalk or + * StreamDisseminationCrosswalk plugin. Shown are the dfeault values. + * The value may be a simple plugin name, or a METS MDsec-name followed by + * a colon and the plugin name e.g. "DSpaceHistory :HISTORY" + * + * # MD types to put in the sourceMD section of the object. + * aip.disseminate.sourceMD = AIP-TECHMD + * + * # MD types to put in the techMD section of the object (and member Bitstreams if an Item) + * aip.disseminate.techMD = PREMIS + * + * # MD types to put in digiprovMD section of the object. + * # (Note that this is disabled unless the History System is installed) + * #aip.disseminate.digiprovMD = DSpaceHistory :HISTORY + * + * # MD types to put in the rightsMD section of the object. + * aip.disseminate.rightsMD = DSpaceDepositLicense:DSPACE_DEPLICENSE, \ + * CreativeCommonsRDF:DSPACE_CCRDF, CreativeCommonsText:DSPACE_CCTXT + * + * # MD types to put in dmdSec's corresponding the object. + * aip.disseminate.dmd = MODS, DIM + * + * @author Larry Stone + * @version $Revision: 1.1 $ + * @see AbstractMETSDisseminator + */ +public class DSpaceAIPDisseminator + extends AbstractMETSDisseminator +{ + /** log4j category */ + private static Logger log = Logger.getLogger(DSpaceAIPDisseminator.class); + + /** + * Unique identifier for the profile of the METS document. + * To ensure uniqueness, it is the URL that the XML schema document would + * have _if_ there were to be one. There is no schema at this time. + */ + public final static String PROFILE_1_0 = + "http://www.dspace.org/schema/aip/mets_aip_1_0.xsd"; + + /** TYPE of the div containing AIP's parent handle in its mptr. 
*/ + final public static String PARENT_DIV_TYPE = "AIP Parent Link"; + + // Default MDTYPE value for deposit license -- "magic string" + // NOTE: format is : + private final static String DSPACE_DEPOSIT_LICENSE_MDTYPE = + "DSpaceDepositLicense:DSPACE_DEPLICENSE"; + + // Default MDTYPE value for CC license in RDF -- "magic string" + // NOTE: format is : + private final static String CREATIVE_COMMONS_RDF_MDTYPE = + "CreativeCommonsRDF:DSPACE_CCRDF"; + + // Default MDTYPE value for CC license in Text -- "magic string" + // NOTE: format is : + private final static String CREATIVE_COMMONS_TEXT_MDTYPE = + "CreativeCommonsText:DSPACE_CCTXT"; + + /** + * Return identifier string for the METS profile this produces. + * + * @return string name of profile. + */ + public String getProfile() + { + return PROFILE_1_0; + } + + /** + * Returns name of METS fileGrp corresponding to a DSpace bundle name. + * For AIP the mapping is direct. + * @param bname name of DSpace bundle. + * @return string name of fileGrp + */ + public String bundleToFileGrp(String bname) + { + return bname; + } + + /** + * metsHdr for AIP. + * CREATEDATE is time at which the package (i.e. this manifest) was created. + * LASTMODDATE is last-modified time of the target object, if available. + * Agent describes the archive this belongs to. + */ + public MetsHdr makeMetsHdr(Context context, DSpaceObject dso, + PackageParameters params) + { + MetsHdr metsHdr = new MetsHdr(); + + // date the METS package/manifest was created. + metsHdr.setCREATEDATE(new Date()); + + if (dso.getType() == Constants.ITEM) + metsHdr.setLASTMODDATE(((Item)dso).getLastModified()); + + // Agent - name custodian, the DSpace Archive, by handle. 
+ Agent agent = new Agent(); + agent.setROLE(Role.CUSTODIAN); + agent.setTYPE(Type.OTHER); + agent.setOTHERTYPE("DSpace Archive"); + Name name = new Name(); + name.getContent() + .add(new PCData(Site.getSiteHandle())); + agent.getContent().add(name); + metsHdr.getContent().add(agent); + return metsHdr; + } + + /** + * Get DMD choice for Item. It defaults to MODS, plus DIM. + */ + public String [] getDmdTypes(Context context, DSpaceObject dso, PackageParameters params) + throws SQLException, IOException, AuthorizeException + { + String dmdTypes = ConfigurationManager.getProperty("aip.disseminate.dmd"); + if (dmdTypes == null) + { + String result[] = new String[2]; + result[0] = "MODS"; + result[1] = "DIM"; + return result; + } + else + return dmdTypes.split("\\s*,\\s*"); + } + + /** + * Get name of technical metadata crosswalk for Bitstreams. + * Default is PREMIS (for Bistreams only). + * This is both the name of the crosswalk plugin + * and the METS MDTYPE. + */ + public String[] getTechMdTypes(Context context, DSpaceObject dso, PackageParameters params) + throws SQLException, IOException, AuthorizeException + { + String techTypes = ConfigurationManager.getProperty("aip.disseminate.techMD"); + if (techTypes == null) + { + if (dso.getType() == Constants.BITSTREAM) + { + String result[] = new String[1]; + result[0] = "PREMIS"; + return result; + } + else + { + return new String[0]; + } + } + else + return techTypes.split("\\s*,\\s*"); + } + + /** + * Get name of source metadata crosswalk for each kind of DSO. + * Default is AIP-TECHMD. + * In an AIP, the sourceMD element MUST include the original persistent + * identifier (Handle) of the object, and the original persistent ID + * (Handle) of its parent in the archive, so that it can be restored. 
+ */ + public String[] getSourceMdTypes(Context context, DSpaceObject dso, PackageParameters params) + throws SQLException, IOException, AuthorizeException + { + String sourceTypes = ConfigurationManager.getProperty("aip.disseminate.sourceMD"); + if (sourceTypes == null) + { + String result[] = new String[1]; + result[0] = "AIP-TECHMD"; + return result; + } + else + return sourceTypes.split("\\s*,\\s*"); + } + + /** + * Get name of provenance MD crosswalks - none by default. + */ + public String[] getDigiprovMdTypes(Context context, DSpaceObject dso, PackageParameters params) + throws SQLException, IOException, AuthorizeException + { + String dpTypes = ConfigurationManager.getProperty("aip.disseminate.digiprovMD"); + if (dpTypes == null) + return new String[0]; + else + return dpTypes.split("\\s*,\\s*"); + } + + /** + * Return crosswalks of Rights metadata types. By default, for Item + * only, return the deposit license and CreativeCommons if available. + */ + public String[] getRightsMdTypes(Context context, DSpaceObject dso, PackageParameters params) + throws SQLException, IOException, AuthorizeException + { + + // rights only apply to Item at this time. + if (dso.getType() == Constants.ITEM) + { + String rTypes = ConfigurationManager.getProperty("aip.disseminate.rightsMD"); + if (rTypes == null) + { + List result = new ArrayList(); + if (PackageUtils.findDepositLicense(context, (Item)dso) != null) + result.add(DSPACE_DEPOSIT_LICENSE_MDTYPE); + + if (CreativeCommons.getLicenseRdfBitstream((Item)dso) != null) + result.add(CREATIVE_COMMONS_RDF_MDTYPE); + else if (CreativeCommons.getLicenseTextBitstream((Item)dso) != null) + result.add(CREATIVE_COMMONS_TEXT_MDTYPE); + return result.toArray(new String[result.size()]); + } + else + return rTypes.split("\\s*,\\s*"); + } + return new String[0]; + } + + /** + * Get the URL by which the METS manifest refers to a Bitstream + * member of an Item the "package". 
Note that this ONLY has to work + * for the Bitstreams belonging to a Bundle in an Item, NOT for the + * other associated Bitstreams containing metadata streams, or logo + * of a Community/Collection, etc. + *

+ * For a manifest-only AIP, this is a reference to an HTTP URL where + * the bitstream should be able to be downloaded from. + * An external AIP names a file in the package + * with a relative URL, that is, a relative pathname. + *

+ * @return String in URL format naming path to bitstream. + */ + public String makeBitstreamURL(Bitstream bitstream, PackageParameters params) + { + // if bare manifest, use external "persistent" URI for bitstreams + if (params != null && (params.getBooleanProperty("manifestOnly", false))) + { + // Try to build a persistent(-ish) URI for bitstream + // Format: {site-base-url}/bitstream/{item-handle}/{sequence-id}/{bitstream-name} + try + { + // get handle of parent Item of this bitstream, if there is one: + String handle = null; + Bundle[] bn = bitstream.getBundles(); + if (bn.length > 0) + { + Item bi[] = bn[0].getItems(); + if (bi.length > 0) + handle = bi[0].getHandle(); + } + if (handle != null) + { + return ConfigurationManager + .getProperty("dspace.url") + + "/bitstream/" + + handle + + "/" + + String.valueOf(bitstream.getSequenceID()) + + "/" + + URLEncoder.encode(bitstream.getName(), "UTF-8"); + } + } + catch (Exception e) + { + //do nothing -- we just fail to build a nice bitstream url + } + + // We should only get here if we failed to build a nice URL above + // so, by default, we're just going to return the bitstream name. + return bitstream.getName(); + } + else + { + String base = "bitstream_"+String.valueOf(bitstream.getID()); + String ext[] = bitstream.getFormat().getExtensions(); + return (ext.length > 0) ? base+"."+ext[0] : base; + } + } + + /** + * Adds another structMap element to contain the "parent link" that + * is an essential part of every AIP. This is a structmap of one + * div, which contains an mptr indicating the Handle of the parent + * of this object in the archive. The div has a unique TYPE attribute + * value, "AIP Parent Link", and the mptr has a LOCTYPE of "HANDLE" + * and an xlink:href containing the raw Handle value. + *

+ * Note that the parent Handle has to be stored here because the + * parent is needed to create a DSpace Object when restoring the + * AIP; it cannot be determined later once the ingester parses it + * out of the metadata when the crosswalks are run. So, since the + * crosswalks require an object to operate on, and creating the + * object requires a parent, we cannot depend on metadata processed + * by crosswalks (e.g. AIP techMd) for the parent, it has to be at + * a higher level in the AIP manifest. The structMap is an obvious + * and standards-compliant location for it. + */ + public void addStructMap(Context context, DSpaceObject dso, + PackageParameters params, Mets mets) + throws SQLException, IOException, AuthorizeException, MetsException + { + // find parent Handle + String parentHandle = null; + switch (dso.getType()) + { + case Constants.ITEM: + parentHandle = ((Item)dso).getOwningCollection().getHandle(); + break; + + case Constants.COLLECTION: + parentHandle = (((Collection)dso).getCommunities())[0].getHandle(); + break; + + case Constants.COMMUNITY: + Community parent = ((Community)dso).getParentCommunity(); + if (parent == null) + parentHandle = Site.getSiteHandle(); + else + parentHandle = parent.getHandle(); + case Constants.SITE: + break; + } + + // Parent Handle should only be null if we are creating a site-wide AIP + if(parentHandle!=null) + { + // add a structMap to contain div pointing to parent: + StructMap structMap = new StructMap(); + structMap.setID(gensym("struct")); + structMap.setTYPE("LOGICAL"); + structMap.setLABEL("Parent"); + Div div0 = new Div(); + div0.setID(gensym("div")); + div0.setTYPE(PARENT_DIV_TYPE); + div0.setLABEL("Parent of this DSpace Object"); + Mptr mptr = new Mptr(); + mptr.setID(gensym("mptr")); + mptr.setLOCTYPE(Loctype.HANDLE); + mptr.setXlinkHref(parentHandle); + div0.getContent().add(mptr); + structMap.getContent().add(div0); + mets.getContent().add(structMap); + } + } + + /** + * include all bundles in AIP as 
content. + */ + public boolean includeBundle(Bundle bundle) + { + return true; + } +} Index: PackageUtils.java =================================================================== --- PackageUtils.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) +++ PackageUtils.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) @@ -39,22 +39,31 @@ package org.dspace.content.packager; import java.io.ByteArrayInputStream; +import java.io.File; import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; import java.sql.SQLException; +import java.util.HashMap; +import java.util.Map; import org.dspace.authorize.AuthorizeException; import org.dspace.content.Bitstream; import org.dspace.content.BitstreamFormat; import org.dspace.content.Bundle; import org.dspace.content.Collection; +import org.dspace.content.Community; import org.dspace.content.DCValue; +import org.dspace.content.DSpaceObject; import org.dspace.content.FormatIdentifier; +import org.dspace.content.InstallItem; import org.dspace.content.Item; +import org.dspace.content.WorkspaceItem; import org.dspace.core.Constants; import org.dspace.core.Context; import org.dspace.license.CreativeCommons; +import org.dspace.workflow.WorkflowItem; +import org.dspace.workflow.WorkflowManager; /** * Container class for code that is useful to many packagers. @@ -65,7 +74,71 @@ public class PackageUtils { + + // Map of metadata elements for Communities and Collections + // Format is alternating key/value in a straight array; use this + // to initialize hash tables that convert to and from. 
+ private final static String ccMetadataMap[] = + { + // getMetadata() -> DC element.term + "name", "dc.title", + "introductory_text", "dc.description", + "short_description", "dc.description.abstract", + "side_bar_text", "dc.description.tableofcontents", + "copyright_text", "dc.rights", + "provenance_description", "dc.provenance", + "license", "dc.rights.license" + }; + + // HashMaps to convert Community/Collection metadata to/from Dublin Core + // (useful when crosswalking Communities/Collections) + private final static Map ccMetadataToDC = new HashMap(); + private final static Map ccDCToMetadata = new HashMap(); + static + { + for (int i = 0; i < ccMetadataMap.length; i += 2) + { + ccMetadataToDC.put(ccMetadataMap[i], ccMetadataMap[i+1]); + ccDCToMetadata.put(ccMetadataMap[i+1], ccMetadataMap[i]); + } + } + /** + * Translate a Dublin Core metadata field into a Container's (Community or Collection) + * database column for that metadata entry. + *

+ * e.g. "dc.title" would translate to the "name" database column + *

+ * This method is of use when crosswalking Community or Collection metadata for ingest, + * as most ingest Crosswalks tend to deal with translating to DC-based metadata. + * + * @param dcField The dublin core metadata field + * @return The Community or Collection DB column where this metadata info is stored. + */ + public static String dcToContainerMetadata(String dcField) + { + return ccDCToMetadata.get(dcField); + } + + /** + * Translate a Container's (Community or Collection) database column into + * a valid Dublin Core metadata field. This is the opposite of 'dcToContainerMetadata()'. + *

+ * e.g. the "name" database column would translate to "dc.title" + *

+ * This method is of use when crosswalking Community or Collection metadata for dissemination, + * as most dissemination Crosswalks tend to deal with translating from DC-based metadata. + * + * + * @param databaseField The Community or Collection DB column + * @return The Dublin Core metadata field that this metadata translates to. + */ + public static String containerMetadataToDC(String databaseField) + { + return ccMetadataToDC.get(databaseField); + } + + /** * Test that item has adequate metadata. * Check item for the minimal DC metadata required to ingest a * new item, and throw a PackageValidationException if test fails. @@ -73,7 +146,7 @@ * * @param item - item to test. */ - public static void checkMetadata(Item item) + public static void checkItemMetadata(Item item) throws PackageValidationException { DCValue t[] = item.getDC( "title", null, Item.ANY); @@ -99,7 +172,20 @@ if (license == null) license = collection.getLicense(); InputStream lis = new ByteArrayInputStream(license.getBytes()); - Bundle lb = item.createBundle(Constants.LICENSE_BUNDLE_NAME); + + Bundle lb; + //If LICENSE bundle is missing, create it + Bundle[] bundles = item.getBundles(Constants.LICENSE_BUNDLE_NAME); + if(bundles==null || bundles.length==0) + { + lb = item.createBundle(Constants.LICENSE_BUNDLE_NAME); + } + else + { + lb = bundles[0]; + } + + //Create the License bitstream Bitstream lbs = lb.createBitstream(lis); lis.close(); BitstreamFormat bf = BitstreamFormat.findByShortDescription(context, "License"); @@ -247,6 +333,29 @@ String shortDesc, String MIMEType, String desc) throws SQLException, AuthorizeException { + return findOrCreateBitstreamFormat(context, shortDesc, MIMEType, desc, BitstreamFormat.KNOWN, false); + } + + /** + * Find or create a bitstream format to match the given short + * description. + * Used by packager ingesters to obtain a special bitstream + * format for the manifest (and/or metadata) file. + *

+ * NOTE: When creating a new format, do NOT set any extensions, since + * we don't want any file with the same extension, which may be something + * generic like ".xml", to accidentally get set to this format. + * @param context - the context. + * @param shortDesc - short descriptive name, used to locate existing format. + * @param MIMEtype - mime content-type + * @param desc - long description + * @param internal value for the 'internal' flag of a new format if created. + * @return BitstreamFormat object that was found or created. Never null. + */ + public static BitstreamFormat findOrCreateBitstreamFormat(Context context, + String shortDesc, String MIMEType, String desc, int supportLevel, boolean internal) + throws SQLException, AuthorizeException + { BitstreamFormat bsf = BitstreamFormat.findByShortDescription(context, shortDesc); // not found, try to create one @@ -256,9 +365,376 @@ bsf.setShortDescription(shortDesc); bsf.setMIMEType(MIMEType); bsf.setDescription(desc); - bsf.setSupportLevel(BitstreamFormat.KNOWN); + bsf.setSupportLevel(supportLevel); + bsf.setInternal(internal); bsf.update(); } return bsf; } + + /** + * Utility to find the license bitstream from an item + * + * @param context + * DSpace context + * @param item + * the item + * @return the license bitstream or null + * + * @throws IOException + * if the license bitstream can't be read + */ + public static Bitstream findDepositLicense(Context context, Item item) + throws SQLException, IOException, AuthorizeException + { + // get license format ID + int licenseFormatId = -1; + BitstreamFormat bf = BitstreamFormat.findByShortDescription(context, + "License"); + if (bf != null) + licenseFormatId = bf.getID(); + + Bundle[] bundles = item.getBundles(Constants.LICENSE_BUNDLE_NAME); + for (int i = 0; i < bundles.length; i++) + { + // Assume license will be in its own bundle + Bitstream[] bitstreams = bundles[i].getBitstreams(); + + for(int j=0; j < bitstreams.length; j++) + { + // The License should 
have a file format of "License" + if (bitstreams[j].getFormat().getID() == licenseFormatId) + { + //found a bitstream with format "License" -- return it + return bitstreams[j]; + } + } + + // If we couldn't find a bitstream with format = "License", + // we will just assume the first bitstream is the deposit license + // (usually a safe assumption as it is in the LICENSE bundle) + if(bitstreams.length>0) + return bitstreams[0]; + } + + // Oops! No license! + return null; + } + + + /*===================================================== + * Utility Methods -- may be useful for subclasses + *====================================================*/ + + + /** + * Create the specified DSpace Object, based on the passed + * in Package Parameters (along with other basic info required + * to create the object) + * + * @param context DSpace Context + * @param parent Parent Object + * @param type Type of new Object + * @param handle Handle of new Object (may be null) + * @param params Properties-style list of options (interpreted by each packager). + * @return newly created DSpace Object (or null) + * @throws AuthorizeException + * @throws SQLException + * @throws IOException + */ + public static DSpaceObject createDSpaceObject(Context context, DSpaceObject parent, int type, String handle, PackageParameters params) + throws AuthorizeException, SQLException, IOException + { + DSpaceObject dso = null; + + switch (type) + { + case Constants.COLLECTION: + dso = ((Community)parent).createCollection(handle); + return dso; + + case Constants.COMMUNITY: + // top-level community? 
+ if (parent == null || parent.getType() == Constants.SITE) + dso = Community.create(null, context, handle); + else + dso = ((Community)parent).createSubcommunity(handle); + return dso; + + case Constants.ITEM: + //Initialize a WorkspaceItem + //(Note: set submitter to currentUser for now -- we can change it later if manifest specifies someone else) + WorkspaceItem wsi = WorkspaceItem.create(context, (Collection)parent, params.useCollectionTemplate(), context.getCurrentUser(), handle); + + // Finish creating item (this will either install item or start a workflow, based on params) + dso = finishCreateItem(context, wsi, handle, params); + + return dso; + } + + return null; + } + + /** + * Perform any final tasks on a newly created WorkspaceItem in order to finish + * ingestion of an Item. + *

+ * This may include starting up a workflow for the new item, restoring it, + * or archiving it (based on params passed in) + * + * @param context DSpace Context + * @param wsi Workspace Item that requires finishing + * @param handle Handle to assign to item (may be null) + * @param params Properties-style list of options (interpreted by each packager). + * @return finished Item + * @throws IOException + * @throws SQLException + * @throws AuthorizeException + */ + public static Item finishCreateItem(Context context, WorkspaceItem wsi, String handle, PackageParameters params) + throws IOException, SQLException, AuthorizeException + { + // restore existing object using the package (including attempting to restore the handle) + if (params.restoreModeEnabled()) + { + InstallItem.restoreItem(context, wsi, handle); + + //return newly restored item + return wsi.getItem(); + } + // submit normally, passing along to workflow + else if (params.workflowEnabled()) + { + // Start an item workflow + WorkflowItem wfi = WorkflowManager.startWithoutNotify(context, wsi); + + // return item with workflow started + return wfi.getItem(); + } + + // skip workflow, but otherwise normal submission + else + { + InstallItem.installItem(context, wsi, handle); + + // return newly installed item + return wsi.getItem(); + } + }//end finishCreateItem + + + /** + * Commit all recent changes to DSpaceObject. + *

+ * This method is necessary as there is no generic 'update()' on a DSpaceObject + * + * @param dso DSpaceObject to update + */ + public static void updateDSpaceObject(DSpaceObject dso) + throws AuthorizeException, SQLException, IOException + { + if (dso != null) + { + switch (dso.getType()) + { + case Constants.BITSTREAM: + ((Bitstream)dso).update(); + break; + case Constants.ITEM: + ((Item)dso).update(); + break; + case Constants.COLLECTION: + ((Collection)dso).update(); + break; + case Constants.COMMUNITY: + ((Community)dso).update(); + break; + } + } + } + + + /** + * Utility method to retrieve the file extension off of a filename. + * + * @param filename Full filename + * @return file extension + */ + public static String getFileExtension(String filename) + { + // Extract the file extension off of a filename + String extension = filename; + int lastDot = filename.lastIndexOf('.'); + + if (lastDot != -1) + { + extension = filename.substring(lastDot + 1); + } + + return extension; + } + + + /** + * Returns name of a dissemination information package (DIP), based on the + * DSpace object and a provided fileExtension + *

+ * Format: [dspace-obj-type]@[handle-with-dashes].[fileExtension] + * OR [dspace-obj-type]@internal-id-[dspace-ID].[fileExtension] + * + * @param dso DSpace Object to create file name for + * @param fileExtension file Extension of output file. + * @return filename of a DIP representing the DSpace Object + */ + public static String getPackageName(DSpaceObject dso, String fileExtension) + { + String handle = dso.getHandle(); + // if Handle is empty, use internal ID for name + if(handle==null || handle.isEmpty()) + handle = "internal-id-" + dso.getID(); + else // if Handle exists, replace '/' with '-' to meet normal file naming conventions + handle = handle.replace("/", "-"); + + //Get type name + int typeID = dso.getType(); + String type = Constants.typeText[typeID]; + + //check if passed in file extension already starts with "." + if(!fileExtension.startsWith(".")) fileExtension = "." + fileExtension; + + //Here we go, here's our magical file name! + //Format: typeName@handle.extension + return type + "@" + handle + fileExtension; + } + + + /** + * Creates the specified file (along with all parent directories) if it doesn't already + * exist. If the file already exists, nothing happens. + * + * @param file + * @return boolean true if succeeded, false otherwise + * @throws IOException + */ + public static boolean createFile(File file) + throws IOException + { + boolean success = false; + + //Check if file exists + if(!file.exists()) + { + //file doesn't exist yet, does its parent directory exist? + if(!file.getParentFile().exists()) + { + //create the parent directory structure + file.getParentFile().mkdirs(); + } + //create actual file + success = file.createNewFile(); + } + return success; + } + + /** + * Remove all bitstreams (files) associated with a DSpace object. + *

+ * If this object is an Item, it removes all bundles & bitstreams. If this + * object is a Community or Collection, it removes all logo bitstreams. + *

+ * This method is useful for replace functionality. + * + * @param dso The object to remove all bitstreams from + */ + public static void removeAllBitstreams(DSpaceObject dso) + throws SQLException, IOException, AuthorizeException + { + //If we are dealing with an Item + if(dso.getType()==Constants.ITEM) + { + Item item = (Item) dso; + // Get a reference to all Bundles in Item (which contain the bitstreams) + Bundle[] bunds = item.getBundles(); + + // Remove each bundle -- this will in turn remove all bitstreams associated with this Item. + for (int i = 0; i < bunds.length; i++) + { + item.removeBundle(bunds[i]); + } + } + else if (dso.getType()==Constants.COLLECTION) + { + Collection collection = (Collection) dso; + //clear out the logo for this collection + collection.setLogo(null); + } + else if (dso.getType()==Constants.COMMUNITY) + { + Community community = (Community) dso; + //clear out the logo for this community + community.setLogo(null); + } + } + + + /** + * Removes all metadata associated with a DSpace object. + *

+ * This method is useful for replace functionality. + * + * @param dso The object to remove all metadata from + */ + public static void clearAllMetadata(DSpaceObject dso) + throws SQLException, IOException, AuthorizeException + { + //If we are dealing with an Item + if(dso.getType()==Constants.ITEM) + { + Item item = (Item) dso; + //clear all metadata entries + item.clearMetadata(Item.ANY, Item.ANY, Item.ANY, Item.ANY); + } + //Else if collection, clear its database table values + else if (dso.getType()==Constants.COLLECTION) + { + Collection collection = (Collection) dso; + + // Use the MetadataToDC map (defined privately in this class) + // to clear out all the Collection database fields. + for(String dbField : ccMetadataToDC.keySet()) + { + try + { + collection.setMetadata(dbField, null); + } + catch(IllegalArgumentException ie) + { + // ignore the error -- just means the field doesn't exist in DB + // Communities & Collections don't include the exact same metadata fields + } + } + } + //Else if community, clear its database table values + else if (dso.getType()==Constants.COMMUNITY) + { + Community community = (Community) dso; + + // Use the MetadataToDC map (defined privately in this class) + // to clear out all the Community database fields. 
+ for(String dbField : ccMetadataToDC.keySet()) + { + try + { + community.setMetadata(dbField, null); + } + catch(IllegalArgumentException ie) + { + // ignore the error -- just means the field doesn't exist in DB + // Communities & Collections don't include the exact same metadata fields + } + } + } + + } + } Index: PackageDisseminator.java =================================================================== --- PackageDisseminator.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) +++ PackageDisseminator.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) @@ -38,9 +38,10 @@ package org.dspace.content.packager; +import java.io.File; import java.io.IOException; -import java.io.OutputStream; import java.sql.SQLException; +import java.util.List; import org.dspace.authorize.AuthorizeException; import org.dspace.content.DSpaceObject; @@ -82,28 +83,65 @@ * "package" on the indicated OutputStream. Package is any serialized * representation of the item, at the discretion of the implementing * class. It does not have to include content bitstreams. - *
+ *

* Use the params parameter list to adjust the way the * package is made, e.g. including a "metadataOnly" * parameter might make the package a bare manifest in XML * instead of a Zip file including manifest and contents. - *
+ *

* Throws an exception of the chosen object is not acceptable or there is * a failure creating the package. * * @param context DSpace context. * @param object DSpace object (item, collection, etc) * @param params Properties-style list of options specific to this packager - * @param out output stream on which to write package + * @param pkgFile File where export package should be written * @throws PackageValidationException if package cannot be created or there is * a fatal error in creating it. */ void disseminate(Context context, DSpaceObject object, - PackageParameters params, OutputStream out) + PackageParameters params, File pkgFile) throws PackageException, CrosswalkException, AuthorizeException, SQLException, IOException; /** + * Recursively export one or more DSpace Objects as a series of packages. + * This method will export the given DSpace Object as well as all referenced + * DSpaceObjects (e.g. child objects) into a series of packages. The + * initial object is exported to the location specified by the pkgFile. + * All other generated packages are recursively exported to the same directory. + *

+ * Package is any serialized representation of the item, at the discretion + * of the implementing class. It does not have to include content bitstreams. + *

+ * Use the params parameter list to adjust the way the + * package is made, e.g. including a "metadataOnly" + * parameter might make the package a bare manifest in XML + * instead of a Zip file including manifest and contents. + *

+ * Throws an exception if the initial object is not acceptable or there is + * a failure creating the packages. + *

+ * A packager may choose not to implement disseminateAll, + * or simply forward the call to disseminate if it is unable to + * support recursive dissemination. + * + * @param context DSpace context. + * @param dso initial DSpace object + * @param params Properties-style list of options specific to this packager + * @param pkgFile File where initial package should be written. All other + * packages will be written to the same directory as this File. + * @return List of all package Files which were successfully disseminated + * @throws PackageValidationException if package cannot be created or there is + * a fatal error in creating it. + */ + List disseminateAll(Context context, DSpaceObject dso, + PackageParameters params, File pkgFile) + throws PackageException, CrosswalkException, + AuthorizeException, SQLException, IOException; + + + /** * Identifies the MIME-type of this package, e.g. "application/zip". * Required when sending the package via HTTP, to * provide the Content-Type header. 
Index: AbstractMETSDisseminator.java =================================================================== --- AbstractMETSDisseminator.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) +++ AbstractMETSDisseminator.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) @@ -38,64 +38,75 @@ package org.dspace.content.packager; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Date; import java.util.HashMap; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; -import org.apache.log4j.Logger; -import org.dspace.authorize.AuthorizeException; -import org.dspace.authorize.AuthorizeManager; -import org.dspace.content.Bitstream; -import org.dspace.content.Bundle; -import org.dspace.content.DSpaceObject; -import org.dspace.content.Item; -import org.dspace.content.crosswalk.CrosswalkException; -import org.dspace.content.crosswalk.DisseminationCrosswalk; -import org.dspace.core.ConfigurationManager; -import org.dspace.core.Constants; -import org.dspace.core.Context; -import org.dspace.core.PluginManager; -import org.dspace.core.Utils; -import org.jdom.Namespace; -import org.jdom.output.Format; -import org.jdom.output.XMLOutputter; - -import edu.harvard.hul.ois.mets.Agent; import edu.harvard.hul.ois.mets.AmdSec; +import edu.harvard.hul.ois.mets.BinData; import edu.harvard.hul.ois.mets.Checksumtype; import edu.harvard.hul.ois.mets.Div; import edu.harvard.hul.ois.mets.DmdSec; +import edu.harvard.hul.ois.mets.MdRef; import edu.harvard.hul.ois.mets.FLocat; import edu.harvard.hul.ois.mets.FileGrp; import edu.harvard.hul.ois.mets.FileSec; import edu.harvard.hul.ois.mets.Fptr; +import 
edu.harvard.hul.ois.mets.Mptr; import edu.harvard.hul.ois.mets.Loctype; import edu.harvard.hul.ois.mets.MdWrap; import edu.harvard.hul.ois.mets.Mdtype; import edu.harvard.hul.ois.mets.Mets; import edu.harvard.hul.ois.mets.MetsHdr; -import edu.harvard.hul.ois.mets.Name; -import edu.harvard.hul.ois.mets.Role; import edu.harvard.hul.ois.mets.StructMap; import edu.harvard.hul.ois.mets.TechMD; -import edu.harvard.hul.ois.mets.Type; +import edu.harvard.hul.ois.mets.SourceMD; +import edu.harvard.hul.ois.mets.DigiprovMD; +import edu.harvard.hul.ois.mets.RightsMD; +import edu.harvard.hul.ois.mets.helper.MdSec; import edu.harvard.hul.ois.mets.XmlData; +import edu.harvard.hul.ois.mets.helper.Base64; import edu.harvard.hul.ois.mets.helper.MetsElement; import edu.harvard.hul.ois.mets.helper.MetsException; import edu.harvard.hul.ois.mets.helper.MetsValidator; import edu.harvard.hul.ois.mets.helper.MetsWriter; -import edu.harvard.hul.ois.mets.helper.PCData; import edu.harvard.hul.ois.mets.helper.PreformedXML; +import java.io.File; +import java.io.FileOutputStream; +import org.apache.log4j.Logger; + +import org.dspace.authorize.AuthorizeException; +import org.dspace.authorize.AuthorizeManager; +import org.dspace.content.Bitstream; +import org.dspace.content.Bundle; +import org.dspace.content.Community; +import org.dspace.content.Collection; +import org.dspace.content.DSpaceObject; +import org.dspace.content.Item; +import org.dspace.content.ItemIterator; +import org.dspace.content.crosswalk.CrosswalkException; +import org.dspace.content.crosswalk.CrosswalkObjectNotSupported; +import org.dspace.content.crosswalk.DisseminationCrosswalk; +import org.dspace.content.crosswalk.StreamDisseminationCrosswalk; +import org.dspace.core.Constants; +import org.dspace.core.Context; +import org.dspace.core.PluginManager; +import org.dspace.core.Utils; +import org.dspace.license.CreativeCommons; +import org.jdom.Element; +import org.jdom.Namespace; +import org.jdom.output.Format; +import 
org.jdom.output.XMLOutputter; + /** * Base class for disseminator of * METS (Metadata Encoding & Transmission Standard) Package.
@@ -107,32 +118,33 @@ * different kinds of metadata and inner package structures. *

* Package Parameters:
- * manifestOnly -- if true, generate a standalone XML + *

    + *
  • manifestOnly -- if true, generate a standalone XML * document of the METS manifest instead of a complete package. Any * other metadata (such as licenses) will be encoded inline. - * Default is false. + * Default is false.
  • * - * unauthorized -- this determines what is done when the - * packager encounters a Bundle or Bitstream it is not authorized to - * read. By default, it just quits with an AuthorizeException. + *
  • unauthorized -- this determines what is done when the + * packager encounters a Bundle or Bitstream it is not authorized to + * read. By default, it just quits with an AuthorizeException. * If this option is present, it must be one of the following values: - * skip -- simply exclude unreadable content from package. - * zero -- include unreadable bitstreams as 0-length files; - * unreadable Bundles will still cause authorize errors. + *
      + *
    • skip -- simply exclude unreadable content from package.
    • + *
    • zero -- include unreadable bitstreams as 0-length files; + * unreadable Bundles will still cause authorize errors.
  • + *
* * @author Larry Stone * @author Robert Tansley + * @author Tim Donohue * @version $Revision$ */ public abstract class AbstractMETSDisseminator - implements PackageDisseminator + extends AbstractPackageDisseminator { /** log4j category */ private static Logger log = Logger.getLogger(AbstractMETSDisseminator.class); - /** Filename of manifest, relative to package toplevel. */ - public static final String MANIFEST_FILE = "mets.xml"; - // JDOM xml output writer - indented format for readability. private static XMLOutputter outputter = new XMLOutputter(Format.getPrettyFormat()); @@ -140,157 +152,294 @@ private int idCounter = 1; /** - * Table of files to add to package, such as mdRef'd metadata. - * Key is relative pathname of file, value is InputStream - * with contents to put in it. - * New map is created by disseminate(). + * Wrapper for a table of streams to add to the package, such as + * mdRef'd metadata. Key is relative pathname of file, value is + * InputStream with contents to put in it. Some + * superclasses will put streams in this table when adding an mdRef + * element to e.g. a rightsMD segment. */ - protected Map extraFiles = null; + protected class MdStreamCache + { + private Map extraFiles = new HashMap(); + public void addStream(MdRef key, InputStream md) + { + extraFiles.put(key, md); + } + + public Map getMap() + { + return extraFiles; + } + + public void close() + throws IOException + { + for (InputStream is : extraFiles.values()) + is.close(); + } + } + /** - * Make a new unique ID with specified prefix. + * Make a new unique ID symbol with specified prefix. * @param prefix the prefix of the identifier, constrained to XML ID schema * @return a new string identifier unique in this session (instance). 
*/ - protected String gensym(String prefix) + protected synchronized String gensym(String prefix) { return prefix + "_" + String.valueOf(idCounter++); } public String getMIMEType(PackageParameters params) { - return (params != null && params.getProperty("manifestOnly") != null) ? + return (params != null && + (params.getBooleanProperty("manifestOnly", false))) ? "text/xml" : "application/zip"; } /** - * Export the object (Item, Collection, or Community) to a - * package file on the indicated OutputStream. - * Gets an exception of the object cannot be packaged or there is + * Export the object (Item, Collection, or Community) as a + * "package" on the indicated OutputStream. Package is any serialized + * representation of the item, at the discretion of the implementing + * class. It does not have to include content bitstreams. + *

+ * Use the params parameter list to adjust the way the + * package is made, e.g. including a "manifestOnly" + * parameter makes the package a bare manifest in XML + * instead of a Zip file including manifest and contents. + *

+ * Throws an exception of the chosen object is not acceptable or there is * a failure creating the package. * - * @param context - DSpace context. - * @param dso - DSpace object (item, collection, etc) - * @param pkg - output stream on which to write package - * @throws PackageException if package cannot be created or there is + * @param context DSpace context. + * @param object DSpace object (item, collection, etc) + * @param params Properties-style list of options specific to this packager + * @param pkgFile File where export package should be written + * @throws PackageValidationException if package cannot be created or there is * a fatal error in creating it. */ public void disseminate(Context context, DSpaceObject dso, - PackageParameters params, OutputStream pkg) + PackageParameters params, File pkgFile) throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException { - if (dso.getType() == Constants.ITEM) + try { - Item item = (Item)dso; - long lmTime = item.getLastModified().getTime(); + //Make sure our package file exists + if(!pkgFile.exists()) + { + PackageUtils.createFile(pkgFile); + } - // how to handle unauthorized bundle/bitstream: - String unauth = (params == null) ? 
null : params.getProperty("unauthorized"); + //Open up an output stream to write to package file + FileOutputStream outStream = new FileOutputStream(pkgFile); - if (params != null && params.getProperty("manifestOnly") != null) + // Generate a true manifest-only "package", no external files/data & no need to zip up + if (params != null && params.getBooleanProperty("manifestOnly", false)) { - extraFiles = null; - writeManifest(context, item, params, pkg); + Mets manifest = makeManifest(context, dso, params, null); + manifest.validate(new MetsValidator()); + manifest.write(new MetsWriter(outStream)); } else { - extraFiles = new HashMap(); - ZipOutputStream zip = new ZipOutputStream(pkg); - zip.setComment("METS archive created by DSpace METSDisseminationCrosswalk"); + // make a Zip-based package + writeZipPackage(context, dso, params, outStream); + }//end if/else - // write manifest first. - ZipEntry me = new ZipEntry(MANIFEST_FILE); - me.setTime(lmTime); - zip.putNextEntry(me); - writeManifest(context, item, params, zip); - zip.closeEntry(); + //Close stream / stop writing to file + outStream.close(); + }//end try + catch (MetsException e) + { + // We don't pass up a MetsException, so callers don't need to + // know the details of the METS toolkit + log.error("METS error: ",e); + throw new PackageValidationException(e); + } + } + + + /** + * Make a Zipped up METS package for the given DSpace Object + * + * @param context DSpace Context + * @param dso The DSpace Object + * @param params Parameters to the Packager script + * @param pkg Package output stream + * @throws PackageValidationException + * @throws AuthorizeException + * @throws SQLException + * @throws IOException + */ + protected void writeZipPackage(Context context, DSpaceObject dso, PackageParameters params, OutputStream pkg) + throws PackageValidationException, CrosswalkException, MetsException, AuthorizeException, SQLException, IOException + { + long lmTime = 0; + if (dso.getType() == Constants.ITEM) + 
lmTime = ((Item)dso).getLastModified().getTime(); + + // map of extra streams to put in Zip (these are located during makeManifest()) + MdStreamCache extraStreams = new MdStreamCache(); + ZipOutputStream zip = new ZipOutputStream(pkg); + zip.setComment("METS archive created by DSpace METSDisseminationCrosswalk"); + Mets manifest = makeManifest(context, dso, params, extraStreams); + + // copy extra (metadata, license, etc) bitstreams into zip, update manifest + if (extraStreams != null) + { + for (Map.Entry ment : extraStreams.getMap().entrySet()) + { + MdRef ref = (MdRef)ment.getKey(); - // copy extra (meta?) bitstreams into zip - Iterator fi = extraFiles.keySet().iterator(); - while (fi.hasNext()) + // Both Deposit Licenses & CC Licenses which are referenced as "extra streams" may already be + // included in our Package (if their bundles are already included in the section of manifest). + // So, do a special check to see if we need to link up extra License entries to the bitstream in the . + // (this ensures that we don't accidentally add the same License file to our package twice) + linkLicenseRefsToBitstreams(context, params, dso, ref); + + //If this 'mdRef' is NOT already linked up to a file in the package, then its file must be missing. + // So, we are going to add a new file to the Zip package. 
+ if(ref.getXlinkHref()==null || ref.getXlinkHref().isEmpty()) { - String fname = (String)fi.next(); + InputStream is = (InputStream)ment.getValue(); + + // create a hopefully unique filename within the Zip + String fname = gensym("metadata"); + // link up this 'mdRef' to point to that file + ref.setXlinkHref(fname); + if (log.isDebugEnabled()) + log.debug("Writing EXTRA stream to Zip: "+fname); + //actually add the file to the Zip package ZipEntry ze = new ZipEntry(fname); - ze.setTime(lmTime); + if (lmTime != 0) + ze.setTime(lmTime); zip.putNextEntry(ze); - Utils.copy((InputStream)extraFiles.get(fname), zip); + Utils.copy(is, zip); zip.closeEntry(); + + is.close(); } + } + } - // copy all non-meta bitstreams into zip - Bundle bundles[] = item.getBundles(); - for (int i = 0; i < bundles.length; i++) + // write manifest after metadata. + ZipEntry me = new ZipEntry(METSManifest.MANIFEST_FILE); + if (lmTime != 0) + me.setTime(lmTime); + zip.putNextEntry(me); + + // can only validate now after fixing up extraStreams + manifest.validate(new MetsValidator()); + manifest.write(new MetsWriter(zip)); + zip.closeEntry(); + + //write any bitstreams associated with DSpace object to zip package + addBitstreamsToZip(context, dso, params, zip); + + zip.close(); + + } + /** + * Add Bitstreams associated with a given DSpace Object into an + * existing ZipOutputStream + * @param context DSpace Context + * @param dso The DSpace Object + * @param params Parameters to the Packager script + * @param zip Zip output + */ + protected void addBitstreamsToZip(Context context, DSpaceObject dso, PackageParameters params, ZipOutputStream zip) + throws PackageValidationException, AuthorizeException, SQLException, IOException + { + // how to handle unauthorized bundle/bitstream: + String unauth = (params == null) ? 
null : params.getProperty("unauthorized"); + + // copy all non-meta bitstreams into zip + if (dso.getType() == Constants.ITEM) + { + Item item = (Item)dso; + + //get last modified time + long lmTime = ((Item)dso).getLastModified().getTime(); + + Bundle bundles[] = item.getBundles(); + for (int i = 0; i < bundles.length; i++) + { + if (includeBundle(bundles[i])) { - if (!PackageUtils.isMetaInfoBundle(bundles[i])) + // unauthorized bundle? + if (!AuthorizeManager.authorizeActionBoolean(context, + bundles[i], Constants.READ)) { - // unauthorized bundle? - if (!AuthorizeManager.authorizeActionBoolean(context, - bundles[i], Constants.READ)) + if (unauth != null && + (unauth.equalsIgnoreCase("skip"))) { - if (unauth != null && - (unauth.equalsIgnoreCase("skip"))) - { - log.warn("Skipping Bundle[\""+bundles[i].getName()+"\"] because you are not authorized to read it."); - continue; - } - else - throw new AuthorizeException("Not authorized to read Bundle named \""+bundles[i].getName()+"\""); + log.warn("Skipping Bundle[\""+bundles[i].getName()+"\"] because you are not authorized to read it."); + continue; } - Bitstream[] bitstreams = bundles[i].getBitstreams(); - for (int k = 0; k < bitstreams.length; k++) + else + throw new AuthorizeException("Not authorized to read Bundle named \""+bundles[i].getName()+"\""); + } + Bitstream[] bitstreams = bundles[i].getBitstreams(); + for (int k = 0; k < bitstreams.length; k++) + { + boolean auth = AuthorizeManager.authorizeActionBoolean(context, + bitstreams[k], Constants.READ); + if (auth || + (unauth != null && unauth.equalsIgnoreCase("zero"))) { - boolean auth = AuthorizeManager.authorizeActionBoolean(context, - bitstreams[k], Constants.READ); - if (auth || - (unauth != null && unauth.equalsIgnoreCase("zero"))) - { - ZipEntry ze = new ZipEntry( - makeBitstreamName(bitstreams[k])); + String zname = makeBitstreamURL(bitstreams[k], params); + ZipEntry ze = new ZipEntry(zname); + if (log.isDebugEnabled()) + log.debug("Writing CONTENT 
stream of bitstream("+String.valueOf(bitstreams[k].getID())+") to Zip: "+zname+ + ", size="+String.valueOf(bitstreams[k].getSize())); + if (lmTime != 0) ze.setTime(lmTime); - ze.setSize(auth ? bitstreams[k].getSize() : 0); - zip.putNextEntry(ze); - if (auth) + ze.setSize(auth ? bitstreams[k].getSize() : 0); + zip.putNextEntry(ze); + if (auth) Utils.copy(bitstreams[k].retrieve(), zip); - else - log.warn("Adding zero-length file for Bitstream, SID="+String.valueOf(bitstreams[k].getSequenceID())+", not authorized for READ."); - zip.closeEntry(); - } - else if (unauth != null && - unauth.equalsIgnoreCase("skip")) - { - log.warn("Skipping Bitstream, SID="+String.valueOf(bitstreams[k].getSequenceID())+", not authorized for READ."); - } else - { - throw new AuthorizeException("Not authorized to read Bitstream, SID="+String.valueOf(bitstreams[k].getSequenceID())); - } + log.warn("Adding zero-length file for Bitstream, SID="+String.valueOf(bitstreams[k].getSequenceID())+", not authorized for READ."); + zip.closeEntry(); } + else if (unauth != null && + unauth.equalsIgnoreCase("skip")) + { + log.warn("Skipping Bitstream, SID="+String.valueOf(bitstreams[k].getSequenceID())+", not authorized for READ."); + } + else + { + throw new AuthorizeException("Not authorized to read Bitstream, SID="+String.valueOf(bitstreams[k].getSequenceID())); + } } } - zip.close(); - extraFiles = null; } + } + // Coll, Comm just add logo bitstream to content if there is one + else if (dso.getType() == Constants.COLLECTION || + dso.getType() == Constants.COMMUNITY) + { + Bitstream logoBs = dso.getType() == Constants.COLLECTION ? 
+ ((Collection)dso).getLogo() : + ((Community)dso).getLogo(); + if (logoBs != null) + { + String zname = makeBitstreamURL(logoBs, params); + ZipEntry ze = new ZipEntry(zname); + if (log.isDebugEnabled()) + log.debug("Writing CONTENT stream of bitstream("+String.valueOf(logoBs.getID())+") to Zip: "+zname+", size="+String.valueOf(logoBs.getSize())); + ze.setSize(logoBs.getSize()); + zip.putNextEntry(ze); + Utils.copy(logoBs.retrieve(), zip); + zip.closeEntry(); + } } - else - throw new PackageValidationException("Can only disseminate an Item now."); } - /** - * Create name that bitstream will have in archive. Name must - * be unique and relative to archive top level, e.g. "bitstream_.ext" - */ - private String makeBitstreamName(Bitstream bitstream) - { - String base = "bitstream_"+String.valueOf(bitstream.getID()); - String ext[] = bitstream.getFormat().getExtensions(); - return (ext.length > 0) ? base+"."+ext[0] : base; - } - - // set metadata type - if Mdtype.parse() gets exception, // that means it's not in the MDTYPE vocabulary, so use OTHER. - private void setMdType(MdWrap mdWrap, String mdtype) + protected void setMdType(MdWrap mdWrap, String mdtype) { try { @@ -303,144 +452,299 @@ } } + // set metadata type - if Mdtype.parse() gets exception, + // that means it's not in the MDTYPE vocabulary, so use OTHER. + protected void setMdType(MdRef mdRef, String mdtype) + { + try + { + mdRef.setMDTYPE(Mdtype.parse(mdtype)); + } + catch (MetsException e) + { + mdRef.setMDTYPE(Mdtype.OTHER); + mdRef.setOTHERMDTYPE(mdtype); + } + } + + /** - * Write out a METS manifest. - * Mostly lifted from Rob Tansley's METS exporter. + * Create an element wrapped around a metadata reference (either mdWrap + * or mdRef); i.e. dmdSec, techMd, sourceMd, etc. Checks for + * XML-DOM oriented crosswalk first, then if not found looks for + * stream crosswalk of the same name. 
+ * + * @param context DSpace Context + * @param dso DSpace Object we are generating METS manifest for + * @param mdSecClass class of mdSec (TechMD, RightsMD, DigiProvMD, etc) + * @param typeSpec Type of metadata going into this mdSec (e.g. MODS, DC, PREMIS, etc) + * @param extraStreams list of extra files which need to be added to final dissemination package + * @return mdSec element or null if xwalk returns empty results. + * @throws SQLException + * @throws PackageValidationException + * @throws CrosswalkException + * @throws IOException + * @throws AuthorizeException */ - private void writeManifest(Context context, Item item, - PackageParameters params, OutputStream out) - throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException - + protected MdSec makeMdSec(Context context, DSpaceObject dso, Class mdSecClass, String typeSpec, + MdStreamCache extraStreams) + throws SQLException, PackageValidationException, CrosswalkException, + IOException, AuthorizeException { try { - // Create the METS file - Mets mets = new Mets(); - - // Top-level stuff - mets.setID(gensym("mets")); - mets.setOBJID("hdl:" + item.getHandle()); - mets.setLABEL("DSpace Item"); - mets.setPROFILE(getProfile()); - - // MetsHdr - MetsHdr metsHdr = new MetsHdr(); - metsHdr.setCREATEDATE(new Date()); // FIXME: CREATEDATE is now: - // maybe should be item create - // date? + //create our metadata element (dmdSec, techMd, sourceMd, rightsMD etc.) 
+ MdSec mdSec = (MdSec) mdSecClass.newInstance(); + mdSec.setID(gensym(mdSec.getLocalName())); + String parts[] = typeSpec.split(":", 2); + String xwalkName, metsName; - // Agent - Agent agent = new Agent(); - agent.setROLE(Role.CUSTODIAN); - agent.setTYPE(Type.ORGANIZATION); - Name name = new Name(); - name.getContent() - .add(new PCData(ConfigurationManager - .getProperty("dspace.name"))); - agent.getContent().add(name); - metsHdr.getContent().add(agent); - mets.getContent().add(metsHdr); - - // add DMD sections - // Each type element MAY be either just a MODS-and-crosswalk name, OR - // a combination "MODS-name:crosswalk-name" (e.g. "DC:qDC"). - String dmdTypes[] = getDmdTypes(params); + //determine the name of the crosswalk to use to generate metadata + // for dmdSecs this is the part *after* the colon in the 'type' (see getDmdTypes()) + // for all others this is usually just corresponds to type name. + if (parts.length > 1) + { + metsName = parts[0]; + xwalkName = parts[1]; + } + else + xwalkName = metsName = typeSpec; - // record of ID of each dmdsec to make DMDID in structmap. - String dmdGroup = gensym("dmd_group"); - String dmdId[] = new String[dmdTypes.length]; - for (int i = 0; i < dmdTypes.length; ++i) + // First, check to see if the crosswalk we are using is a normal DisseminationCrosswalk + DisseminationCrosswalk xwalk = (DisseminationCrosswalk) + PluginManager.getNamedPlugin(DisseminationCrosswalk.class, xwalkName); + + // If we found the correct crosswalk, run it! + if (xwalk != null) { - dmdId[i] = gensym("dmd"); + //For a normal DisseminationCrosswalk, we will be expecting an XML (DOM) based result. 
+ // So, we are going to wrap this XML result in an element + MdWrap mdWrap = new MdWrap(); + setMdType(mdWrap, metsName); XmlData xmlData = new XmlData(); - String xwalkName, metsName; - String parts[] = dmdTypes[i].split(":", 2); - if (parts.length > 1) + if (crosswalkToMetsElement(xwalk, dso, xmlData) != null) { - metsName = parts[0]; - xwalkName = parts[1]; + mdWrap.getContent().add(xmlData); + mdSec.getContent().add(mdWrap); + return mdSec; } else - xwalkName = metsName = dmdTypes[i]; + return null; + } + // If we didn't find the correct crosswalk, we will check to see if this is + // a StreamDisseminationCrosswalk -- a Stream crosswalk disseminates to an OutputStream + else + { + StreamDisseminationCrosswalk sxwalk = (StreamDisseminationCrosswalk) + PluginManager.getNamedPlugin(StreamDisseminationCrosswalk.class, xwalkName); + if (sxwalk != null) + { + if (sxwalk.canDisseminate(context, dso)) + { + // Disseminate crosswalk output to an outputstream + ByteArrayOutputStream disseminateOutput = new ByteArrayOutputStream(); + sxwalk.disseminate(context, dso, disseminateOutput); + // Convert output to an inputstream, so we can write to manifest or Zip file + ByteArrayInputStream crosswalkedStream = new ByteArrayInputStream(disseminateOutput.toByteArray()); + + //If we are capturing extra files to put into a Zip package + if(extraStreams!=null) + { + //Create an -- we'll just reference the file by name in Zip package + MdRef mdRef = new MdRef(); + //add the crosswalked Stream to list of files to add to Zip package later + extraStreams.addStream(mdRef, crosswalkedStream); + + //set properties on + // Note, filename will get set on this later, + // when we process all the 'extraStreams' + mdRef.setMIMETYPE(sxwalk.getMIMEType()); + setMdType(mdRef, metsName); + mdRef.setLOCTYPE(Loctype.URL); + mdSec.getContent().add(mdRef); + } + else + { + //If we are *not* capturing extra streams to add to Zip package later, + // that means we are likely only generating a METS 
manifest + // (i.e. manifestOnly = true) + // In this case, the best we can do is take the crosswalked + // Stream, base64 encode it, and add in an field - DisseminationCrosswalk xwalk = (DisseminationCrosswalk) - PluginManager.getNamedPlugin(DisseminationCrosswalk.class, xwalkName); - if (xwalk == null) - throw new PackageValidationException("Cannot find "+dmdTypes[i]+" crosswalk plugin!"); - else - crosswalkToMets(xwalk, item, xmlData); + // First, create our + MdWrap mdWrap = new MdWrap(); + mdWrap.setMIMETYPE(sxwalk.getMIMEType()); + setMdType(mdWrap, metsName); - DmdSec dmdSec = new DmdSec(); - dmdSec.setID(dmdId[i]); - dmdSec.setGROUPID(dmdGroup); - MdWrap mdWrap = new MdWrap(); - setMdType(mdWrap, metsName); - mdWrap.getContent().add(xmlData); - dmdSec.getContent().add(mdWrap); - mets.getContent().add(dmdSec); - } - - // Only add license AMD section if there are any licenses. - // Catch authorization failures accessing license bitstreams - // only if we are skipping unauthorized bitstreams. - String licenseID = null; - try - { - AmdSec amdSec = new AmdSec(); - addRightsMd(context, item, amdSec); - if (amdSec.getContent().size() > 0) - { - licenseID = gensym("license"); - amdSec.setID(licenseID); - mets.getContent().add(amdSec); - } - } - catch (AuthorizeException e) - { - String unauth = (params == null) ? null : params.getProperty("unauthorized"); - if (!(unauth != null && unauth.equalsIgnoreCase("skip"))) - throw e; + // Now, create our and add base64 encoded contents to it. 
+ BinData binData = new BinData(); + Base64 base64 = new Base64(crosswalkedStream); + binData.getContent().add(base64); + mdWrap.getContent().add(binData); + mdSec.getContent().add(mdWrap); + } + return mdSec; + } + else + return null; + } else - log.warn("Skipping license metadata because of access failure: "+e.toString()); + throw new PackageValidationException("Cannot find "+xwalkName+" crosswalk plugin, either DisseminationCrosswalk or StreamDisseminationCrosswalk"); } + } + catch (InstantiationException e) + { + throw new PackageValidationException("Error instantiating Mdsec object: "+ e.toString()); + } + catch (IllegalAccessException e) + { + throw new PackageValidationException("Error instantiating Mdsec object: "+ e.toString()); + } + } - // FIXME: History data???? Nooooo!!!! + // add either a techMd or sourceMd element to amdSec. + // mdSecClass determines which type. + // mdTypes[] is array of "[metsName:]PluginName" strings, maybe empty. + protected void addToAmdSec(AmdSec fAmdSec, String mdTypes[], Class mdSecClass, + Context context, DSpaceObject dso, MdStreamCache extraStreams) + throws SQLException, PackageValidationException, CrosswalkException, + IOException, AuthorizeException + { + for (int i = 0; i < mdTypes.length; ++i) + { + MdSec md = makeMdSec(context, dso, mdSecClass, mdTypes[i], extraStreams); + if (md != null) + fAmdSec.getContent().add(md); + } + } - // fileSec - all non-metadata bundles go into fileGrp, - // and each bitstream therein into a file. - // Create the bitstream-level techMd and div's for structmap - // at the same time so we can connec the IDREFs to IDs. - FileSec fileSec = new FileSec(); + // Create amdSec for any tech md's, return its ID attribute. 
+ protected String addAmdSec(Context context, DSpaceObject dso, PackageParameters params, + Mets mets, MdStreamCache extraStreams) + throws SQLException, PackageValidationException, CrosswalkException, + IOException, AuthorizeException + { + String techMdTypes[] = getTechMdTypes(context, dso, params); + String rightsMdTypes[] = getRightsMdTypes(context, dso, params); + String sourceMdTypes[] = getSourceMdTypes(context, dso, params); + String digiprovMdTypes[] = getDigiprovMdTypes(context, dso, params); - String techMdType = getTechMdType(params); - String parts[] = techMdType.split(":", 2); - String xwalkName, metsName; - if (parts.length > 1) + // only bother if there are any sections to add + if ((techMdTypes.length+sourceMdTypes.length+ + digiprovMdTypes.length+rightsMdTypes.length) > 0) + { + String result = gensym("amd"); + AmdSec fAmdSec = new AmdSec(); + fAmdSec.setID(result); + addToAmdSec(fAmdSec, techMdTypes, TechMD.class, context, dso, extraStreams); + addToAmdSec(fAmdSec, rightsMdTypes, RightsMD.class, context, dso, extraStreams); + addToAmdSec(fAmdSec, sourceMdTypes, SourceMD.class, context, dso, extraStreams); + addToAmdSec(fAmdSec, digiprovMdTypes, DigiprovMD.class, context, dso, extraStreams); + + mets.getContent().add(fAmdSec); + return result; + } + else + return null; + } + + // make the most "persistent" identifier possible, preferably a URN + // based on the Handle. + protected String makePersistentID(DSpaceObject dso) + { + String handle = dso.getHandle(); + + // If no Handle, punt to much-less-satisfactory database ID and type.. + if (handle == null) + return "DSpace_DB_"+Constants.typeText[dso.getType()] + "_" + String.valueOf(dso.getID()); + else + return getHandleURN(handle); + } + + /** + * Write out a METS manifest. + * Mostly lifted from Rob Tansley's METS exporter. 
+ */ + protected Mets makeManifest(Context context, DSpaceObject dso, + PackageParameters params, + MdStreamCache extraStreams) + throws MetsException, PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException + + { + // Create the METS manifest in memory + Mets mets = new Mets(); + String typeStr = Constants.typeText[dso.getType()]; + + // this ID should be globally unique + mets.setID("dspace"+Utils.generateKey()); + + // identifies the object described by this document + mets.setOBJID(makePersistentID(dso)); + mets.setTYPE("DSpace "+typeStr); + + // this is the signature by which the ingester will recognize + // a document it can expect to interpret. + mets.setPROFILE(getProfile()); + + MetsHdr metsHdr = makeMetsHdr(context, dso, params); + if (metsHdr != null) + mets.getContent().add(metsHdr); + + // add DMD sections + // Each type element MAY be either just a MODS-and-crosswalk name, OR + // a combination "MODS-name:crosswalk-name" (e.g. "DC:qDC"). + String dmdTypes[] = getDmdTypes(context, dso, params); + + // record of ID of each dmdsec to make DMDID in structmap. + String dmdId[] = new String[dmdTypes.length]; + for (int i = 0; i < dmdTypes.length; ++i) + { + MdSec dmdSec = makeMdSec(context, dso, DmdSec.class, dmdTypes[i], extraStreams); + if (dmdSec != null) { - metsName = parts[0]; - xwalkName = parts[1]; + mets.getContent().add(dmdSec); + dmdId[i] = dmdSec.getID(); } - else - xwalkName = metsName = techMdType; + } + + // add object-wide technical/source MD segments, get ID string: + // Put that ID in ADMID of first div in structmap. 
+ String objectAMDID = addAmdSec(context, dso, params, mets, extraStreams); - DisseminationCrosswalk xwalk = (DisseminationCrosswalk) - PluginManager.getNamedPlugin(DisseminationCrosswalk.class, xwalkName); - if (xwalk == null) - throw new PackageValidationException("Cannot find "+xwalkName+" crosswalk plugin!"); + // Create simple structMap: initial div represents the Object's + // contents, its children are e.g. Item bitstreams (content only), + // Collection's members, or Community's members. + StructMap structMap = new StructMap(); + structMap.setID(gensym("struct")); + structMap.setTYPE("LOGICAL"); + structMap.setLABEL("DSpace Object"); + Div div0 = new Div(); + div0.setID(gensym("div")); + div0.setTYPE("DSpace Object Contents"); + structMap.getContent().add(div0); - // log the primary bitstream for structmap - String primaryBitstreamFileID = null; + // fileSec is optional, let object type create it if needed. + FileSec fileSec = null; - // accumulate content DIV items to put in structMap later. - List contentDivs = new ArrayList(); + // Item-specific manifest - license, bitstreams as Files, etc. + if (dso.getType() == Constants.ITEM) + { + // this tags file ID and group identifiers for bitstreams. + String bitstreamIDstart = "bitstream_"; + Item item = (Item)dso; // how to handle unauthorized bundle/bitstream: String unauth = (params == null) ? null : params.getProperty("unauthorized"); + // fileSec - all non-metadata bundles go into fileGrp, + // and each bitstream therein into a file. + // Create the bitstream-level techMd and div's for structmap + // at the same time so we can connec the IDREFs to IDs. + fileSec = new FileSec(); Bundle[] bundles = item.getBundles(); for (int i = 0; i < bundles.length; i++) { - if (PackageUtils.isMetaInfoBundle(bundles[i])) + if (!includeBundle(bundles[i])) continue; // unauthorized bundle? 
@@ -457,10 +761,8 @@ Bitstream[] bitstreams = bundles[i].getBitstreams(); - // Create a fileGrp + // Create a fileGrp, USE = permuted Bundle name FileGrp fileGrp = new FileGrp(); - - // Bundle name for USE attribute String bName = bundles[i].getName(); if ((bName != null) && !bName.equals("")) fileGrp.setUSE(bundleToFileGrp(bName)); @@ -492,39 +794,30 @@ } String sid = String.valueOf(bitstreams[bits].getSequenceID()); - + String fileID = bitstreamIDstart + sid; edu.harvard.hul.ois.mets.File file = new edu.harvard.hul.ois.mets.File(); - - String xmlIDstart = "bitstream_"; - String fileID = xmlIDstart + sid; - file.setID(fileID); + file.setSEQ(bitstreams[bits].getSequenceID()); + fileGrp.getContent().add(file); - // log primary bitstream for later (structMap) + // set primary bitstream in structMap if (bitstreams[bits].getID() == primaryBitstreamID) - primaryBitstreamFileID = fileID; - - // if this is content, add to structmap too: - if (isContentBundle) { - Div div = new Div(); - div.setID(gensym("div")); - div.setTYPE("DSpace Content Bitstream"); Fptr fptr = new Fptr(); fptr.setFILEID(fileID); - div.getContent().add(fptr); - contentDivs.add(div); + div0.getContent().add(0, fptr); } - file.setSEQ(bitstreams[bits].getSequenceID()); + // if this is content, add to structmap too: + if (isContentBundle) + div0.getContent().add(makeFileDiv(fileID, "DSpace Content Bitstream")); - String groupID = "GROUP_" + xmlIDstart + sid; - /* * If we're in THUMBNAIL or TEXT bundles, the bitstream is * extracted text or a thumbnail, so we use the name to work * out which bitstream to be in the same group as */ + String groupID = "GROUP_" + bitstreamIDstart + sid; if ((bundles[i].getName() != null) && (bundles[i].getName().equals("THUMBNAIL") || bundles[i].getName().startsWith("TEXT"))) @@ -533,14 +826,12 @@ // derived bitstream in the same group Bitstream original = findOriginalBitstream(item, bitstreams[bits]); - if (original != null) { - groupID = "GROUP_" + xmlIDstart + groupID = 
"GROUP_" + bitstreamIDstart + original.getSequenceID(); } } - file.setGROUPID(groupID); file.setMIMETYPE(bitstreams[bits].getFormat().getMIMEType()); @@ -548,7 +839,7 @@ file.setSIZE(auth ? bitstreams[bits].getSize() : 0); - // translate checksum and type to METS, if available. + // FIXME: need to translate checksum and type to METS, if available. String csType = bitstreams[bits].getChecksumAlgorithm(); String cs = bitstreams[bits].getChecksum(); if (auth && cs != null && csType != null) @@ -564,86 +855,218 @@ } } - // FLocat: filename is MD5 checksum + // FLocat: point to location of bitstream contents. FLocat flocat = new FLocat(); flocat.setLOCTYPE(Loctype.URL); - flocat.setXlinkHref(makeBitstreamName(bitstreams[bits])); + flocat.setXlinkHref(makeBitstreamURL(bitstreams[bits], params)); + file.getContent().add(flocat); - // Make bitstream techMD metadata, add to file. - String techID = "techMd_for_bitstream_"+bitstreams[bits].getSequenceID(); - AmdSec fAmdSec = new AmdSec(); - fAmdSec.setID(techID); - TechMD techMd = new TechMD(); - techMd.setID(gensym("tech")); - MdWrap mdWrap = new MdWrap(); - setMdType(mdWrap, metsName); - XmlData xmlData = new XmlData(); - mdWrap.getContent().add(xmlData); - techMd.getContent().add(mdWrap); - fAmdSec.getContent().add(techMd); - mets.getContent().add(fAmdSec); - crosswalkToMets(xwalk, bitstreams[bits], xmlData); + // technical metadata for bitstream + String techID = addAmdSec(context, bitstreams[bits], params, mets, extraStreams); + if (techID != null) file.setADMID(techID); - - // Add FLocat to File, and File to FileGrp - file.getContent().add(flocat); - fileGrp.getContent().add(file); } - - // Add fileGrp to fileSec fileSec.getContent().add(fileGrp); } - - // Add fileSec to document + } + else if (dso.getType() == Constants.COLLECTION) + { + ItemIterator ii = ((Collection)dso).getItems(); + while (ii.hasNext()) + { + //add a child

<div> for each item in collection + Item item = ii.next(); + Div childDiv = makeChildDiv("DSpace Item", item, params); + if(childDiv!=null) + div0.getContent().add(childDiv); + } + Bitstream logoBs = ((Collection)dso).getLogo(); + if (logoBs != null) + { + fileSec = new FileSec(); + addLogoBitstream(logoBs, fileSec, div0, params); + } + } + else if (dso.getType() == Constants.COMMUNITY) + { + // Subcommunities are directly under "DSpace Object Contents"
<div>, but are labeled as Communities + Community subcomms[] = ((Community)dso).getSubcommunities(); + for (int i = 0; i < subcomms.length; ++i) + { + //add a child
<div> for each subcommunity in this community + Div childDiv = makeChildDiv("DSpace Community", subcomms[i], params); + if(childDiv!=null) + div0.getContent().add(childDiv); + } + // Collections are also directly under "DSpace Object Contents"
<div>, but are labeled as Collections + Collection colls[] = ((Community)dso).getCollections(); + for (int i = 0; i < colls.length; ++i) + { + //add a child
<div> for each collection in this community + Div childDiv = makeChildDiv("DSpace Collection", colls[i], params); + if(childDiv!=null) + div0.getContent().add(childDiv); + } + //add Community logo bitstream + Bitstream logoBs = ((Community)dso).getLogo(); + if (logoBs != null) + { + fileSec = new FileSec(); + addLogoBitstream(logoBs, fileSec, div0, params); + } + } + else if (dso.getType() == Constants.SITE) + { + // This is a site-wide <structMap>, which just lists the top-level communities + // each top level community is referenced by a div + Community comms[] = Community.findAllTop(context); + for (int i = 0; i < comms.length; ++i) + { + //add a child
for each top level community in this site + Div childDiv = makeChildDiv("DSpace Community", comms[i], params); + if(childDiv!=null) + div0.getContent().add(childDiv); + } + } + if (fileSec != null) mets.getContent().add(fileSec); + mets.getContent().add(structMap); - // Create simple structMap: initial div represents the Item, - // and user-visible content bitstreams are in its child divs. - StringBuffer dmdIds = new StringBuffer(); - for (int i = 0; i < dmdId.length; ++i) - dmdIds.append(" "+dmdId[i]); - StructMap structMap = new StructMap(); - structMap.setID(gensym("struct")); - structMap.setTYPE("LOGICAL"); - structMap.setLABEL("DSpace"); - Div div0 = new Div(); - div0.setID(gensym("div")); - div0.setTYPE("DSpace Item"); - div0.setDMDID(dmdIds.substring(1)); - if (licenseID != null) - div0.setADMID(licenseID); + // set links to metadata for object -- after type-specific + // code since that can add to the object metadata. + StringBuffer dmdIds = new StringBuffer(); + for (int i = 0; i < dmdId.length; ++i) + dmdIds.append(" "+dmdId[i]); + div0.setDMDID(dmdIds.substring(1)); + if (objectAMDID != null) + div0.setADMID(objectAMDID); - // if there is a primary bitstream, add FPTR to it. - if (primaryBitstreamFileID != null) + // Does subclass have something to add to structMap? + addStructMap(context, dso, params, mets); + + return mets; + } + + // Install logo bitstream into METS for Community, Collection. + // Add a file element, and refer to it from an fptr in the first div + // of the main structMap. + protected void addLogoBitstream(Bitstream logoBs, FileSec fileSec, Div div0, PackageParameters params) + { + edu.harvard.hul.ois.mets.File file = new edu.harvard.hul.ois.mets.File(); + String fileID = gensym("logo"); + file.setID(fileID); + file.setMIMETYPE(logoBs.getFormat().getMIMEType()); + file.setSIZE(logoBs.getSize()); + + // FIXME: need to translate checksum and type to METS, if available. 
+ String csType = logoBs.getChecksumAlgorithm(); + String cs = logoBs.getChecksum(); + if (cs != null && csType != null) + { + try { - Fptr fptr = new Fptr(); - fptr.setFILEID(primaryBitstreamFileID); - div0.getContent().add(fptr); + file.setCHECKSUMTYPE(Checksumtype.parse(csType)); + file.setCHECKSUM(cs); } + catch (MetsException e) + { + log.warn("Cannot set bitstream checksum type="+csType+" in METS."); + } + } + FLocat flocat = new FLocat(); + flocat.setLOCTYPE(Loctype.URL); + flocat.setXlinkHref(makeBitstreamURL(logoBs, params)); + file.getContent().add(flocat); + FileGrp fileGrp = new FileGrp(); + fileGrp.setUSE("LOGO"); + fileGrp.getContent().add(file); + fileSec.getContent().add(fileGrp); - // add DIV for each content bitstream - div0.getContent().addAll(contentDivs); + // add fptr directly to div0 of structMap + Fptr fptr = new Fptr(); + fptr.setFILEID(fileID); + div0.getContent().add(0, fptr); + } - structMap.getContent().add(div0); + // create
<div> element pointing to a file + protected Div makeFileDiv(String fileID, String type) + { + Div div = new Div(); + div.setID(gensym("div")); + div.setTYPE(type); + Fptr fptr = new Fptr(); + fptr.setFILEID(fileID); + div.getContent().add(fptr); + return div; + } - // Does subclass have something to add to structMap? - addStructMap(context, item, params, mets); + /** + * Create a
<div> element with <mptr> which references a child + * object via its handle (and via a local file name, when recursively disseminating + * all child objects). + * @param type - type attr value for the
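<div> + * @param dso - object for which to create the div + * @param params - packager parameters (recursive mode and "manifestOnly" are consulted) + * @return the new div, or null when it would be empty (no handle and not recursive) + */ + protected Div makeChildDiv(String type, DSpaceObject dso, PackageParameters params) + { + String handle = dso.getHandle(); + + //start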
+ Div div = new Div(); + div.setID(gensym("div")); + div.setTYPE(type); + boolean emptyDiv = true; - mets.getContent().add(structMap); - - mets.validate(new MetsValidator()); - - mets.write(new MetsWriter(out)); + //make sure we have a handle + if (handle == null || handle.length()==0) + { + log.warn("METS Disseminator is skipping "+type+" without handle: " + dso.toString()); } - catch (MetsException e) + else { - // We don't pass up a MetsException, so callers don't need to - // know the details of the METS toolkit - // e.printStackTrace(); - throw new PackageValidationException(e); + //create with handle reference + Mptr mptr = new Mptr(); + mptr.setID(gensym("mptr")); + mptr.setLOCTYPE(Loctype.HANDLE); + mptr.setXlinkHref(handle); + div.getContent().add(mptr); + emptyDiv=false; } + + // Check to see if this is a recursive dissemination (i.e. disseminating children METS packages) + // if so, we want a direct reference to the eventual child METS file + if(params.recursiveModeEnabled()) + { + //determine file extension of child references, + //based on whether we are exporting just a manifest or a full Zip pkg + String childFileExtension = (params!=null && params.getBooleanProperty("manifestOnly", false)) ? "xml" : "zip"; + + //create with file-name reference to child package + Mptr mptr2 = new Mptr(); + mptr2.setID(gensym("mptr")); + mptr2.setLOCTYPE(Loctype.URL); + //we get the name of the child package from the Packager -- as it is what will actually create this child pkg file + mptr2.setXlinkHref(PackageUtils.getPackageName(dso, childFileExtension)); + div.getContent().add(mptr2); + emptyDiv=false; + } + + if(emptyDiv) + return null; + else + return div; } + // put handle in canonical URN format -- note that HandleManager's + // canonicalize currently returns HTTP URL format. 
+ protected String getHandleURN(String handle) + { + if (handle.startsWith("hdl:")) + return handle; + return "hdl:"+handle; + } /** * For a bitstream that's a thumbnail or extracted text, find the @@ -691,34 +1114,145 @@ // Get result from crosswalk plugin and add it to the document, // including namespaces and schema. - private void crosswalkToMets(DisseminationCrosswalk xwalk, + // returns the new/modified element upon success. + private MetsElement crosswalkToMetsElement(DisseminationCrosswalk xwalk, DSpaceObject dso, MetsElement me) throws CrosswalkException, IOException, SQLException, AuthorizeException { - // add crosswalk's namespaces and schemaLocation to this element: - String raw = xwalk.getSchemaLocation(); - String sloc[] = raw == null ? null : raw.split("\\s+"); - Namespace ns[] = xwalk.getNamespaces(); - for (int i = 0; i < ns.length; ++i) + try { - String uri = ns[i].getURI(); - if (sloc != null && sloc.length > 1 && uri.equals(sloc[0])) - me.setSchema(ns[i].getPrefix(), uri, sloc[1]); + // add crosswalk's namespaces and schemaLocation to this element: + String raw = xwalk.getSchemaLocation(); + String sloc[] = raw == null ? 
null : raw.split("\\s+"); + Namespace ns[] = xwalk.getNamespaces(); + for (int i = 0; i < ns.length; ++i) + { + String uri = ns[i].getURI(); + if (sloc != null && sloc.length > 1 && uri.equals(sloc[0])) + me.setSchema(ns[i].getPrefix(), uri, sloc[1]); + else + me.setSchema(ns[i].getPrefix(), uri); + } + + // add result of crosswalk + PreformedXML pXML = null; + if (xwalk.preferList()) + { + List res = xwalk.disseminateList(dso); + if (!(res == null || res.isEmpty())) + pXML = new PreformedXML(outputter.outputString(res)); + } else - me.setSchema(ns[i].getPrefix(), uri); + { + Element res = xwalk.disseminateElement(dso); + if (res != null) + pXML = new PreformedXML(outputter.outputString(res)); + } + if (pXML != null) + { + me.getContent().add(pXML); + return me; + } + return null; } + catch (CrosswalkObjectNotSupported e) + { + // ignore this xwalk if object is unsupported. + if (log.isDebugEnabled()) + log.debug("Skipping MDsec because of CrosswalkObjectNotSupported: dso="+dso.toString()+", xwalk="+xwalk.getClass().getName()); + return null; + } + } - // add result of crosswalk - PreformedXML pXML = - new PreformedXML( - xwalk.preferList() ? - outputter.outputString(xwalk.disseminateList(dso)) : - outputter.outputString(xwalk.disseminateElement(dso))); - me.getContent().add(pXML); + /** + * Cleanup our license file reference links, as Deposit Licenses & CC Licenses can be + * added two ways (and we only want to add them to zip package *once*): + * (1) Added as a normal Bitstream (assuming LICENSE and CC_LICENSE bundles will be included in pkg) + * (2) Added via a 'rightsMD' crosswalk (as they are rights information/metadata on an Item) + *

+ * So, if they are being added by *both*, then we want to just link the rightsMD entry so + * that it points to the Bitstream location. This implementation is a bit 'hackish', but it's + * the best we can do, as the Harvard METS API doesn't allow us to go back and crawl an entire + * METS file to look for these inconsistencies/duplications. + * + * @param context current DSpace Context + * @param params current Packager Parameters + * @param dso current DSpace Object + * @param ref the rightsMD element + * @throws SQLException + * @throws IOException + * @throws AuthorizeException + */ + protected void linkLicenseRefsToBitstreams(Context context, PackageParameters params, DSpaceObject dso, MdRef mdRef) + throws SQLException, IOException, AuthorizeException + { + //If this is a reference to a DSpace Deposit License + if(mdRef.getMDTYPE()!=null && mdRef.getMDTYPE()==Mdtype.OTHER && + mdRef.getOTHERMDTYPE()!=null && mdRef.getOTHERMDTYPE().equals("DSpaceDepositLicense")) + { + //Locate the LICENSE bundle + Item i = (Item)dso; + Bundle license[] = i.getBundles(Constants.LICENSE_BUNDLE_NAME); + + //Are we already including the LICENSE bundle's bitstreams in this package? + if(license!=null && license.length>0 && includeBundle(license[0])) + { + //Since we are including the LICENSE bitstreams, lets find our LICENSE bitstream path & link to it. + Bitstream licenseBs = PackageUtils.findDepositLicense(context, (Item)dso); + mdRef.setXlinkHref(makeBitstreamURL(licenseBs, params)); + } + } + //If this is a reference to a Creative Commons Textual License + else if(mdRef.getMDTYPE() != null && mdRef.getMDTYPE() == Mdtype.OTHER && + mdRef.getOTHERMDTYPE()!=null && mdRef.getOTHERMDTYPE().equals("CreativeCommonsText")) + { + //Locate the CC-LICENSE bundle + Item i = (Item)dso; + Bundle license[] = i.getBundles(CreativeCommons.CC_BUNDLE_NAME); + + //Are we already including the CC-LICENSE bundle's bitstreams in this package? 
+ if(license!=null && license.length>0 && includeBundle(license[0])) + { + //Since we are including the CC-LICENSE bitstreams, lets find our CC-LICENSE (textual) bitstream path & link to it. + Bitstream ccText = CreativeCommons.getLicenseTextBitstream(i); + mdRef.setXlinkHref(makeBitstreamURL(ccText, params)); + } + } + //If this is a reference to a Creative Commons RDF License + else if(mdRef.getMDTYPE() != null && mdRef.getMDTYPE() == Mdtype.OTHER && + mdRef.getOTHERMDTYPE()!=null && mdRef.getOTHERMDTYPE().equals("CreativeCommonsRDF")) + { + //Locate the CC-LICENSE bundle + Item i = (Item)dso; + Bundle license[] = i.getBundles(CreativeCommons.CC_BUNDLE_NAME); + + //Are we already including the CC-LICENSE bundle's bitstreams in this package? + if(license!=null && license.length>0 && includeBundle(license[0])) + { + //Since we are including the CC-LICENSE bitstreams, lets find our CC-LICENSE (RDF) bitstream path & link to it. + Bitstream ccRdf = CreativeCommons.getLicenseRdfBitstream(i); + mdRef.setXlinkHref(makeBitstreamURL(ccRdf, params)); + } + } } /** + * Return identifier for bitstream in an Item; when making a package, + * this is the archive member name (e.g. in Zip file). In a bare + * manifest, it might be an external URL. The name should be in URL + * format ("file:" may be elided for in-archive filenames). It should + * be deterministic, since this gets called twice for each bitstream + * when building archive. + */ + abstract public String makeBitstreamURL(Bitstream bitstream, PackageParameters params); + + /** + * Create metsHdr element - separate so subclasses can override. + */ + abstract public MetsHdr makeMetsHdr(Context context, DSpaceObject dso, + PackageParameters params); + /** * Returns name of METS profile to which this package conforms, e.g. * "DSpace METS DIP Profile 1.0" * @return string name of profile. @@ -745,35 +1279,69 @@ * @param params the PackageParameters passed to the disseminator. 
* @return array of metadata type strings, never null. */ - abstract public String [] getDmdTypes(PackageParameters params) + abstract public String [] getDmdTypes(Context context, DSpaceObject dso, PackageParameters params) throws SQLException, IOException, AuthorizeException; /** * Get the type string of the technical metadata to create for each - * Bitstream in the Item. The type string may be a simple name or - * colon-separated compound as specified for getDmdTypes() above. + * object and each Bitstream in an Item. The type string may be a + * simple name or colon-separated compound as specified for + * getDmdTypes() above. * @param params the PackageParameters passed to the disseminator. * @return array of metadata type strings, never null. */ - abstract public String getTechMdType(PackageParameters params) + abstract public String[] getTechMdTypes(Context context, DSpaceObject dso, PackageParameters params) throws SQLException, IOException, AuthorizeException; /** - * Add Rights metadata for the Item, in the form of - * (rightsMd elements) to the given metadata section. + * Get the type string of the source metadata to create for each + * object and each Bitstream in an Item. The type string may be a + * simple name or colon-separated compound as specified for + * getDmdTypes() above. + * @param params the PackageParameters passed to the disseminator. + * @return array of metadata type strings, never null. + */ + abstract public String[] getSourceMdTypes(Context context, DSpaceObject dso, PackageParameters params) + throws SQLException, IOException, AuthorizeException; + + /** + * Get the type string of the "digiprov" (digital provenance) + * metadata to create for each object and each Bitstream in an Item. + * The type string may be a simple name or colon-separated compound + * as specified for getDmdTypes() above. * + * @param params the PackageParameters passed to the disseminator. + * @return array of metadata type strings, never null. 
*/ - abstract public void addRightsMd(Context context, Item item, AmdSec amdSec) - throws SQLException, IOException, AuthorizeException, MetsException; + abstract public String[] getDigiprovMdTypes(Context context, DSpaceObject dso, PackageParameters params) + throws SQLException, IOException, AuthorizeException; /** + * Get the type string of the "rights" (permission and/or license) + * metadata to create for each object and each Bitstream in an Item. + * The type string may be a simple name or colon-separated compound + * as specified for getDmdTypes() above. + * + * @param params the PackageParameters passed to the disseminator. + * @return array of metadata type strings, never null. + */ + abstract public String[] getRightsMdTypes(Context context, DSpaceObject dso, PackageParameters params) + throws SQLException, IOException, AuthorizeException; + + /** * Add any additional structMap elements to the * METS document, as required by this subclass. A simple default * structure map which fulfills the minimal DSpace METS DIP/SIP * requirements is already present, so this does not need to do anything. * @param mets the METS document to which to add structMaps */ - abstract public void addStructMap(Context context, Item item, + abstract public void addStructMap(Context context, DSpaceObject dso, PackageParameters params, Mets mets) throws SQLException, IOException, AuthorizeException, MetsException; + + /** + * @return true when this bundle should be included as "content" + * in the package.. e.g. DSpace SIP does not include metadata bundles. 
+ */ + abstract public boolean includeBundle(Bundle bundle); } Index: PackageParameters.java =================================================================== --- PackageParameters.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) +++ PackageParameters.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) @@ -161,4 +161,203 @@ stringValue.equalsIgnoreCase("on") || stringValue.equalsIgnoreCase("yes"); } + + + /** + * Utility method to tell if workflow is enabled for Item ingestion. + * Checks the Packager parameters. + *

+ * Defaults to 'true' if previously unset, as by default all + * DSpace Workflows should be enabled. + * + * @return boolean result + */ + public boolean workflowEnabled() + { + return getBooleanProperty("useWorkflow", true); + } + + /*** + * Utility method to enable/disable workflow for Item ingestion. + * + * @param value boolean value (true = workflow enabled, false = workflow disabled) + * @return boolean result + */ + public void setWorkflowEnabled(boolean value) + { + addProperty("useWorkflow", String.valueOf(value)); + } + + + /*** + * Utility method to tell if restore mode is enabled. + * Checks the Packager parameters. + *

+ * Restore mode attempts to restore a missing/deleted object completely + * (including handle), based on contents of a package. + *

+ * NOTE: restore mode should throw an error if it attempts to restore an + * object which already exists. Use 'keep-existing' or 'replace' mode to + * either skip-over (keep) or replace existing objects. + *

+ * Defaults to 'false' if previously unset. NOTE: 'replace' mode and + * 'keep-existing' mode are special types of "restores". So, when either + * replaceModeEnabled() or keepExistingModeEnabled() is true, this method + * should also return true. + * + * @return boolean result + */ + public boolean restoreModeEnabled() + { + if(getBooleanProperty("restoreMode", false) || + replaceModeEnabled() || + keepExistingModeEnabled()) + return true; + else + return false; + } + + /*** + * Utility method to enable/disable restore mode. + *

+ * Restore mode attempts to restore a missing/deleted object completely + * (including handle), based on a given package's contents. + *

+ * NOTE: restore mode should throw an error if it attempts to restore an + * object which already exists. Use 'keep-existing' or 'replace' mode to + * either skip-over (keep) or replace existing objects. + * + * @param value boolean value (true = restore enabled, false = restore disabled) + * @return boolean result + */ + public void setRestoreModeEnabled(boolean value) + { + addProperty("restoreMode", String.valueOf(value)); + } + + /*** + * Utility method to tell if replace mode is enabled. + * Checks the Packager parameters. + *

+ * Replace mode attempts to overwrite an existing object and replace it + * with the contents of a package. Replace mode is considered a special type + * of "restore", where the current object is being restored to a previous state. + *

+ * Defaults to 'false' if previously unset. + * + * @return boolean result + */ + public boolean replaceModeEnabled() + { + return getBooleanProperty("replaceMode", false); + } + + /*** + * Utility method to enable/disable replace mode. + *

+ * Replace mode attempts to overwrite an existing object and replace it + * with the contents of a package. Replace mode is considered a special type + * of "restore", where the current object is being restored to a previous state. + * + * @param value boolean value (true = replace enabled, false = replace disabled) + * @return boolean result + */ + public void setReplaceModeEnabled(boolean value) + { + addProperty("replaceMode", String.valueOf(value)); + } + + /*** + * Utility method to tell if 'keep-existing' mode is enabled. + * Checks the Packager parameters. + *

+ * Keep-Existing mode is identical to 'restore' mode, except that it + * skips over any objects which are found to already be existing. It + * essentially restores all missing objects, but keeps existing ones intact. + *

+ * Defaults to 'false' if previously unset. + * + * @return boolean result + */ + public boolean keepExistingModeEnabled() + { + return getBooleanProperty("keepExistingMode", false); + } + + /*** + * Utility method to enable/disable 'keep-existing' mode. + *

+ * Keep-Existing mode is identical to 'restore' mode, except that it + * skips over any objects which are found to already be existing. It + * essentially restores all missing objects, but keeps existing ones intact. + * + * @param value boolean value (true = keep-existing enabled, false = keep-existing disabled) + * @return boolean result + */ + public void setKeepExistingModeEnabled(boolean value) + { + addProperty("keepExistingMode", String.valueOf(value)); + } + + /*** + * Utility method to tell if Items should use a Collection's template + * when they are created. + *

+ * Defaults to 'false' if previously unset. + * + * @return boolean result + */ + public boolean useCollectionTemplate() + { + return getBooleanProperty("useCollectionTemplate", false); + } + + /*** + * Utility method to enable/disable Collection Template for Item ingestion. + *

+ * When enabled, the Item will be installed using the parent collection's + * Item Template + * + * @param value boolean value (true = template enabled, false = template disabled) + * @return boolean result + */ + public void setUseCollectionTemplate(boolean value) + { + addProperty("useCollectionTemplate", String.valueOf(value)); + } + + + /*** + * Utility method to tell if recursive mode is enabled. + * Checks the Packager parameters. + *

+ * Recursive mode should be enabled anytime one of the *All() methods + * is called (e.g. ingestAll(), replaceAll() or disseminateAll()). It + * recursively performs the same action on all related objects. + *

+ * Defaults to 'false' if previously unset. + * + * @return boolean result + */ + public boolean recursiveModeEnabled() + { + return getBooleanProperty("recursiveMode", false); + } + + /*** + * Utility method to enable/disable recursive mode. + *

+ * Recursive mode should be enabled anytime one of the *All() methods + * is called (e.g. ingestAll(), replaceAll() or disseminateAll()). It + * recursively performs the same action on all related objects. + * + * @param value boolean value (true = recursion enabled, false = recursion disabled) + * @return boolean result + */ + public void setRecursiveModeEnabled(boolean value) + { + addProperty("recursiveMode", String.valueOf(value)); + } + + } Index: AbstractPackageIngester.java =================================================================== --- AbstractPackageIngester.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 0) +++ AbstractPackageIngester.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) @@ -0,0 +1,354 @@ +/** + * AbstractPackageIngester.java + * + * Version: $Revision$ + * + * Date: $Date$ + * + * Copyright (c) 2010, DuraSpace. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the DSpace Foundation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +package org.dspace.content.packager; + +import java.io.File; +import java.io.IOException; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.log4j.Logger; + +import org.dspace.authorize.AuthorizeException; +import org.dspace.content.DSpaceObject; +import org.dspace.content.crosswalk.CrosswalkException; +import org.dspace.core.Constants; +import org.dspace.core.Context; +import org.dspace.core.LogManager; + +/** + * An abstract implementation of a DSpace Package Ingester, which + * implements a few helper/utility methods that most (all?) PackageIngesters + * may find useful. + *

+ * First, it implements recursive functionality in the ingestAll() and replaceAll() + * methods of the PackageIngester interface. These methods are set up to + * recursively call ingest() and replace() respectively. + *

+ * Finally, it also implements several utility methods (createDSpaceObject(), + * finishCreateItem(), updateDSpaceObject()) which subclasses may find useful. + * These methods allow subclasses to easily create/update objects without + * having to worry too much about normal DSpace submission workflows (which are + * taken care of in these utility methods). + *

+ * All Package ingesters should either extend this abstract class + * or implement PackageIngester to better suit their needs. + * + * @author Tim Donohue + * @see PackageIngester + */ +public abstract class AbstractPackageIngester + implements PackageIngester +{ + /** log4j category */ + private static Logger log = Logger.getLogger(AbstractPackageIngester.class); + + /** + * References to other packages -- these are the next packages to ingest recursively + * Key = DSpace Object just ingested, Value = List of all packages relating to a DSpaceObject + **/ + private Map> packageReferences = new HashMap>(); + + /** List of all successfully ingested/replaced DSpace objects */ + private List dsoIngestedList = new ArrayList(); + + /** + * Recursively create one or more DSpace Objects out of the contents + * of the ingested package (and all other referenced packages). + * The initial object is created under the indicated parent. All other + * objects are created based on their relationship to the initial object. + *

+ * For example, a scenario may be to create a Collection based on a + * collection-level package, and also create an Item for every item-level + * package referenced by the collection-level package. + *

+ * The output of this method is one or more newly created DSpaceObjects. + *

+ * The packager may choose not to implement ingestAll, + * or simply forward the call to ingest if it is unable to support + * recursive ingestion. + *

+ * The deposit license (Only significant for Item) is passed + * explicitly as a string since there is no place for it in many + * package formats. It is optional and may be given as + * null. + * + * @param context DSpace context. + * @param parent parent under which to create the initial object + * (may be null -- in which case ingester must determine parent from package + * or throw an error). + * @param pkgFile The initial package file to ingest + * @param params Properties-style list of options (interpreted by each packager). + * @param license may be null, which takes default license. + * @return List of DSpaceObjects created + * + * @throws PackageValidationException if initial package (or any referenced package) + * is unacceptable or there is a fatal error in creating a DSpaceObject + * @throws UnsupportedOperationException if this packager does not + * implement ingestAll + */ + public List ingestAll(Context context, DSpaceObject parent, File pkgFile, + PackageParameters params, String license) + throws PackageException, UnsupportedOperationException, + CrosswalkException, AuthorizeException, + SQLException, IOException + { + //If unset, make sure the Parameters specifies this is a recursive ingest + if(!params.recursiveModeEnabled()) params.setRecursiveModeEnabled(true); + + //Initial DSpace Object to ingest + DSpaceObject dso = null; + + //try to ingest the first package + try + { + //actually ingest pkg using provided PackageIngester + dso = ingest(context, parent, pkgFile, params, license); + } + catch(IllegalStateException ie) + { + // NOTE: if we encounter an IllegalStateException, this means the + // handle is already in use and this object already exists. + + //if we are skipping over (i.e. 
keeping) existing objects + if(params.keepExistingModeEnabled()) + { + log.warn(LogManager.getHeader(context, "skip_package_ingest", "Object already exists, package-skipped=" + pkgFile)); + } + else // Pass this exception on -- which essentially causes a full rollback of all changes (this is the default) + throw ie; + } + + //as long as our first object was ingested successfully + if(dso!=null) + { + //add to list of successfully ingested objects + addToIngestedList(dso); + + //We can only recursively ingest non-Items + //(NOTE: Items have no children, as Bitstreams/Bundles are created from Item packages) + if(dso.getType()!=Constants.ITEM) + { + //Check if we found child package references when ingesting this latest DSpaceObject + List childPkgRefs = getPackageReferences(dso); + + //we can only recursively ingest child packages + //if we have references to them + if(childPkgRefs!=null && !childPkgRefs.isEmpty()) + { + //Recursively ingest each child package, using this current object as the parent DSpace Object + for(String childPkgRef : childPkgRefs) + { + //Assume package reference is relative to current package location + File childPkg = new File(pkgFile.getAbsoluteFile().getParent(), childPkgRef); + + //fun, it's recursive! -- ingested referenced package as a child of current object + ingestAll(context, dso, childPkg, params, license); + } + }//end if child pkgs + }//end if not an Item + }//end if DSpaceObject not null + + //Return list of all objects ingested + return getIngestedList(); + } + + + /** + * Recursively replace one or more DSpace Objects out of the contents + * of the ingested package (and all other referenced packages). + * The initial object to replace is indicated by dso. All other + * objects are replaced based on information provided in the referenced packages. + *

+ * For example, a scenario may be to replace a Collection based on a + * collection-level package, and also replace *every* Item in that collection + * based on the item-level packages referenced by the collection-level package. + *

+ * Please note that since the dso input only specifies the + * initial object to replace, any additional objects to replace must be + * determined based on the referenced packages (or initial package itself). + *

+ * The output of this method is one or more replaced DSpaceObjects. + *

+ * The packager may choose not to implement replaceAll, + * since it somewhat contradicts the archival nature of DSpace. It also + * may choose to forward the call to replace if it is unable to + * support recursive replacement. + * + * @param context DSpace context. + * @param dso initial existing DSpace Object to be replaced, may be null + * if object to replace can be determined from package + * @param pkgFile The package file to ingest. + * @param params Properties-style list of options specific to this packager + * @return List of DSpaceObjects replaced + * + * @throws PackageValidationException if initial package (or any referenced package) + * is unacceptable or there is a fatal error in creating a DSpaceObject + * @throws UnsupportedOperationException if this packager does not + * implement replaceAll + */ + public List replaceAll(Context context, DSpaceObject dso, + File pkgFile, PackageParameters params) + throws PackageException, UnsupportedOperationException, + CrosswalkException, AuthorizeException, + SQLException, IOException + { + //If unset, make sure the Parameters specifies this is a recursive replace + if(!params.recursiveModeEnabled()) params.setRecursiveModeEnabled(true); + + //actually ingest pkg using provided PackageIngester, and replace object + //NOTE: 'dso' may be null! If it is null, the PackageIngester must determine + // the object to be replaced from the package itself. 
+ DSpaceObject replacedDso = replace(context, dso, pkgFile, params); + + //Log that we replaced object using a package + log.info(LogManager.getHeader(context, "package_replace", "object-type=" + Constants.typeText[replacedDso.getType()] + ", object-handle=" + replacedDso.getHandle())); + + //add to list of successfully replaced objects + addToIngestedList(replacedDso); + + //We can only recursively replace non-Items + //(NOTE: Items have no children, as Bitstreams/Bundles are created from Item packages) + if(replacedDso.getType()!=Constants.ITEM) + { + //Check if we found child package references when replacing this latest DSpaceObject + List childPkgRefs = getPackageReferences(replacedDso); + + //we can only recursively ingest child packages + //if we have references to them + if(childPkgRefs!=null && !childPkgRefs.isEmpty()) + { + //Recursively replace each child package + for(String childPkgRef : childPkgRefs) + { + //Assume package reference is relative to current package location + File childPkg = new File(pkgFile.getAbsoluteFile().getParent(), childPkgRef); + + //fun, it's recursive! -- replaced referenced package as a child of current object + // Pass object to replace as 'null', as we don't know which object to replace. + replaceAll(context, null, childPkg, params); + } + }//end if child pkgs + }//end if not an Item + + //Return list of all objects replaced + return getIngestedList(); + } + + + /** + * During ingestion process, some submission information packages (SIPs) + * may reference other packages to be ingested (recursively). + *

+ * This method collects all references to other packages, so that we + * can choose to recursively ingest them, as necessary, alongside the + * DSpaceObject created from the original SIP. + *

+ * References are collected based on the DSpaceObject created from the SIP + * (this way we keep the context of these references). + * + * @param dso DSpaceObject whose SIP referenced another package + * @param packageRef A reference to another package, which can be ingested after this one + */ + public void addPackageReference(DSpaceObject dso, String packageRef) + { + List packageRefValues = null; + + // Check if we already have an entry for packages reference by this object + if(packageReferences.containsKey(dso)) + { + packageRefValues = packageReferences.get(dso); + } + else + { + //Create a new empty list of references + packageRefValues = new ArrayList(); + } + + //add this package reference to existing list and save + packageRefValues.add(packageRef); + packageReferences.put(dso, packageRefValues); + } + + /** + * Return a list of known SIP references from a newly created DSpaceObject. + *

+ * These references should detail where another package exists which + * should be ingested alongside the current DSpaceObject. + *

+ * The AbstractPackageIngester or an equivalent SIP handler is expected + * to understand how to deal with these package references. + * + * @param dso DSpaceObject whose SIP referenced other SIPs + * @return List of Strings which are the references to external submission ingestion packages + * (may be null if no SIPs were referenced) + */ + public List getPackageReferences(DSpaceObject dso) + { + return packageReferences.get(dso); + } + + /** + * Add DSpaceObject to list of successfully ingested/replaced objects + * @param dso DSpaceObject + */ + protected void addToIngestedList(DSpaceObject dso) + { + //add to list of successfully ingested objects + dsoIngestedList.add(dso); + } + + /** + * Return List of all DSpaceObjects which have been ingested/replaced by + * this instance of the Ingester. + *

+ * This list can be useful in reporting back to the user what content has + * been added or replaced. It's used by ingestAll() and replaceAll() to + * return this list of everything that was ingested/replaced. + * + * @return List of DSpaceObjects which have been added/replaced + */ + protected List getIngestedList() + { + return dsoIngestedList; + } +} Index: DSpaceMETSIngester.java =================================================================== --- DSpaceMETSIngester.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) +++ DSpaceMETSIngester.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) @@ -39,19 +39,22 @@ package org.dspace.content.packager; import java.io.IOException; +import java.io.InputStream; import java.sql.SQLException; -import java.util.Set; import org.apache.log4j.Logger; import org.dspace.authorize.AuthorizeException; import org.dspace.content.Bitstream; -import org.dspace.content.Bundle; import org.dspace.content.Collection; +import org.dspace.content.DSpaceObject; import org.dspace.content.Item; import org.dspace.content.crosswalk.CrosswalkException; import org.dspace.content.crosswalk.MetadataValidationException; import org.dspace.core.Context; -import org.dspace.license.CreativeCommons; +import org.dspace.core.Constants; +import org.dspace.core.PluginManager; +import org.dspace.app.mediafilter.MediaFilter; + import org.jdom.Element; /** @@ -66,8 +69,12 @@ * for more information about the DSpace METS SIP profile. * * @author Larry Stone + * @author Tim Donohue * @version $Revision$ * @see org.dspace.content.packager.METSManifest + * @see AbstractMETSIngester + * @see AbstractPackageIngester + * @see PackageIngester */ public class DSpaceMETSIngester extends AbstractMETSIngester @@ -89,16 +96,6 @@ throw new MetadataValidationException("METS has unacceptable PROFILE value, profile="+profile); } - // nothing needed. 
- public void checkPackageFiles(Set packageFiles, Set missingFiles, - METSManifest manifest) - throws PackageValidationException, CrosswalkException - { - // This is where a subclass would arrange to use or ignore - // any "extra" files added to its type of package. - } - - /** * Choose DMD section(s) to crosswalk. *

@@ -109,11 +106,11 @@ * same GROUPID
* 4. Crosswalk remaining DMDs not eliminated already. */ - public void chooseItemDmd(Context context, Item item, + public void crosswalkObjectDmd(Context context, DSpaceObject dso, METSManifest manifest, AbstractMETSIngester.MdrefManager callback, Element dmds[], PackageParameters params) - throws CrosswalkException, + throws CrosswalkException, PackageValidationException, AuthorizeException, SQLException, IOException { int found = -1; @@ -152,7 +149,7 @@ String groupID = null; if (found >= 0) { - manifest.crosswalkItem(context, item, dmds[found], callback); + manifest.crosswalkItemDmd(context, dso, dmds[found], callback); groupID = dmds[found].getAttributeValue("GROUPID"); if (groupID != null) @@ -161,7 +158,7 @@ { String g = dmds[i].getAttributeValue("GROUPID"); if (g != null && !g.equals(groupID)) - manifest.crosswalkItem(context, item, dmds[i], callback); + manifest.crosswalkItemDmd(context, dso, dmds[i], callback); } } } @@ -171,7 +168,7 @@ else { if (dmds.length > 0) - manifest.crosswalkItem(context, item, dmds[0], callback); + manifest.crosswalkItemDmd(context, dso, dmds[0], callback); } } @@ -182,52 +179,86 @@ * default deposit license. * For Creative Commons, look for a rightsMd containing a CC license. 
*/ - public void addLicense(Context context, Collection collection, - Item item, METSManifest manifest, - AbstractMETSIngester.MdrefManager callback, - String license) - throws PackageValidationException, CrosswalkException, + public void addLicense(Context context, Item item, String license, + Collection collection, PackageParameters params) + throws PackageValidationException, AuthorizeException, SQLException, IOException { + if (PackageUtils.findDepositLicense(context, item) == null) PackageUtils.addDepositLicense(context, license, item, collection); + } - // If package includes a Creative Commons license, add that: - Element rmds[] = manifest.getItemRightsMD(); - for (int i = 0; i < rmds.length; ++i) + public void finishObject(Context context, DSpaceObject dso) + throws PackageValidationException, CrosswalkException, + AuthorizeException, SQLException, IOException + { + // nothing to do. + } + + public int getObjectType(METSManifest manifest) + throws PackageValidationException + { + return Constants.ITEM; + } + + // return name of derived file as if MediaFilter created it, or null + // only needed when importing a SIP without canonical DSpace derived file naming. + private String makeDerivedFilename(String bundleName, String origName) + { + // get the MediaFilter that would create this bundle: + String mfNames[] = PluginManager.getAllPluginNames(MediaFilter.class); + + for (int i = 0; i < mfNames.length; ++i) + { + MediaFilter mf = (MediaFilter)PluginManager.getNamedPlugin(MediaFilter.class, mfNames[i]); + if (bundleName.equals(mf.getBundleName())) + return mf.getFilteredName(origName); + } + return null; + } + + /** + * Take a second pass over files to correct names of derived files + * (e.g. 
thumbnails, extracted text) to what DSpace expects: + */ + public void finishBitstream(Context context, + Bitstream bs, + Element mfile, + METSManifest manifest, + PackageParameters params) + throws MetadataValidationException, SQLException, AuthorizeException, IOException + { + String bundleName = METSManifest.getBundleName(mfile); + if (!bundleName.equals(Constants.CONTENT_BUNDLE_NAME)) { - String type = manifest.getMdType(rmds[i]); - if (type != null && type.equals("Creative Commons")) + String opath = manifest.getOriginalFilePath(mfile); + if (opath != null) { - log.debug("Got Creative Commons license in rightsMD"); - CreativeCommons.setLicense(context, item, - manifest.getMdContentAsStream(rmds[i], callback), - manifest.getMdContentMimeType(rmds[i])); + // String ofileId = origFile.getAttributeValue("ID"); + // Bitstream obs = (Bitstream)fileIdToBitstream.get(ofileId); - // if there was a bitstream, get rid of it, since - // it's just an artifact now that the CC license is installed. - Element mdRef = rmds[i].getChild("mdRef", METSManifest.metsNS); - if (mdRef != null) + String newName = makeDerivedFilename(bundleName, opath); + + if (newName != null) { - Bitstream bs = callback.getBitstreamForMdRef(mdRef); - if (bs != null) - { - Bundle parent[] = bs.getBundles(); - if (parent.length > 0) - { - parent[0].removeBitstream(bs); - parent[0].update(); - } - } + //String mfileId = mfile.getAttributeValue("ID"); + //Bitstream bs = (Bitstream)fileIdToBitstream.get(mfileId); + bs.setName(newName); + bs.update(); } } } } - // last change to fix up Item. - public void finishItem(Context context, Item item) - throws PackageValidationException, CrosswalkException, - AuthorizeException, SQLException, IOException + + public String getConfigurationName() { - // nothing to do. 
+ return "dspaceSIP"; } + + + public boolean probe(Context context, InputStream in, PackageParameters params) + { + throw new UnsupportedOperationException("PDF package ingester does not implement probe()"); + } } Index: DSpaceAIPIngester.java =================================================================== --- DSpaceAIPIngester.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 0) +++ DSpaceAIPIngester.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) @@ -0,0 +1,281 @@ +/* + * DSpaceAIPIngester + * + * Version: $Revision: 1.1 $ + * + * Date: $Date: 2006/03/17 00:04:38 $ + * + * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts + * Institute of Technology. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the Hewlett-Packard Company nor the name of the + * Massachusetts Institute of Technology nor the names of their + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +package org.dspace.content.packager; + +import java.io.IOException; +import java.sql.SQLException; + +import org.jdom.Element; + +import org.apache.log4j.Logger; +import org.dspace.authorize.AuthorizeException; +import org.dspace.content.Bitstream; +import org.dspace.content.Collection; +import org.dspace.content.DSpaceObject; +import org.dspace.content.Item; +import org.dspace.content.crosswalk.CrosswalkException; +import org.dspace.content.crosswalk.MetadataValidationException; +import org.dspace.core.Context; +import org.dspace.core.Constants; + +/** + * Subclass of the METS packager framework to ingest a DSpace + * Archival Information Package (AIP). The AIP is intended to be, foremost, + * a _complete_ and _accurate_ representation of one object in the DSpace + * object model. An AIP contains all of the information needed to restore + * the object precisely in another DSpace archive instance. + *

+ * This ingester recognizes two distinct types of AIPs: "Manifest-Only" and "External". + * The Manifest-Only AIP, which is selected by specifying a PackageParameters + * key "manifestOnly" with the value "true", refers to all its contents by + * reference only. For Community or Collection AIPs this means all references to their + * child objects are just via Handles. For Item AIPs all Bitstreams are just + * referenced by their asset store location instead of finding them in the "package". + * The Manifest-Only AIP package format is simply a METS XML document serialized into a file. + *

+ * An "external" AIP (the default), is a conventional Zip-file based package + * that includes copies of all bitstreams referenced by the object as well + * as a serialized METS XML document in the path "mets.xml". + * + * Configuration keys: + * + * # instructs which xwalk plugin to use for a given type of metadata + * mets.dspaceAIP.ingest.crosswalk.{mdSecName} = {pluginName} + * mets.dspaceAIP.ingest.crosswalk.DC = QDC + * mets.dspaceAIP.ingest.crosswalk.DSpaceDepositLicense = NULLSTREAM + * + * # Option to save METS manifest in the item: (default is false) + * mets.default.ingest.preserveManifest = false + * + * @author Larry Stone + * @author Tim Donohue + * @version $Revision: 1.1 $ + * + * @see AbstractMETSIngester + * @see AbstractPackageIngester + * @see PackageIngester + * @see org.dspace.content.packager.METSManifest + */ +public class DSpaceAIPIngester + extends AbstractMETSIngester +{ + /** log4j category */ + private static Logger log = Logger.getLogger(DSpaceAIPIngester.class); + + /** + * Ensure it's an AIP generated by the complementary AIP disseminator. + */ + void checkManifest(METSManifest manifest) + throws MetadataValidationException + { + String profile = manifest.getProfile(); + if (profile == null) + throw new MetadataValidationException("Cannot accept METS with no PROFILE attribute!"); + else if (!profile.equals(DSpaceAIPDisseminator.PROFILE_1_0)) + throw new MetadataValidationException("METS has unacceptable PROFILE attribute, profile="+profile); + } + + + /** + * Choose DMD section(s) to crosswalk. + *

+ * The algorithm is:
+ * 1. Use whatever the dmd parameter specifies as the primary DMD.
+ * 2. If (1) is unspecified, find DIM (preferably) or MODS as primary DMD.
+ * 3. If (1) or (2) succeeds, crosswalk it and ignore all other DMDs with + * same GROUPID
+ * 4. Crosswalk remaining DMDs not eliminated already. + */ + public void crosswalkObjectDmd(Context context, DSpaceObject dso, + METSManifest manifest, + AbstractMETSIngester.MdrefManager callback, + Element dmds[], PackageParameters params) + throws CrosswalkException, PackageValidationException, + AuthorizeException, SQLException, IOException + { + int found = -1; + + // Check to see what dmdSec the user specified in the 'dmd' parameter + String userDmd = null; + if (params != null) + userDmd = params.getProperty("dmd"); + if (userDmd != null && userDmd.length() > 0) + { + for (int i = 0; i < dmds.length; ++i) + if (userDmd.equalsIgnoreCase(manifest.getMdType(dmds[i]))) + found = i; + } + + // DIM is preferred, if nothing specified by user + if (found == -1) + { + // DIM is preferred for AIP + for (int i = 0; i < dmds.length; ++i) + //NOTE: METS standard actually says this should be DIM (all uppercase). But, + // just in case, we're going to be a bit more forgiving. + if ("DIM".equalsIgnoreCase(manifest.getMdType(dmds[i]))) + found = i; + } + + // MODS is acceptable otehrwise.. + if (found == -1) + { + for (int i = 0; i < dmds.length; ++i) + //NOTE: METS standard actually says this should be MODS (all uppercase). But, + // just in case, we're going to be a bit more forgiving. + if ("MODS".equalsIgnoreCase(manifest.getMdType(dmds[i]))) + found = i; + } + + String groupID = null; + if (found >= 0) + { + manifest.crosswalkItemDmd(context, dso, dmds[found], callback); + groupID = dmds[found].getAttributeValue("GROUPID"); + + if (groupID != null) + { + for (int i = 0; i < dmds.length; ++i) + { + String g = dmds[i].getAttributeValue("GROUPID"); + if (g != null && !g.equals(groupID)) + manifest.crosswalkItemDmd(context, dso, dmds[i], callback); + } + } + } + + // otherwise take the first. Don't xwalk more than one because + // each xwalk _adds_ metadata, and could add duplicate fields. 
+ else if (dmds.length > 0) + { + manifest.crosswalkItemDmd(context, dso, dmds[0], callback); + } + + // it's an error if there is nothing to crosswalk: + else + throw new MetadataValidationException("DSpaceAIPIngester: Could not find an acceptable object-wide DMD section in manifest."); + } + + + /** + * Ignore license when restoring an manifest-only AIP, since it should + * be a bitstream in the AIP already. + * Otherwise: Check item for license first; then, take deposit + * license supplied by explicit argument next, else use collection's + * default deposit license. + * Normally the rightsMD crosswalks should provide a license. + */ + public void addLicense(Context context, Item item, String license, + Collection collection, PackageParameters params) + throws PackageValidationException, + AuthorizeException, SQLException, IOException + { + boolean newLicense = false; + + if(!params.restoreModeEnabled()) + { + //AIP is not being restored/replaced, so treat it like a SIP -- every new SIP needs a new license + newLicense = true; + } + + // Add deposit license if there isn't one in the object, + // and it's not a restoration of an "manifestOnly" AIP: + if (!params.getBooleanProperty("manifestOnly", false) && + PackageUtils.findDepositLicense(context, item) == null) + { + newLicense = true; + } + + if(newLicense) + PackageUtils.addDepositLicense(context, license, item, collection); + } + + /** + * Last change to fix up a DSpace Object + */ + public void finishObject(Context context, DSpaceObject dso) + throws PackageValidationException, CrosswalkException, + AuthorizeException, SQLException, IOException + { + // nothing to do. + } + + /** + * Nothing extra to do to bitstream after ingestion. + */ + public void finishBitstream(Context context, + Bitstream bs, + Element mfile, + METSManifest manifest, + PackageParameters params) + throws MetadataValidationException, SQLException, AuthorizeException, IOException + { + // nothing to do. 
+ } + + /** + * Return the type of DSpaceObject in this package; it is + * in the TYPE attribute of the mets:mets element. + */ + public int getObjectType(METSManifest manifest) + throws PackageValidationException + { + Element mets = manifest.getMets(); + String typeStr = mets.getAttributeValue("TYPE"); + if (typeStr == null || typeStr.length() == 0) + throw new PackageValidationException("Manifest is missing the required mets@TYPE attribute."); + if (typeStr.startsWith("DSpace ")) + typeStr = typeStr.substring(7); + int type = Constants.getTypeID(typeStr); + if (type < 0) + throw new PackageValidationException("Manifest has unrecognized value in mets@TYPE attribute: "+typeStr); + return type; + } + + /** + * Name used to distinguish DSpace Configuration entries for this subclass. + */ + public String getConfigurationName() + { + return "dspaceAIP"; + } + +} Index: PackageIngester.java =================================================================== --- PackageIngester.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) +++ PackageIngester.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) @@ -38,28 +38,28 @@ package org.dspace.content.packager; +import java.io.File; import java.io.IOException; -import java.io.InputStream; import java.sql.SQLException; +import java.util.List; import org.dspace.authorize.AuthorizeException; -import org.dspace.content.Collection; -import org.dspace.content.Item; -import org.dspace.content.WorkspaceItem; +import org.dspace.content.DSpaceObject; import org.dspace.content.crosswalk.CrosswalkException; import org.dspace.core.Context; - + /** * Plugin Interface to interpret a Submission Information Package (SIP) - * and create (or replace) a DSpace item from its contents. + * and create (or replace) a DSpace Object from its contents. *

- * A package is a single data stream - * containing enough information to construct an Item. It can be - * anything from an archive like a Zip file with a manifest and metadata, - * to a simple manifest containing external references to the content, - * to a self-contained file such as a PDF. The interpretation - * of the package is entirely at the discretion of the implementing class. + * A package is a single data stream containing enough information to + * construct an Object (i.e. an Item, Collection, or Community). It + * can be anything from an archive like a Zip file with a manifest and + * metadata, to a simple manifest containing external references to the + * content, to a self-contained file such as a PDF. The interpretation + * of the package is entirely at the discretion of the implementing + * class. *

* The ingest methods are also given an attribute-value * list of "parameters" which may modify their actions. @@ -68,58 +68,151 @@ * understand different sets of parameters. * * @author Larry Stone + * @author Tim Donohue * @version $Revision$ * @see PackageParameters + * @see AbstractPackageIngester */ public interface PackageIngester { /** - * Create new Item out of the ingested package. - * The item will belong to the indicated - * collection. This creates a WorkspaceItem, so it is - * up to the caller to decide whether to install it or submit - * it to normal DSpace Workflow. + * Create new DSpaceObject out of the ingested package. The object + * is created under the indicated parent. This creates a + * DSpaceObject. For Items, it is up to the caller to + * decide whether to install it or submit it to normal DSpace Workflow. *

- * The deposit license is passed explicitly as a string since there - * is no place for it in many package formats. It is optional and may - * be given as null. + * The deposit license (Only significant for Item) is passed + * explicitly as a string since there is no place for it in many + * package formats. It is optional and may be given as + * null. + *

+ * Use ingestAll method to perform a recursive ingest of all + * packages which are referenced by an initial package. * * @param context DSpace context. - * @param collection collection under which to create new item. - * @param in input stream containing package to ingest. + * @param parent parent under which to create new object + * (may be null -- in which case ingester must determine parent from package + * or throw an error). + * @param pkgFile The package file to ingest * @param params Properties-style list of options (interpreted by each packager). * @param license may be null, which takes default license. - * @return workspace item created by ingest. + * @return DSpaceObject created by ingest. * * @throws PackageValidationException if package is unacceptable or there is - * a fatal error turning it into an Item. + * a fatal error turning it into a DSpaceObject. */ - WorkspaceItem ingest(Context context, Collection collection, InputStream in, + DSpaceObject ingest(Context context, DSpaceObject parent, File pkgFile, PackageParameters params, String license) throws PackageException, CrosswalkException, AuthorizeException, SQLException, IOException; + /** - * Replace an existing Item with contents of the ingested package. + * Recursively create one or more DSpace Objects out of the contents + * of the ingested package (and all other referenced packages). + * The initial object is created under the indicated parent. All other + * objects are created based on their relationship to the initial object. + *

+ * For example, a scenario may be to create a Collection based on a + * collection-level package, and also create an Item for every item-level + * package referenced by the collection-level package. + *

+ * The output of this method is one or more newly created DSpaceObjects. + *

+ * The packager may choose not to implement ingestAll, + * or simply forward the call to ingest if it is unable to support + * recursive ingestion. + *

+ * The deposit license (Only significant for Item) is passed + * explicitly as a string since there is no place for it in many + * package formats. It is optional and may be given as + * null. + * + * @param context DSpace context. + * @param parent parent under which to create the initial object + * (may be null -- in which case ingester must determine parent from package + * or throw an error). + * @param pkgFile The initial package file to ingest + * @param params Properties-style list of options (interpreted by each packager). + * @param license may be null, which takes default license. + * @return List of DSpaceObjects created + * + * @throws PackageValidationException if initial package (or any referenced package) + * is unacceptable or there is a fatal error in creating a DSpaceObject + * @throws UnsupportedOperationException if this packager does not + * implement ingestAll + */ + List ingestAll(Context context, DSpaceObject parent, File pkgFile, + PackageParameters params, String license) + throws PackageException, UnsupportedOperationException, + CrosswalkException, AuthorizeException, + SQLException, IOException; + + /** + * Replace an existing DSpace Object with contents of the ingested package. * The packager may choose not to implement replace, * since it somewhat contradicts the archival nature of DSpace. * The exact function of this method is highly implementation-dependent. + *

+ * Use replaceAll method to perform a recursive replace of + * objects referenced by a set of packages. * * @param context DSpace context. - * @param item existing item to be replaced - * @param in input stream containing package to ingest. + * @param dso existing DSpace Object to be replaced, may be null + * if object to replace can be determined from package + * @param pkgFile The package file to ingest. * @param params Properties-style list of options specific to this packager - * @return item re-created by ingest. + * @return DSpaceObject with contents replaced * * @throws PackageValidationException if package is unacceptable or there is * a fatal error turning it into an Item. * @throws UnsupportedOperationException if this packager does not * implement replace. */ - Item replace(Context context, Item item, InputStream in, - PackageParameters params) + DSpaceObject replace(Context context, DSpaceObject dso, + File pkgFile, PackageParameters params) throws PackageException, UnsupportedOperationException, CrosswalkException, AuthorizeException, SQLException, IOException; - + + /** + * Recursively replace one or more DSpace Objects out of the contents + * of the ingested package (and all other referenced packages). + * The initial object to replace is indicated by dso. All other + * objects are replaced based on information provided in the referenced packages. + *

+ * For example, a scenario may be to replace a Collection based on a + * collection-level package, and also replace *every* Item in that collection + * based on the item-level packages referenced by the collection-level package. + *

+ * Please note that since the dso input only specifies the + * initial object to replace, any additional objects to replace must be + * determined based on the referenced packages (or initial package itself). + *

+ * The output of this method is one or more replaced DSpaceObjects. + *

+ * The packager may choose not to implement replaceAll, + * since it somewhat contradicts the archival nature of DSpace. It also + * may choose to forward the call to replace if it is unable to + * support recursive replacement. + * + * @param context DSpace context. + * @param dso initial existing DSpace Object to be replaced, may be null + * if object to replace can be determined from package + * @param pkgFile The package file to ingest. + * @param params Properties-style list of options specific to this packager + * @return List of DSpaceObjects replaced + * + * @throws PackageValidationException if initial package (or any referenced package) + * is unacceptable or there is a fatal error in creating a DSpaceObject + * @throws UnsupportedOperationException if this packager does not + * implement replaceAll + */ + List replaceAll(Context context, DSpaceObject dso, + File pkgFile, PackageParameters params) + throws PackageException, UnsupportedOperationException, + CrosswalkException, AuthorizeException, + SQLException, IOException; + + } Index: AbstractMETSIngester.java =================================================================== --- AbstractMETSIngester.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) +++ AbstractMETSIngester.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257) @@ -38,478 +38,1115 @@ package org.dspace.content.packager; +import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.net.URL; +import java.net.URLConnection; import java.sql.SQLException; -import java.util.HashMap; -import java.util.HashSet; import java.util.Iterator; import java.util.List; -import java.util.Map; -import java.util.Set; import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; +import java.util.zip.ZipFile; import org.apache.log4j.Logger; -import 
org.dspace.app.mediafilter.MediaFilter; import org.dspace.authorize.AuthorizeException; import org.dspace.content.Bitstream; import org.dspace.content.BitstreamFormat; import org.dspace.content.Bundle; import org.dspace.content.Collection; +import org.dspace.content.Community; +import org.dspace.content.DSpaceObject; import org.dspace.content.FormatIdentifier; import org.dspace.content.Item; -import org.dspace.content.WorkspaceItem; import org.dspace.content.crosswalk.CrosswalkException; import org.dspace.content.crosswalk.MetadataValidationException; import org.dspace.core.ConfigurationManager; import org.dspace.core.Constants; import org.dspace.core.Context; import org.dspace.core.LogManager; -import org.dspace.core.PluginManager; +import org.dspace.handle.HandleManager; import org.jdom.Element; /** * Base class for package ingester of - * METS (Metadata Encoding & Transmission Standard) Package.
+ * METS (Metadata Encoding & Transmission Standard) Packages.
* See http://www.loc.gov/standards/mets/ *

* This is a generic packager framework intended to be subclassed to create * ingesters for more specific METS "profiles". METS is an * abstract and flexible framework that can encompass many * different kinds of metadata and inner package structures. + * *

- * Configuration: - * If the property mets.submission.preserveManifest is true, + * Package Parameters: + *

    + *
  • validate -- true/false attempt to schema-validate the METS manifest.
  • + *
  • manifestOnly -- package consists only of a manifest document.
  • + *
  • ignoreHandle -- true/false, ignore AIP's idea of handle when ingesting.
  • + *
  • ignoreParent -- true/false, ignore AIP's idea of parent when ingesting.
  • + *
+ *

+ * Configuration Properties: + *

    + *
  • mets.CONFIGNAME.ingest.preserveManifest - if true, * the METS manifest itself is preserved in a bitstream named * mets.xml in the METADATA bundle. If it is - * false (the default), the manifest is discarded after ingestion. + * false (the default), the manifest is discarded after ingestion.
  • * + *
  • mets.CONFIGNAME.ingest.manifestBitstreamFormat - short name of + * the bitstream format to apply to the manifest; MUST be specified when + * preserveManifest is true.
  • + * + *
  • mets.default.ingest.crosswalk.MD_SEC_NAME = PLUGIN_NAME + * Establishes a default crosswalk plugin for the given type of metadata + * in a METS mdSec (e.g. "DC", "MODS"). The plugin may be either a stream + * or XML-oriented ingestion crosswalk. Subclasses can override the + * default mapping with their own, substituting their configurationName + * for "default" in the configuration property key above.
  • + * + *
  • mets.CONFIGNAME.ingest.useCollectionTemplate - if true, + * when an item is created, use the collection template. If it is + * false (the default), any existing collection template is ignored.
  • + *
+ * * @author Larry Stone + * @author Tim Donohue * @version $Revision$ * @see org.dspace.content.packager.METSManifest + * @see AbstractPackageIngester + * @see PackageIngester */ public abstract class AbstractMETSIngester - implements PackageIngester + extends AbstractPackageIngester { /** log4j category */ private static Logger log = Logger.getLogger(AbstractMETSIngester.class); - /** Filename of manifest, relative to package toplevel. */ - public static final String MANIFEST_FILE = "mets.xml"; - - // bitstream format name of magic METS SIP format.. - private static final String MANIFEST_BITSTREAM_FORMAT = - "DSpace METS SIP"; - - // value of mets.submission.preserveManifest config key - private static final boolean preserveManifest = - ConfigurationManager.getBooleanProperty("mets.submission.preserveManifest", false); - - // value of mets.submission.useCollectionTemplate config key - private static final boolean useTemplate = - ConfigurationManager.getBooleanProperty("mets.submission.useCollectionTemplate", false); - /** - * An instance of MdrefManager holds the state needed to - * retrieve the contents (or bitstream corresponding to) an - * external metadata stream referenced by an mdRef - * element in the METS manifest. + * An instance of ZipMdrefManager holds the state needed to + * retrieve the contents of an external metadata stream + * referenced by an mdRef + * element in a Zipped up METS manifest. *

- * Initialize it with the DSpace Bundle containing all of the + * Initialize it with the Content (ORIGINAL) Bundle containing all of the * metadata bitstreams. Match an mdRef by finding the bitstream * with the same name. */ protected class MdrefManager implements METSManifest.Mdref { - private Bundle mdBundle = null; - - // constructor initializes metadata bundle. - private MdrefManager(Bundle mdBundle) + private File packageFile = null; + private PackageParameters params; + + // constructor initializes from package file + private MdrefManager(File packageFile, PackageParameters params) { super(); - this.mdBundle = mdBundle; + this.packageFile = packageFile; + this.params = params; } /** - * Find the local Bitstream referenced in - * an mdRef element. - * @param mdref the METS mdRef element to locate the bitstream for. - * @return bitstream or null if none found. - */ - public Bitstream getBitstreamForMdRef(Element mdref) - throws MetadataValidationException, IOException, SQLException, AuthorizeException - { - String path = METSManifest.getFileName(mdref); - if (mdBundle == null) - throw new MetadataValidationException("Failed referencing mdRef element, because there were no metadata files."); - return mdBundle.getBitstreamByName(path); - } - - /** * Make the contents of an external resource mentioned in * an mdRef element available as an InputStream. * See the METSManifest.MdRef interface for details. + * * @param mdref the METS mdRef element to locate the input for. * @return the input stream of its content. 
+ * @see METSManifest */ public InputStream getInputStream(Element mdref) - throws MetadataValidationException, IOException, SQLException, AuthorizeException + throws MetadataValidationException, IOException { - Bitstream mdbs = getBitstreamForMdRef(mdref); - if (mdbs == null) - throw new MetadataValidationException("Failed dereferencing bitstream for mdRef element="+mdref.toString()); - return mdbs.retrieve(); + String path = METSManifest.getFileName(mdref); + if (packageFile == null) + throw new MetadataValidationException("Failed referencing mdRef element, because there is no package specified."); + + //Use the 'getFileInputStream()' method from the AbstractMETSIngester to retrieve + // the inputstream for the referenced external metadata file. + return AbstractMETSIngester.getFileInputStream(packageFile, params, path); } - } + }//end MdrefManager class /** - * Create a new DSpace item out of a METS content package. + * Create a new DSpace object out of a METS content package. * All contents are dictated by the METS manifest. - * Package is a ZIP archive, all files relative to top level + * Package is a ZIP archive (or optionally bare manifest XML document). + * In a Zip, all files relative to top level * and the manifest (as per spec) in mets.xml. * - * @param context - DSpace context. - * @param collection - collection under which to create new item. - * @param pkg - input stream containing package to ingest. - * @param license - may be null, which takes default license. - * @return workspace item created by ingest. + * @param context DSpace context. + * @param parent parent under which to create new object + * (may be null -- in which case ingester must determine parent from package + * or throw an error). + * @param pkgFile The package file to ingest + * @param params Properties-style list of options (interpreted by each packager). + * @param license may be null, which takes default license. + * @return DSpaceObject created by ingest. 
+ * * @throws PackageValidationException if package is unacceptable or there is - * a fatal error turning it into an Item. + * a fatal error turning it into a DSpaceObject. + * @throws CrosswalkException + * @throws AuthorizeException + * @throws SQLException + * @throws IOException */ - public WorkspaceItem ingest(Context context, Collection collection, - InputStream pkg, PackageParameters params, + public DSpaceObject ingest(Context context, DSpaceObject parent, + File pkgFile, PackageParameters params, String license) throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException { - ZipInputStream zip = new ZipInputStream(pkg); - HashMap fileIdToBitstream = new HashMap(); - WorkspaceItem wi = null; - boolean success = false; - HashSet packageFiles = new HashSet(); + //parsed out METS Manifest from the file. + METSManifest manifest = null; - boolean validate = params.getBooleanProperty("validate", true); + //new DSpace object created + DSpaceObject dso = null; try { - /* 1. Read all the files in the Zip into bitstreams first, - * because we only get to take one pass through a Zip input - * stream. Give them temporary bitstream names corresponding - * to the same names they had in the Zip, since those MUST - * match the URL references in and elements. 
- */ - METSManifest manifest = null; - wi = WorkspaceItem.create(context, collection, useTemplate); - Item item = wi.getItem(); - Bundle contentBundle = null; - Bundle mdBundle = null; - ZipEntry ze; - while ((ze = zip.getNextEntry()) != null) - { - if (ze.isDirectory()) - continue; - Bitstream bs = null; - String fname = ze.getName(); - if (fname.equals(MANIFEST_FILE)) - { - if (preserveManifest) - { - mdBundle = item.createBundle(Constants.METADATA_BUNDLE_NAME); - bs = mdBundle.createBitstream(new PackageUtils.UnclosableInputStream(zip)); - bs.setName(fname); - bs.setSource(fname); + log.info(LogManager.getHeader(context, "package_parse", + "Parsing package for ingest, file=" + pkgFile.getName())); - // Get magic bitstream format to identify manifest. - BitstreamFormat manifestFormat = null; - manifestFormat = PackageUtils.findOrCreateBitstreamFormat(context, - MANIFEST_BITSTREAM_FORMAT, "application/xml", - MANIFEST_BITSTREAM_FORMAT+" package manifest"); - bs.setFormat(manifestFormat); + //Parse our ingest package, extracting out the METS manifest in the package + manifest = parsePackage(context, pkgFile, params); - manifest = METSManifest.create(bs.retrieve(), validate); - } - else - { - manifest = METSManifest.create(new PackageUtils.UnclosableInputStream(zip), validate); - continue; - } - } - else - { - // we need to create the bundle only the first time - if (contentBundle == null) - { - contentBundle = item.createBundle(Constants.CONTENT_BUNDLE_NAME); - } - bs = contentBundle.createBitstream(new PackageUtils.UnclosableInputStream(zip)); - bs.setSource(fname); - bs.setName(fname); - } - packageFiles.add(fname); - bs.setSource(fname); - bs.update(); - } - zip.close(); - + //must have a METS Manifest to ingest anything if (manifest == null) - throw new PackageValidationException("No METS Manifest found (filename="+MANIFEST_FILE+"). 
Package is unacceptable."); + throw new PackageValidationException("No METS Manifest found (filename="+METSManifest.MANIFEST_FILE+"). Package is unacceptable!"); - // initial sanity checks on manifest (in subclass) + //validate our manifest checkManifest(manifest); - /* 2. Grovel a file list out of METS Manifest and compare - * it to the files in package, as an integrity test. - */ - List manifestContentFiles = manifest.getContentFiles(); + //if we are not restoring an object (i.e. we are submitting a new object) + // then, default the 'ignoreHandle' option to true (as a new object should get a new handle by default) + if(!params.restoreModeEnabled() && !params.containsKey("ignoreHandle")) + { //ignore the handle in the manifest, and instead create a new handle + params.addProperty("ignoreHandle", "true"); + } - // Compare manifest files with the ones found in package: - // a. Start with content files (mentioned in s) - HashSet missingFiles = new HashSet(); - for (Iterator mi = manifestContentFiles.iterator(); mi.hasNext(); ) - { - // First locate corresponding Bitstream and make - // map of Bitstream to ID. 
- Element mfile = (Element)mi.next(); - String mfileId = mfile.getAttributeValue("ID"); - if (mfileId == null) - throw new PackageValidationException("Invalid METS Manifest: file element without ID attribute."); - String path = METSManifest.getFileName(mfile); - Bitstream bs = contentBundle.getBitstreamByName(path); - if (bs == null) - { - log.warn("Cannot find bitstream for filename=\""+path+ - "\", skipping it..may cause problems later."); - missingFiles.add(path); - } - else - { - fileIdToBitstream.put(mfileId, bs); + //if we have a Parent Object, default 'ignoreParent' option to True (this will ignore the Parent specified in manifest) + if(parent!=null && !params.containsKey("ignoreParent")) + { //ignore the parent in the manifest, and instead use the specified parent object + params.addProperty("ignoreParent", "true"); + } - // Now that we're done using Name to match to , - // set default bitstream Name to last path element; - // Zip entries all have '/' pathname separators - // NOTE: set default here, hopefully crosswalk of - // a bitstream techMD section will override it. - String fname = bs.getName(); - int lastSlash = fname.lastIndexOf('/'); - if (lastSlash >= 0 && lastSlash+1 < fname.length()) - bs.setName(fname.substring(lastSlash+1)); + //Actually ingest the object described by the METS Manifest + dso = ingestObject(context, parent, manifest, pkgFile, + params, license); - // Set Default bitstream format: - // 1. attempt to guess from MIME type - // 2. if that fails, guess from "name" extension. - String mimeType = mfile.getAttributeValue("MIMETYPE"); - BitstreamFormat bf = (mimeType == null) ? 
null : - BitstreamFormat.findByMIMEType(context, mimeType); - if (bf == null) - bf = FormatIdentifier.guessFormat(context, bs); - bs.setFormat(bf); + //Log whether we finished an ingest (create new obj) or a restore (restore previously existing obj) + String action = "package_ingest"; + if(params.restoreModeEnabled()) + action = "package_restore"; + log.info(LogManager.getHeader(context, action, + "Created new Object, type=" + Constants.typeText[dso.getType()] + + ", handle=" + dso.getHandle() + ", dbID=" + String.valueOf(dso.getID()))); - // if this bitstream belongs in another Bundle, move it: - String bundleName = manifest.getBundleName(mfile); - if (!bundleName.equals(Constants.CONTENT_BUNDLE_NAME)) - { - Bundle bn; - Bundle bns[] = item.getBundles(bundleName); - if (bns != null && bns.length > 0) - bn = bns[0]; - else - bn = item.createBundle(bundleName); - bn.addBitstream(bs); - contentBundle.removeBitstream(bs); - } + // Check if the Packager is currently running recursively. + // If so, this means the Packager will attempt to recursively + // ingest all referenced child packages. + if(params.recursiveModeEnabled()) + { + //Retrieve list of all Child object METS file paths from the current METS manifest. + // This is our list of known child packages + String[] childFilePaths = manifest.getChildMetsFilePaths(); - // finally, build compare lists by deleting matches. - if (packageFiles.contains(path)) - packageFiles.remove(path); - else - missingFiles.add(path); - } + //Save this list to our AbstractPackageIngester (and note which DSpaceObject the pkgs relate to) + //NOTE: The AbstractPackageIngester itself will perform the recursive ingest call, + // based on these child pkg references + for(int i=0; is - check and move - // to METADATA bundle. 
- for (Iterator mi = manifest.getMdFiles().iterator(); mi.hasNext(); ) - { - Element mdref = (Element)mi.next(); - String path = METSManifest.getFileName(mdref); + return dso; + } + catch (SQLException se) + { + // no need to really clean anything up, + // transaction rollback will get rid of it anyway. + dso = null; - // finally, build compare lists by deleting matches. - if (packageFiles.contains(path)) - packageFiles.remove(path); - else - missingFiles.add(path); + // Pass this exception on to the next handler. + throw se; + } + } - // if there is a bitstream with that name in Content, move - // it to the Metadata bundle: - Bitstream mdbs = contentBundle.getBitstreamByName(path); - if (mdbs != null) - { - if (mdBundle == null) - mdBundle = item.createBundle(Constants.METADATA_BUNDLE_NAME); - mdBundle.addBitstream(mdbs); - contentBundle.removeBitstream(mdbs); - } + /** + * Parse a given input package, ultimately returning the METS manifest out + * of the package. METS manifest is assumed to be a file named 'mets.xml' + * + * @param context DSpace Context + * @param pkgFile package to parse + * @param params Ingestion parameters + * @return parsed out METSManifest + * @throws IOException + * @throws SQLException + * @throws AuthorizeException + * @throws MetadataValidationException + */ + protected METSManifest parsePackage(Context context, File pkgFile, + PackageParameters params) + throws IOException, SQLException, AuthorizeException, MetadataValidationException + { + //whether or not to validate the METSManifest before processing (default=false) + // (Even though it's preferrable to validate -- it's costly and takes a lot of time, unless you cache schemas locally) + boolean validate = params.getBooleanProperty("validate", false); + + //parsed out METS Manifest from the file. + METSManifest manifest = null; + + //try to locate the METS Manifest in package + // 1. 
read "package" stream: it will be either bare Manifest + // or Package contents into bitstreams, depending on params: + if (params.getBooleanProperty("manifestOnly", false)) + { + // parse the bare METS manifest and sanity-check it. + manifest = METSManifest.create(new FileInputStream(pkgFile), + validate, getConfigurationName()); + } + else + { + ZipFile zip = new ZipFile(pkgFile); + + //Retrieve the manifest file entry (named mets.xml) + ZipEntry manifestEntry = zip.getEntry(METSManifest.MANIFEST_FILE); + + // parse the manifest and sanity-check it. + manifest = METSManifest.create( zip.getInputStream(manifestEntry), + validate, getConfigurationName()); + + //close the Zip file for now + //(we'll extract the other files from zip when we need them) + zip.close(); + } + + //return our parsed out METS manifest + return manifest; + } + + + /** + * Ingest/import a single DSpace Object, based on the associated METS Manifest + * and the parameters passed to the METSIngester + * + * @param context DSpace Context + * @param parent Parent DSpace Object + * @param manifest the parsed METS Manifest + * @param pkgFile the full package file (which may include content files if a zip) + * @param params Parameters passed to METSIngester + * @param license DSpace license agreement + * @return completed result as a DSpace object + * @throws IOException + * @throws SQLException + * @throws AuthorizeException + * @throws CrosswalkException + * @throws MetadataValidationException + * @throws PackageValidationException + */ + protected DSpaceObject ingestObject(Context context, DSpaceObject parent, + METSManifest manifest, + File pkgFile, + PackageParameters params, String license) + throws IOException, SQLException, AuthorizeException, + CrosswalkException, + MetadataValidationException, PackageValidationException + { + //type of DSpace Object (one of the type constants) + int type; + + //-- Step 1 -- + //Extract basic information (type, parent, handle) about DSpace object represented 
by manifest + type = getObjectType(manifest); + + // if no parent passed in (or ignoreParent is false), + // attempt to determine parent DSpace object from manifest + if(parent==null || !params.getBooleanProperty("ignoreParent", false)) + { + // get parent object from manifest + parent = getParentObject(context, manifest); + } + + String handle = null; + //if we are *not* ignoring the handle in manifest (i.e. ignoreHandle is false) + if(!params.getBooleanProperty("ignoreHandle", false)) + { + // get handle from manifest + handle = getObjectHandle(manifest); + } + + //-- Step 2 -- + // Create our DSpace Object based on info parsed from manifest, and packager params + DSpaceObject dso = PackageUtils.createDSpaceObject(context, parent, type, handle, params); + + //if we are uninitialized, throw an error -- something's wrong! + if(dso==null) + { + throw new PackageValidationException("Unable to initialize object specified by package (type='" + type + "', handle='" + handle + "' and parent='" + parent.getHandle() + "')."); + } + + //-- Step 3 -- + // Run our Administrative metadata crosswalks! + + //initialize callback object which will retrieve external inputstreams for any 's found in METS + MdrefManager callback = new MdrefManager(pkgFile, params); + + // Crosswalk the sourceMD first, so that we make sure to fill in + // submitter info (and any other initial applicable info) + manifest.crosswalkObjectSourceMD(context, dso, callback); + + // Next, crosswalk techMD, digiprovMD, rightsMD + manifest.crosswalkObjectOtherAdminMD(context, dso, callback); + + + //-- Step 4 -- + // Add all content files as bitstreams on new DSpace Object + if (type == Constants.ITEM) + { + Item item = (Item)dso; + // @TODO: maybe add an option to apply template Item on ingest?? 
+ + // save manifest as a bitstream in Item if desired + if (preserveManifest()) + { + addManifestBitstream(context, item, manifest); } - // KLUDGE: make sure Manifest file doesn't get flagged as missing - // or extra, since it won't be mentioned in the manifest. - if (packageFiles.contains(MANIFEST_FILE)) - packageFiles.remove(MANIFEST_FILE); + // save all other bitstreams in Item + addBitstreams(context, item, manifest, pkgFile, params, callback); - // Give subclass a chance to refine the lists of in-package - // and missing files, delete extraneous files, etc. - checkPackageFiles(packageFiles, missingFiles, manifest); + // have subclass manage license since it may be extra package file. + addLicense(context, item, license, (Collection)dso.getParentObject(), params); - // Any discrepency in file lists is a fatal error: - if (!(packageFiles.isEmpty() && missingFiles.isEmpty())) + // XXX FIXME + // should set lastModifiedTime e.g. when ingesting AIP. + // maybe only do it in the finishObject() callback for AIP. + + } // end if ITEM + else if (type == Constants.COLLECTION || + type == Constants.COMMUNITY) + { + // Add logo if one is referenced from manifest + addContainerLogo(context, dso, manifest, pkgFile, params); + }//end if Community/Collection + else + throw new PackageValidationException("Unknown DSpace Object type in package, type="+String.valueOf(type)); + + //-- Step 5 -- + // Run our Descriptive metadata (dublin core, etc) crosswalks! + crosswalkObjectDmd(context, dso, manifest, callback, manifest.getItemDmds(), params); + + // For Items, also sanity-check the metadata for minimum requirements. + if (type == Constants.ITEM) + PackageUtils.checkItemMetadata((Item)dso); + + //-- Step 6 -- + // Finish things up! 
+ + // Subclass hook for final checks and rearrangements + // (this allows subclasses to do some final validation / changes as necessary) + finishObject(context, dso); + + // Update the object to make sure all changes are committed + PackageUtils.updateDSpaceObject(dso); + + return dso; + } + + + /** + * Replace the contents of a single DSpace Object, based on the associated METS Manifest + * and the parameters passed to the METSIngester + * + * @param context DSpace Context + * @param dso DSpace Object to replace + * @param manifest the parsed METS Manifest + * @param pkgFile the full package file (which may include content files if a zip) + * @param params Parameters passed to METSIngester + * @param license DSpace license agreement + * @return completed result as a DSpace object + * @throws IOException + * @throws SQLException + * @throws AuthorizeException + * @throws CrosswalkException + * @throws MetadataValidationException + * @throws PackageValidationException + */ + protected DSpaceObject replaceObject(Context context, DSpaceObject dso, + METSManifest manifest, + File pkgFile, + PackageParameters params, String license) + throws IOException, SQLException, AuthorizeException, + CrosswalkException, + MetadataValidationException, PackageValidationException + { + //-- Step 1 -- + // Before going forward with the replace, let's verify these objects are of the same TYPE! + // (We don't want to go around trying to replace a COMMUNITY with an ITEM -- that's dangerous.) 
+ int manifestType = getObjectType(manifest); + if(manifestType!=dso.getType()) + { + throw new PackageValidationException("The object type of the METS manifest (" + Constants.typeText[manifestType] + ") does not match up with the object type (" + Constants.typeText[dso.getType()] + ") of the DSpaceObject to be replaced!"); + } + + if (log.isDebugEnabled()) + log.debug("Object to be replaced (handle=" + dso.getHandle() +") is " + Constants.typeText[dso.getType()] + " id=" + dso.getID()); + + //-- Step 2 -- + // Clear out current object (as we are replacing all its contents & metadata) + + //remove all files attached to this object + // (For communities/collections this just removes the logo bitstream) + PackageUtils.removeAllBitstreams(dso); + + //clear out all metadata values associated with this object + PackageUtils.clearAllMetadata(dso); + + //@TODO -- We are currently NOT clearing out the following during a replace. + // So, even after a replace, the following information may be retained in the system + // 1. Groups/People in system + // 2. Rights/Permissions in system or on objects + // 3. Collection item templates or Content Source info (e.g. OAI Harvesting collections) + // 4. Item status (embargo, withdrawn) or mappings to other collections + + //-- Step 3 -- + // Run our Administrative metadata crosswalks! 
+ + //initialize callback object which will retrieve external inputstreams for any 's found in METS + MdrefManager callback = new MdrefManager(pkgFile, params); + + // Crosswalk the sourceMD first, so that we make sure to fill in + // submitter info (and any other initial applicable info) + manifest.crosswalkObjectSourceMD(context, dso, callback); + + // Next, crosswalk techMD, digiprovMD, rightsMD + manifest.crosswalkObjectOtherAdminMD(context, dso, callback); + + + //-- Step 4 -- + // Add all content files as bitstreams on new DSpace Object + if (dso.getType() == Constants.ITEM) + { + Item item = (Item)dso; + // @TODO: maybe add an option to apply template Item on ingest?? + + // save manifest as a bitstream in Item if desired + if (preserveManifest()) { - StringBuffer msg = new StringBuffer("Package is unacceptable: contents do not match manifest."); - if (!missingFiles.isEmpty()) - { - msg.append("\nPackage is missing these files listed in Manifest:"); - for (Iterator mi = missingFiles.iterator(); mi.hasNext(); ) - msg.append("\n\t"+(String)mi.next()); - } - if (!packageFiles.isEmpty()) - { - msg.append("\nPackage contains extra files NOT in manifest:"); - for (Iterator mi = packageFiles.iterator(); mi.hasNext(); ) - msg.append("\n\t"+(String)mi.next()); - } - throw new PackageValidationException(msg.toString()); + addManifestBitstream(context, item, manifest); } - /* 3. crosswalk the metadata - */ - // get mdref'd streams from "callback" object. - MdrefManager callback = new MdrefManager(mdBundle); + // save all other bitstreams in Item + addBitstreams(context, item, manifest, pkgFile, params, callback); - chooseItemDmd(context, item, manifest, callback, manifest.getItemDmds(), params); + // have subclass manage license since it may be extra package file. + addLicense(context, item, license, (Collection)dso.getParentObject(), params); - // crosswalk content bitstreams too. 
- for (Iterator ei = fileIdToBitstream.entrySet().iterator(); - ei.hasNext();) + // FIXME ? + // should set lastModifiedTime e.g. when ingesting AIP. + // maybe only do it in the finishObject() callback for AIP. + + } // end if ITEM + else if (dso.getType() == Constants.COLLECTION || + dso.getType() == Constants.COMMUNITY) + { + // Add logo if one is referenced from manifest + addContainerLogo(context, dso, manifest, pkgFile, params); + }//end if Community/Collection + + + //-- Step 5 -- + // Run our Descriptive metadata (dublin core, etc) crosswalks! + crosswalkObjectDmd(context, dso, manifest, callback, manifest.getItemDmds(), params); + + // For Items, also sanity-check the metadata for minimum requirements. + if (dso.getType() == Constants.ITEM) + PackageUtils.checkItemMetadata((Item)dso); + + //-- Step 6 -- + // Finish things up! + + // Subclass hook for final checks and rearrangements + // (this allows subclasses to do some final validation / changes as necessary) + finishObject(context, dso); + + // Update the object to make sure all changes are committed + PackageUtils.updateDSpaceObject(dso); + + return dso; + } + + + + /** + * Add Bitstreams to an Item, based on the files listed in the METS Manifest + * + * @param context DSpace Context + * @param item DSpace Item + * @param manifest METS Manifest + * @param pkgFile the full package file (which may include content files if a zip) + * @param params Ingestion Parameters + * @param mdRefCallback MdrefManager storing info about mdRefs in manifest + * @throws SQLException + * @throws IOException + * @throws AuthorizeException + * @throws MetadataValidationException + * @throws CrosswalkException + * @throws PackageValidationException + */ + protected void addBitstreams(Context context, Item item, METSManifest manifest, + File pkgFile, + PackageParameters params, MdrefManager mdRefCallback) + throws SQLException, IOException, AuthorizeException, + MetadataValidationException, CrosswalkException, + 
PackageValidationException + { + // Step 1 -- find the ID of the primary or Logo bitstream in manifest + String primaryID = null; + Element primaryFile = manifest.getPrimaryOrLogoBitstream(); + if (primaryFile != null) + { + primaryID = primaryFile.getAttributeValue("ID"); + if (log.isDebugEnabled()) + log.debug("Got primary bitstream file ID=\""+primaryID+"\""); + } + + // Step 2 -- find list of all content files from manifest + // Loop through these files, and add them one by one to Item + List manifestContentFiles = manifest.getContentFiles(); + + boolean setPrimaryBitstream = false; + BitstreamFormat unknownFormat = BitstreamFormat.findUnknown(context); + + for (Iterator mi = manifestContentFiles.iterator(); mi.hasNext(); ) + { + Element mfile = (Element)mi.next(); + + //basic validation -- check that it has an ID attribute + String mfileID = mfile.getAttributeValue("ID"); + if (mfileID == null) + throw new PackageValidationException("Invalid METS Manifest: file element without ID attribute."); + + // retrieve path/name of file in manifest + String path = METSManifest.getFileName(mfile); + + // extract the file input stream from package (or retrieve externally, if it is an externally referenced file) + InputStream fileStream = getFileInputStream(pkgFile, params, path); + + // retrieve bundle name from manifest + String bundleName = METSManifest.getBundleName(mfile); + + // Find or create the bundle where bitstrem should be attached + Bundle bundle; + Bundle bns[] = item.getBundles(bundleName); + if (bns != null && bns.length > 0) + bundle = bns[0]; + else + bundle = item.createBundle(bundleName); + + // Create the bitstream in the bundle & initialize its name + Bitstream bitstream = bundle.createBitstream(fileStream); + bitstream.setName(path); + + // crosswalk this bitstream's administrative metadata located in METS manifest (or referenced externally) + manifest.crosswalkBitstream(context, bitstream, mfileID, mdRefCallback); + + // is this the primary 
bitstream? + if (primaryID != null && mfileID.equals(primaryID)) { - Map.Entry ee = (Map.Entry)ei.next(); - manifest.crosswalkBitstream(context, (Bitstream)ee.getValue(), - (String)ee.getKey(), callback); + bundle.setPrimaryBitstreamID(bitstream.getID()); + bundle.update(); + setPrimaryBitstream = true; } - // Take a second pass over files to correct names of derived files - // (e.g. thumbnails, extracted text) to what DSpace expects: - for (Iterator mi = manifestContentFiles.iterator(); mi.hasNext(); ) + // Run any finishing activities -- this allows subclasses to + // change default bitstream information + finishBitstream(context, bitstream, mfile, manifest, params); + + //Last-ditch attempt to divine the format, if crosswalk failed to set it: + // 1. attempt to guess from MIME type + // 2. if that fails, guess from "name" extension. + if (bitstream.getFormat().equals(unknownFormat)) { + if (log.isDebugEnabled()) + log.debug("Guessing format of Bitstream left un-set: "+bitstream.toString()); + String mimeType = mfile.getAttributeValue("MIMETYPE"); + BitstreamFormat bf = (mimeType == null) ? null : + BitstreamFormat.findByMIMEType(context, mimeType); + if (bf == null) + bf = FormatIdentifier.guessFormat(context, bitstream); + bitstream.setFormat(bf); + } + bitstream.update(); + }//end for each manifest file + + + // Step 3 -- Sanity checks + // sanity check for primary bitstream + if (primaryID != null && !setPrimaryBitstream) + log.warn("Could not find primary bitstream file ID=\""+primaryID+"\" in manifest file \"" + pkgFile.getAbsolutePath() + "\""); + } + + /** + * Save/Preserve the METS Manifest as a Bitstream attached to the + * given DSpace item. 
+ * + * @param context DSpace Context + * @param item DSpace Item + * @param manifest The METS Manifest + * @throws SQLException + * @throws AuthorizeException + * @throws PackageValidationException + */ + protected void addManifestBitstream(Context context, Item item, METSManifest manifest) + throws IOException, SQLException, AuthorizeException, PackageValidationException + { + //We'll save the METS Manifest as part of the METADATA bundle. + Bundle mdBundle = item.createBundle(Constants.METADATA_BUNDLE_NAME); + + //Create a Bitstream from the METS Manifest's content + Bitstream manifestBitstream = mdBundle.createBitstream(manifest.getMetsAsStream()); + manifestBitstream.setName(METSManifest.MANIFEST_FILE); + manifestBitstream.setSource(METSManifest.MANIFEST_FILE); + manifestBitstream.update(); + + // Get magic bitstream format to identify manifest. + String fmtName = getManifestBitstreamFormat(); + if (fmtName == null) + throw new PackageValidationException("Configuration Error: No Manifest BitstreamFormat configured for METS ingester type="+getConfigurationName()); + BitstreamFormat manifestFormat = PackageUtils.findOrCreateBitstreamFormat(context, + fmtName, "application/xml", + fmtName+" package manifest"); + manifestBitstream.setFormat(manifestFormat); + manifestBitstream.update(); + } + + + /** + * Add a Logo to a Community or Collection container object based on + * a METS Manifest. 
+ * + * @param context DSpace Context + * @param dso DSpace Container Object + * @param manifest METS Manifest + * @param pkgFile the full package file (which may include content files if a zip) + * @param params Ingestion Parameters + * @throws SQLException + * @throws IOException + * @throws AuthorizeException + * @throws MetadataValidationException + * @throws PackageValidationException + */ + protected void addContainerLogo(Context context, DSpaceObject dso, + METSManifest manifest, File pkgFile, + PackageParameters params) + throws SQLException, IOException, AuthorizeException, + MetadataValidationException, PackageValidationException + { + + Element logoRef = manifest.getPrimaryOrLogoBitstream(); + + //only continue if a logo specified in manifest + if (logoRef != null) + { + //Find ID of logo file + String logoID = logoRef.getAttributeValue("ID"); + + // Loop through manifest content files to find actual logo file + for (Iterator mi = manifest.getContentFiles().iterator(); mi.hasNext(); ) + { Element mfile = (Element)mi.next(); - String bundleName = manifest.getBundleName(mfile); - if (!bundleName.equals(Constants.CONTENT_BUNDLE_NAME)) + if (logoID.equals(mfile.getAttributeValue("ID"))) { - Element origFile = manifest.getOriginalFile(mfile); - if (origFile != null) - { - String ofileId = origFile.getAttributeValue("ID"); - Bitstream obs = (Bitstream)fileIdToBitstream.get(ofileId); - String newName = makeDerivedFilename(bundleName, obs.getName()); - if (newName != null) - { - String mfileId = mfile.getAttributeValue("ID"); - Bitstream bs = (Bitstream)fileIdToBitstream.get(mfileId); - bs.setName(newName); - bs.update(); - } - } + String path = METSManifest.getFileName(mfile); + + // extract the file input stream from package (or retrieve externally, if it is an externally referenced file) + InputStream fileStream = getFileInputStream(pkgFile, params, path); + + // Add this logo to the Community/Collection + if (dso.getType() == Constants.COLLECTION) + 
((Collection)dso).setLogo(fileStream); + else + ((Community)dso).setLogo(fileStream); + + break; } - } + }// end for each file in manifest + }// end if logo reference found + } - // Sanity-check the resulting metadata on the Item: - PackageUtils.checkMetadata(item); - /* 4. Set primary bitstream; same Bundle - */ - Element pbsFile = manifest.getPrimaryBitstream(); - if (pbsFile != null) + /** + * Replace an existing DSpace object with the contents of a METS-based package. + * All contents are dictated by the METS manifest. + * Package is a ZIP archive (or optionally bare manifest XML document). + * In a Zip, all files relative to top level + * and the manifest (as per spec) in mets.xml. + *

+ * This method is similar to ingest(), except that if the object already exists in + * DSpace, it is deleted & replaced. The METS-based package is then used to ingest a new + * object in its place. + *

+ * In order to attempt to avoid data loss, this method first ingests a new object (with same parent) + * based on the METS manifest. Assuming that succeeds, the existing object is removed, + * and its handle is reassigned to the new object. If the ingest fails, the new object + * is removed and the existing object is left in tact. + * + * @param context DSpace Context + * @param dsoToReplace DSpace Object to be replaced (may be null if it will be specified in the METS manifest itself) + * @param pkgFile The package file to ingest + * @param params Parameters passed from the packager script + * @return DSpaceObject created by ingest. + * @throws PackageValidationException if package is unacceptable or there is + * a fatal error turning it into a DSpace Object. + * @throws IOException + * @throws SQLException + * @throws AuthorizeException + * @throws CrosswalkException + */ + public DSpaceObject replace(Context context, DSpaceObject dsoToReplace, + File pkgFile, PackageParameters params) + throws PackageValidationException, CrosswalkException, + AuthorizeException, SQLException, IOException + { + //parsed out METS Manifest from the file. + METSManifest manifest = null; + + //resulting DSpace Object + DSpaceObject dso = null; + + try + { + log.info(LogManager.getHeader(context, "package_parse", + "Parsing package for replace, file=" + pkgFile.getName())); + + //Parse our ingest package, extracting out the METS manifest in the package + manifest = parsePackage(context, pkgFile, params); + + //must have a METS Manifest to replace anything + if (manifest == null) + throw new PackageValidationException("No METS Manifest found (filename="+METSManifest.MANIFEST_FILE+"). 
Package is unacceptable!"); + + //it's possible that the object to replace will be passed in as null + // Let's determine the handle of the object to replace + if(dsoToReplace==null) { - Bitstream pbs = (Bitstream)fileIdToBitstream.get(pbsFile.getAttributeValue("ID")); - if (pbs == null) - log.error("Got Primary Bitstream file ID="+pbsFile.getAttributeValue("ID")+ - ", but found no corresponding bitstream."); - else + // since we don't know what we are replacing, we'll have to + // try to determine it from the parsed manifest + + // Handle of object described by METS should be in OBJID + String handleURI = manifest.getObjID(); + String handle = decodeHandleURN(handleURI); + try { - Bundle bn[] = pbs.getBundles(); - if (bn.length > 0) - bn[0].setPrimaryBitstreamID(pbs.getID()); - else - log.error("Sanity check, got primary bitstream without any parent bundle."); + // Attempt to resolve this handle to an existing object + dsoToReplace = HandleManager.resolveToObject(context, handle); } + catch(IllegalStateException ie) + { + //we don't care if this errors out -- we can continue whether or not an object exists with this handle + } } + // NOTE: At this point, it's still possible we don't have an object to replace + // This could happen when there is actually no existing object in DSpace using that handle + // (In which case, we're actually just doing a "restore" -- so we aren't going to throw an error or complain) - // have subclass manage license since it may be extra package file. 
- addLicense(context, collection, item, manifest, callback, license ); + // If we were unable to find the object to replace, then assume we are restoring it + if(dsoToReplace==null) + { + //In order to restore an object, we must first figure out which parent it belongs to + DSpaceObject parent = null; + // Let's try to figure out the parent using the Manifest + // look for a Parent Object link in manifest + String parentLink = manifest.getParentOwnerLink(); - // subclass hook for final checks and rearrangements - finishItem(context, item); + //verify we have a valid Parent Object + if(parentLink!=null && parentLink.length()>0) + { + parent = HandleManager.resolveToObject(context, parentLink); + if (parent == null) + throw new UnsupportedOperationException("Could not find a parent DSpaceObject references as '" + parentLink + "' in the METS Manifest. A valid parent DSpaceObject must be specified in the METS Manifest itself."); + } + else + throw new UnsupportedOperationException("Could not find a parent DSpaceObject where we can ingest this package. A valid parent DSpaceObject must be specified in the METS Manifest itself."); - // commit any changes to bundles - Bundle allBn[] = item.getBundles(); - for (int i = 0; i < allBn.length; ++i) + // As this object doesn't already exist, we will perform an ingest of a new object in order to restore it + dso = ingestObject(context, parent, manifest, pkgFile, params, null); + + //Log that we created an object + log.info(LogManager.getHeader(context, "package_replace", + "Created new Object, type=" + Constants.typeText[dso.getType()] + + ", handle=" + dso.getHandle() + ", dbID=" + String.valueOf(dso.getID()))); + } + else // otherwise, we found the DSpaceObject to replace -- so, replace it! { - allBn[i].update(); + // Actually replace the object described by the METS Manifest + // NOTE: This will perform an in-place replace of all metadata and files currently associated with the object. 
+ dso = replaceObject(context, dsoToReplace, manifest, + pkgFile, + params, null); + + //Log that we replaced an object + log.info(LogManager.getHeader(context, "package_replace", + "Replaced Object, type=" + Constants.typeText[dso.getType()] + + ", handle=" + dso.getHandle() + ", dbID=" + String.valueOf(dso.getID()))); } + + // Check if the Packager is currently running recursively. + // If so, this means the Packager will attempt to recursively + // replace all referenced child packages. + if(params.recursiveModeEnabled()) + { + //Retrieve list of all Child object METS file paths from the current METS manifest. + // This is our list of known child packages + String[] childFilePaths = manifest.getChildMetsFilePaths(); - wi.update(); - success = true; - log.info(LogManager.getHeader(context, "ingest", - "Created new Item, db ID="+String.valueOf(item.getID())+ - ", WorkspaceItem ID="+String.valueOf(wi.getID()))); - return wi; + //Save this list to our AbstractPackageIngester (and note which DSpaceObject the pkgs relate to) + //NOTE: The AbstractPackageIngester itself will perform the recursive ingest call, + // based on these child pkg references + for(int i=0; i. You should override + * this method if your METS manifest specifies the parent object in another + * location. 
+ * + * @param context DSpace Context + * @param manifest METS manifest + * @returns a DSpace Object which is the parent (or null, if not found) + * @throws PackageValidationException if parent reference cannot be found in manifest + * @throws MetadataValidationException + * @throws SQLException */ - public Item replace(Context ctx, Item item, InputStream pckage, PackageParameters params) - throws PackageException, UnsupportedOperationException, - CrosswalkException, AuthorizeException, - SQLException, IOException + public DSpaceObject getParentObject(Context context, METSManifest manifest) + throws PackageValidationException, MetadataValidationException, + SQLException { - throw new UnsupportedOperationException("The replace operation is not implemented."); + DSpaceObject parent = null; + // look for a Parent Object link in manifest + String parentLink = manifest.getParentOwnerLink(); + + //verify we have a valid Parent Object + if(parentLink!=null && parentLink.length()>0) + { + parent = HandleManager.resolveToObject(context, parentLink); + if (parent == null) + throw new UnsupportedOperationException("Could not find a parent DSpaceObject references as '" + parentLink + "' in the METS Manifest. A parent DSpaceObject must be specified from either the 'packager' command or noted in the METS Manifest itself."); + } + else + throw new UnsupportedOperationException("Could not find a parent DSpaceObject where we can ingest this package. A parent DSpaceObject must be specified from either the 'packager' command or noted in the METS Manifest itself."); + + return parent; } - // return name of derived file as if MediaFilter created it, or null - private String makeDerivedFilename(String bundleName, String origName) + /** + * Determines the handle of the DSpace object represented in this METS doc. + *

+ * This is a default implementation which assumes the handle of the + * DSpace Object can be found in the @OBJID attribute. You should + * override this method if your METS manifest specifies the handle in + * another location. + * + * @param manifest METS manifest + * @returns handle as a string (or null, if not found) + * @throws PackageValidationException if handle cannot be found in manifest + */ + public String getObjectHandle(METSManifest manifest) + throws PackageValidationException, MetadataValidationException, + SQLException { - // get the MediaFilter that would create this bundle: - String mfNames[] = PluginManager.getAllPluginNames(MediaFilter.class); + //retrieve handle URI from manifest + String handleURI = manifest.getObjID(); - for (int i = 0; i < mfNames.length; ++i) + //decode this URI (by removing the 'hdl:' prefix) + String handle = decodeHandleURN(handleURI); + + if(handle==null || handle.length()==0) { - MediaFilter mf = (MediaFilter)PluginManager.getNamedPlugin(MediaFilter.class, mfNames[i]); - if (bundleName.equals(mf.getBundleName())) - return mf.getFilteredName(origName); + throw new PackageValidationException("The DSpace Object handle required to ingest this package could not be resolved in manifest. The is missing."); } - return null; + + return handle; } /** + * Retrieve the inputStream for a File referenced from a specific path within + * a METS package. + *

+ * If the packager is set to 'manifest-only' (i.e. pkgFile is just a manifest), + * we assume the file is available for download via a URL. + *

+ * Otherwise, the pkgFile is a Zip, so the file should be retrieved from + * within that Zip package. + * + * @param pkgFile the full package file (which may include content files if a zip) + * @param params Parameters passed to METSIngester + * @param path the File path (either path in Zip package or a URL) + * @return the InputStream for the file + */ + protected static InputStream getFileInputStream(File pkgFile, PackageParameters params, String path) + throws MetadataValidationException, IOException + { + //If this is a manifest only package (i.e. not a zip file) + if (params.getBooleanProperty("manifestOnly", false)) + { + //NOTE: since we are only dealing with a METS manifest, + // we will assume all external files are available via URLs. + try + { + //attempt to open a connection to given URL + URL fileURL = new URL(path); + URLConnection connection = fileURL.openConnection(); + + //open stream to access file contents + return connection.getInputStream(); + } + catch(IOException io) + { + log.error("Unable to retrieve external file from URL '" + path + "' for manifest-only METS package. All externally referenced files must be retrievable via URLs."); + //pass exception upwards + throw io; + } + } + else + { + //open the Zip package + ZipFile zipPackage = new ZipFile(pkgFile); + + //Retrieve the manifest file entry by name + ZipEntry manifestEntry = zipPackage.getEntry(path); + + //Get inputStream associated with this file + return zipPackage.getInputStream(manifestEntry); + } + } + + + /** * Profile-specific tests to validate manifest. The implementation * can access the METS document through the manifest * variable, an instance of METSManifest. @@ -519,42 +1156,12 @@ throws MetadataValidationException; /** - * Hook for subclass to modify the test of the package's - * integrity, and add other tests. E.g. evaluate a PGP signature of - * the manifest in a separate file. - *

- * The packageFiles contains "extra" files that were in - * the package but were not referenced by the METS manifest (either as - * content or metadata (mdRefs)). - * The implementation of this method should look for any "extra" files - * uses (e.g. a checksum or cryptographic signature for the manifest - * itself) and remove them from the Set. - *

- * The missingFiles set is for - * any files - * referenced by the manifest but not found in the package. - * The implementation can check it for "false positives", or add - * other missing files it knows of. - *

- * If either of the Sets missingFiles - * or packageFiles - * is not empty, the ingest will fail. - * - * @param packageFiles files in package but not referenced by METS - * @param missingFiles files referenced by manifest but not in package - * - */ - abstract public void checkPackageFiles(Set packageFiles, Set missingFiles, - METSManifest manifest) - throws PackageValidationException, CrosswalkException; - - /** * Select the dmdSec element(s) to apply to the * Item. The implementation is responsible for choosing which * (if any) of the metadata sections to crosswalk to get the * descriptive metadata for the item being ingested. It is * responsible for calling the crosswalk, using the manifest's helper - * i.e. manifest.crosswalkItem(context,item,dmdElement,callback); + * i.e. manifest.crosswalkItemDmd(context,item,dmdElement,callback); * (The callback argument is a reference to itself since the * class also implements the METSManifest.MdRef interface * to fetch package files referenced by mdRef elements.) @@ -569,10 +1176,10 @@ * @param dmds array of Elements, each a METS dmdSec that applies to the Item as a whole. * @param params any user parameters passed to the Packager script */ - abstract public void chooseItemDmd(Context context, Item item, + abstract public void crosswalkObjectDmd(Context context, DSpaceObject dso, METSManifest manifest, MdrefManager callback, Element dmds[], PackageParameters params) - throws CrosswalkException, + throws CrosswalkException, PackageValidationException, AuthorizeException, SQLException, IOException; /** @@ -587,28 +1194,56 @@ * information of interest, e.g. a Creative Commons license. *

* This framework does not add any licenses by default. + *

+ * Note that crosswalking rightsMD sections can also add a deposit or CC + * license to the object. * * @param context the DSpace context * @param collection DSpace Collection to which the item is being submitted. * @param license optional user-supplied Deposit License text (may be null) */ - abstract public void addLicense(Context context, Collection collection, - Item item, METSManifest manifest, - MdrefManager callback, String license) - throws PackageValidationException, CrosswalkException, + abstract public void addLicense(Context context, Item item, String license, + Collection collection, PackageParameters params) + throws PackageValidationException, AuthorizeException, SQLException, IOException; /** - * Hook for final "finishing" operations on the new Item. - * This method is called when the new Item is otherwise complete and + * Hook for final "finishing" operations on the new Object. + * This method is called when the new Object is otherwise complete and * ready to be returned. The implementation should use this * opportunity to make whatever final checks and modifications are * necessary. * * @param context the DSpace context */ - abstract public void finishItem(Context context, Item item) + abstract public void finishObject(Context context, DSpaceObject dso) throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException; + /** + * Determines what type of DSpace object is represented in this METS doc. + * @returns one of the object types in Constants. + */ + abstract public int getObjectType(METSManifest manifest) + throws PackageValidationException; + + /** + * Subclass-dependent final processing on a Bitstream; could include + * fixing up the name, bundle, other attributes. 
+ */ + abstract public void finishBitstream(Context context, + Bitstream bs, + Element mfile, + METSManifest manifest, + PackageParameters params) + throws MetadataValidationException, SQLException, AuthorizeException, IOException; + + + /** + * Returns keyword that makes the configuration keys of this subclass + * unique, e.g. if it returns NAME, the key would be: + * "mets.NAME.ingest.preserveManifest = true" + */ + abstract public String getConfigurationName(); + }