Index: METSManifest.java
===================================================================
--- METSManifest.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257)
+++ METSManifest.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257)
@@ -53,16 +53,17 @@
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream;
import org.dspace.content.DSpaceObject;
-import org.dspace.content.Item;
import org.dspace.content.crosswalk.CrosswalkException;
import org.dspace.content.crosswalk.CrosswalkObjectNotSupported;
import org.dspace.content.crosswalk.MetadataValidationException;
import org.dspace.content.crosswalk.IngestionCrosswalk;
+import org.dspace.content.crosswalk.StreamIngestionCrosswalk;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.PluginManager;
import org.jdom.Document;
+import org.jdom.Content;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
@@ -142,7 +143,8 @@
* @throws AuthorizeException if it is returned by services called by this method.
*/
public InputStream getInputStream(Element mdRef)
- throws MetadataValidationException, IOException, SQLException, AuthorizeException;
+ throws MetadataValidationException, PackageValidationException,
+ IOException, SQLException, AuthorizeException;
}
/** log4j category */
@@ -154,10 +156,10 @@
/** Prefix of DSpace configuration lines that map METS metadata type to
* crosswalk plugin names.
*/
- private final static String CONFIG_METADATA_PREFIX = "mets.submission.crosswalk.";
+ public final static String CONFIG_METS_PREFIX = "mets.";
/** prefix of config lines identifying local XML Schema (XSD) files */
- private final static String CONFIG_XSD_PREFIX = "mets.xsd.";
+ private final static String CONFIG_XSD_PREFIX = CONFIG_METS_PREFIX+"xsd.";
/** Dublin core element namespace */
private static Namespace dcNS = Namespace
@@ -172,7 +174,7 @@
.getNamespace("mets", "http://www.loc.gov/METS/");
/** XLink namespace -- includes "xlink" prefix prefix for use in XPaths */
- private static Namespace xlinkNS = Namespace
+ public static Namespace xlinkNS = Namespace
.getNamespace("xlink", "http://www.w3.org/1999/xlink");
/** root element of the current METS manifest. */
@@ -187,6 +189,9 @@
/** builder to use for mdRef streams, inherited from create() */
private SAXBuilder parser = null;
+ /** name of packager who created this manifest object, for looking up configuration entries. */
+ private String configName;
+
// Create list of local schemas at load time, since it depends only
// on the DSpace configuration.
private static String localSchemas;
@@ -237,6 +242,7 @@
}
}
localSchemas = result.toString();
+ if (log.isDebugEnabled())
log.debug("Got local schemas = \""+localSchemas+"\"");
}
@@ -245,11 +251,12 @@
* @param builder XML parser (for parsing mdRef'd files and binData)
* @param mets parsed METS document
*/
- private METSManifest(SAXBuilder builder, Element mets)
+ private METSManifest(SAXBuilder builder, Element mets, String configName)
{
super();
this.mets = mets;
parser = builder;
+ this.configName = configName;
}
/**
@@ -262,12 +269,14 @@
* or validating the METS.
* @return new METSManifest object.
*/
- public static METSManifest create(InputStream is, boolean validate)
+ public static METSManifest create(InputStream is, boolean validate, String configName)
throws IOException,
MetadataValidationException
{
SAXBuilder builder = new SAXBuilder(validate);
+ builder.setIgnoringElementContentWhitespace(true);
+
// Set validation feature
if (validate)
builder.setFeature("http://apache.org/xml/features/validation/schema",
@@ -287,12 +296,13 @@
{
metsDocument = builder.build(is);
- // XXX for temporary debugging
- /*
- XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat());
- log.debug("Got METS DOCUMENT:");
- log.debug(outputPretty.outputString(metsDocument));
- */
+ /*** XXX leave commented out except if needed for
+ *** viewing the METS document that actually gets read.
+ *
+ * XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat());
+ * log.debug("Got METS DOCUMENT:");
+ * log.debug(outputPretty.outputString(metsDocument));
+ ****/
}
catch (JDOMException je)
{
@@ -300,7 +310,7 @@
+ is.toString(), je);
}
- return new METSManifest(builder, metsDocument.getRootElement());
+ return new METSManifest(builder, metsDocument.getRootElement(), configName);
}
/**
@@ -313,6 +323,17 @@
}
/**
+ * Return the OBJID attribute of the METS manifest.
+ * This is where the Handle URI/URN of the object can be found.
+ *
+ * @return OBJID attribute of METS manifest
+ */
+ public String getObjID()
+ {
+ return mets.getAttributeValue("OBJID");
+ }
+
+ /**
* Gets all file
elements which make up
* the item's content.
* @return a List of Element
s.
@@ -379,9 +400,9 @@
* attribute is peculiar to the DSpace METS SIP profile, and may not be
* generally useful with other sorts of METS documents.
* @param file METS file element of derived file
- * @return file Element of original or null if none found.
+ * @return file path of original or null if none found.
*/
- public Element getOriginalFile(Element file)
+ public String getOriginalFilePath(Element file)
{
String groupID = file.getAttributeValue("GROUPID");
if (groupID == null || groupID.equals(""))
@@ -395,10 +416,12 @@
List oFiles = xpath.selectNodes(mets);
if (oFiles.size() > 0)
{
- log.debug("Got ORIGINAL file for derived="+file.toString());
- return (Element)oFiles.get(0);
+ if (log.isDebugEnabled())
+ log.debug("Got ORIGINAL file for derived="+file.toString());
+ Element flocat = ((Element)oFiles.get(0)).getChild("FLocat", metsNS);
+ if (flocat != null)
+ return flocat.getAttributeValue("href", xlinkNS);
}
- else
return null;
}
catch (JDOMException je)
@@ -481,11 +504,11 @@
*
* @return file element of Item's primary bitstream, or null if there is none.
*/
- public Element getPrimaryBitstream()
+ public Element getPrimaryOrLogoBitstream()
throws MetadataValidationException
{
- Element firstDiv = getFirstDiv();
- Element fptr = firstDiv.getChild("fptr", metsNS);
+ Element objDiv = getObjStructDiv();
+ Element fptr = objDiv.getChild("fptr", metsNS);
if (fptr == null)
return null;
String id = fptr.getAttributeValue("FILEID");
@@ -497,7 +520,8 @@
return result;
}
- /** Get the metadata type from within a *mdSec element.
+ /**
+ * Get the metadata type from within a *mdSec element.
* @return metadata type name.
*/
public String getMdType(Element mdSec)
@@ -545,10 +569,27 @@
* @throws MetadataValidationException if METS is invalid, or there is an error parsing the XML.
*/
public List getMdContentAsXml(Element mdSec, Mdref callback)
- throws MetadataValidationException, IOException, SQLException, AuthorizeException
+ throws MetadataValidationException, PackageValidationException,
+ IOException, SQLException, AuthorizeException
{
try
{
+ // XXX sanity check: if this has more than one child, consider it
+ // an error since we cannot deal with more than one mdRef|mdWrap
+ // child. This may be considered a bug and need to be fixed,
+ // so it's best to bring it to the attention of users.
+ List mdc = mdSec.getChildren();
+ if (mdc.size() > 1)
+ {
+ // XXX scaffolding for debugging diagnosis; at least one
+ // XML parser stupidly includes newlines in prettyprinting
+ // as text content objects..
+ String id = mdSec.getAttributeValue("ID");
+ StringBuffer sb = new StringBuffer();
+ for (Iterator mi = mdc.iterator(); mi.hasNext();)
+ sb.append(", ").append(((Content)mi.next()).toString());
+ throw new MetadataValidationException("Cannot parse METS with "+mdSec.getQualifiedName()+" element that contains more than one child, size="+String.valueOf(mdc.size())+", ID="+id+"Kids="+sb.toString());
+ }
Element mdRef = null;
Element mdWrap = mdSec.getChild("mdWrap", metsNS);
if (mdWrap != null)
@@ -618,7 +659,8 @@
* @throws MetadataValidationException if METS format does not contain any metadata.
*/
public InputStream getMdContentAsStream(Element mdSec, Mdref callback)
- throws MetadataValidationException, IOException, SQLException, AuthorizeException
+ throws MetadataValidationException, PackageValidationException,
+ IOException, SQLException, AuthorizeException
{
Element mdRef = null;
Element mdWrap = mdSec.getChild("mdWrap", metsNS);
@@ -653,45 +695,147 @@
}
- // special call to crosswalk the guts of a metadata *Sec (dmdSec, amdSec)
- // because mdRef and mdWrap have to be handled differently.
- // It's a lot like getMdContentAsXml but cannot use that because xwalk
- // should be called with root element OR list depending on what was given.
- private void crosswalkMdContent(Element mdSec, Mdref callback,
- IngestionCrosswalk xwalk, Context context, DSpaceObject dso)
- throws CrosswalkException, IOException, SQLException, AuthorizeException
- {
- List xml = getMdContentAsXml(mdSec,callback);
-
- // if we get inappropriate metadata, e.g. PREMIS for Item, let it go.
- try
- {
- xwalk.ingest(context, dso, xml);
- }
- catch (CrosswalkObjectNotSupported e)
- {
- log.warn("Skipping metadata for inappropriate type of object: Object="+dso.toString()+", error="+e.toString());
- }
- }
-
- // return first
Element
s, each a + * First, implements recursive functionality in the disseminateAll() + * method of the PackageIngester interface. This method is setup to + * recursively call disseminate() method. + *
+ * All Package disseminators should either extend this abstract class
+ * or implement
+ * Package is any serialized representation of the item, at the discretion
+ * of the implementing class. It does not have to include content bitstreams.
+ *
+ * If a single operation (from the user's point of view) causes multiple
+ * update()s of a given object, then some objects may be exported more than once
+ * per user operation, but no object should be exported more than once per
+ * update(). See the object implementations to understand how this happens. In
+ * other words, over time the number of exports is not minimal, but I think it
+ * is as close as we can get.
+ *
+ * It is mandatory to configure {@code packageConsumer.workingDirectory} when
+ * employing this class; otherwise {@code initialize} will throw a
+ * NullPointerException.
+ *
+ * @author Mark Wood
+ */
+public class PackageConsumer implements Consumer
+{
+ /** Log file access */
+ private static final Logger log = LoggerFactory
+ .getLogger(PackageConsumer.class);
+
+ /** Configuration property: working directory. REQUIRED. */
+ private static final String WORKING_DIRECTORY = "packageConsumer.workingDirectory";
+
+ /** Accumulator for unique objects to be exported */
+ private Map
* @param context DSpace context.
* @param collection collection under which to create new item.
- * @param pkg input stream containing package to ingest.
+ * @param pkgFile The package file to ingest
* @param params package parameters (none recognized)
* @param license may be null, which takes default license.
* @return workspace item created by ingest.
* @throws PackageException if package is unacceptable or there is
* a fatal error turning it into an Item.
*/
- public WorkspaceItem ingest(Context context, Collection collection,
- InputStream pkg, PackageParameters params,
+ public DSpaceObject ingest(Context context, DSpaceObject parent,
+ File pkgFile, PackageParameters params,
String license)
throws PackageValidationException, CrosswalkException,
AuthorizeException, SQLException, IOException
@@ -144,41 +150,23 @@
Bitstream bs = null;
WorkspaceItem wi = null;
- /** XXX comment out for now
- // XXX for debugging of parameter handling
- if (params != null)
- {
- Enumeration pe = params.propertyNames();
- while (pe.hasMoreElements())
- {
- String name = (String)pe.nextElement();
- String v[] = params.getProperties(name);
- StringBuffer msg = new StringBuffer("PackageParam: ");
- msg.append(name).append(" = ");
- for (int i = 0; i < v.length; ++i)
- {
- if (i > 0)
- msg.append(", ");
- msg.append(v[i]);
- }
- log.debug(msg);
- }
- }
- **/
-
try
{
// Save the PDF in a bitstream first, since the parser
// has to read it as well, and we cannot "rewind" it after that.
- wi = WorkspaceItem.create(context, collection, false);
+ wi = WorkspaceItem.create(context, (Collection)parent, false);
Item myitem = wi.getItem();
original = myitem.createBundle("ORIGINAL");
- bs = original.createBitstream(pkg);
- pkg.close();
+
+ InputStream fileStream = new FileInputStream(pkgFile);
+ bs = original.createBitstream(fileStream);
+ fileStream.close();
+
bs.setName("package.pdf");
setFormatToMIMEType(context, bs, "application/pdf");
bs.update();
- log.debug("Created bitstream ID="+String.valueOf(bs.getID())+", parsing...");
+ if (log.isDebugEnabled())
+ log.debug("Created bitstream ID="+String.valueOf(bs.getID())+", parsing...");
crosswalkPDF(context, myitem, bs.retrieve());
@@ -188,7 +176,9 @@
log.info(LogManager.getHeader(context, "ingest",
"Created new Item, db ID="+String.valueOf(myitem.getID())+
", WorkspaceItem ID="+String.valueOf(wi.getID())));
- return wi;
+
+ myitem = PackageUtils.finishCreateItem(context, wi, null, params);
+ return myitem;
}
finally
{
@@ -216,23 +206,49 @@
}
/**
+ * IngestAll() cannot be implemented for a PDF ingester, because there's only one PDF to ingest
+ */
+ public List
+ * This ingester recognizes two distinct types of AIPs: "Manifest-Only" and "External".
+ * The Manifest-Only AIP, which is selected by specifying a PackageParameters
+ * key "manifestOnly" with the value "true", refers to all its contents by
+ * reference only. For Community or Collection AIPs this means all references to their
+ * child objects are just via Handles. For Item AIPs all Bitstreams are just
+ * referenced by their asset store location instead of finding them in the "package".
+ * The Manifest-Only AIP package format is simply a METS XML document serialized into a file.
+ *
+ * An "external" AIP (the default), is a conventional Zip-file based package
+ * that includes copies of all bitstreams referenced by the object as well
+ * as a serialized METS XML document in the path "mets.xml".
+ *
+ * Configuration keys:
+ * The following take as values a space-and-or-comma-separated list
+ * of plugin names that name *either* a DisseminationCrosswalk or
+ * StreamDisseminationCrosswalk plugin. Shown are the default values.
+ * The value may be a simple plugin name, or a METS MDsec-name followed by
+ * a colon and the plugin name e.g. "DSpaceHistory :HISTORY"
+ *
+ * # MD types to put in the sourceMD section of the object.
+ * aip.disseminate.sourceMD = AIP-TECHMD
+ *
+ * # MD types to put in the techMD section of the object (and member Bitstreams if an Item)
+ * aip.disseminate.techMD = PREMIS
+ *
+ * # MD types to put in digiprovMD section of the object.
+ * # (Note that this is disabled unless the History System is installed)
+ * #aip.disseminate.digiprovMD = DSpaceHistory :HISTORY
+ *
+ * # MD types to put in the rightsMD section of the object.
+ * aip.disseminate.rightsMD = DSpaceDepositLicense:DSPACE_DEPLICENSE, \
+ * CreativeCommonsRDF:DSPACE_CCRDF, CreativeCommonsText:DSPACE_CCTXT
+ *
+ * # MD types to put in dmdSec's corresponding to the object.
+ * aip.disseminate.dmd = MODS, DIM
+ *
+ * @author Larry Stone
+ * @version $Revision: 1.1 $
+ * @see AbstractMETSDisseminator
+ */
+public class DSpaceAIPDisseminator
+ extends AbstractMETSDisseminator
+{
+ /** log4j category */
+ private static Logger log = Logger.getLogger(DSpaceAIPDisseminator.class);
+
+ /**
+ * Unique identifier for the profile of the METS document.
+ * To ensure uniqueness, it is the URL that the XML schema document would
+ * have _if_ there were to be one. There is no schema at this time.
+ */
+ public final static String PROFILE_1_0 =
+ "http://www.dspace.org/schema/aip/mets_aip_1_0.xsd";
+
+ /** TYPE of the div containing AIP's parent handle in its mptr. */
+ final public static String PARENT_DIV_TYPE = "AIP Parent Link";
+
+ // Default MDTYPE value for deposit license -- "magic string"
+ // NOTE: format is
+ * For a manifest-only AIP, this is a reference to an HTTP URL where
+ * the bitstream should be able to be downloaded from.
+ * An external AIP names a file in the package
+ * with a relative URL, that is, relative pathname.
+ *
+ * @return String in URL format naming path to bitstream.
+ */
+ public String makeBitstreamURL(Bitstream bitstream, PackageParameters params)
+ {
+     // Manifest-only AIP: reference the bitstream by an external "persistent" URI
+     if (params != null && (params.getBooleanProperty("manifestOnly", false)))
+     {
+         // Try to build a persistent(-ish) URI for bitstream
+         // Format: {site-base-url}/bitstream/{item-handle}/{sequence-id}/{bitstream-name}
+         try
+         {
+             // get handle of parent Item of this bitstream, if there is one:
+             String handle = null;
+             Bundle[] bn = bitstream.getBundles();
+             if (bn.length > 0)
+             {
+                 Item bi[] = bn[0].getItems();
+                 if (bi.length > 0)
+                     handle = bi[0].getHandle();
+             }
+             if (handle != null)
+             {
+                 return ConfigurationManager
+                     .getProperty("dspace.url")
+                     + "/bitstream/"
+                     + handle
+                     + "/"
+                     + String.valueOf(bitstream.getSequenceID())
+                     + "/"
+                     + URLEncoder.encode(bitstream.getName(), "UTF-8");
+             }
+         }
+         catch (Exception e)
+         {
+             log.warn("Unable to build persistent URL for bitstream ID="+String.valueOf(bitstream.getID())+", using its name instead", e);
+         }
+
+         // Best-effort fallback: reached when no parent Item handle was found
+         // or the lookup above failed (now logged); return the bare bitstream name.
+         return bitstream.getName();
+     }
+     else
+     {
+         String base = "bitstream_"+String.valueOf(bitstream.getID());
+         String ext[] = bitstream.getFormat().getExtensions();
+         return (ext.length > 0) ? base+"."+ext[0] : base;
+     }
+ }
+
+ /**
+ * Adds another structMap element to contain the "parent link" that
+ * is an essential part of every AIP. This is a structmap of one
+ * div, which contains an mptr indicating the Handle of the parent
+ * of this object in the archive. The div has a unique TYPE attribute
+ * value, "AIP Parent Link", and the mptr has a LOCTYPE of "HANDLE"
+ * and an xlink:href containing the raw Handle value.
+ *
+ * Note that the parent Handle has to be stored here because the
+ * parent is needed to create a DSpace Object when restoring the
+ * AIP; it cannot be determined later once the ingester parses it
+ * out of the metadata when the crosswalks are run. So, since the
+ * crosswalks require an object to operate on, and creating the
+ * object requires a parent, we cannot depend on metadata processed
+ * by crosswalks (e.g. AIP techMd) for the parent, it has to be at
+ * a higher level in the AIP manifest. The structMap is an obvious
+ * and standards-compliant location for it.
+ */
+ public void addStructMap(Context context, DSpaceObject dso,
+ PackageParameters params, Mets mets)
+ throws SQLException, IOException, AuthorizeException, MetsException
+ {
+ // find parent Handle
+ String parentHandle = null;
+ switch (dso.getType())
+ {
+ case Constants.ITEM:
+ parentHandle = ((Item)dso).getOwningCollection().getHandle();
+ break;
+
+ case Constants.COLLECTION:
+ parentHandle = (((Collection)dso).getCommunities())[0].getHandle();
+ break;
+
+ case Constants.COMMUNITY:
+ Community parent = ((Community)dso).getParentCommunity();
+ if (parent == null)
+ parentHandle = Site.getSiteHandle();
+ else
+ parentHandle = parent.getHandle();
+ case Constants.SITE:
+ break;
+ }
+
+ // Parent Handle should only be null if we are creating a site-wide AIP
+ if(parentHandle!=null)
+ {
+ // add a structMap to contain div pointing to parent:
+ StructMap structMap = new StructMap();
+ structMap.setID(gensym("struct"));
+ structMap.setTYPE("LOGICAL");
+ structMap.setLABEL("Parent");
+ Div div0 = new Div();
+ div0.setID(gensym("div"));
+ div0.setTYPE(PARENT_DIV_TYPE);
+ div0.setLABEL("Parent of this DSpace Object");
+ Mptr mptr = new Mptr();
+ mptr.setID(gensym("mptr"));
+ mptr.setLOCTYPE(Loctype.HANDLE);
+ mptr.setXlinkHref(parentHandle);
+ div0.getContent().add(mptr);
+ structMap.getContent().add(div0);
+ mets.getContent().add(structMap);
+ }
+ }
+
+ /**
+ * include all bundles in AIP as content.
+ */
+ public boolean includeBundle(Bundle bundle)
+ {
+ return true;
+ }
+}
Index: PackageUtils.java
===================================================================
--- PackageUtils.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257)
+++ PackageUtils.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257)
@@ -39,22 +39,31 @@
package org.dspace.content.packager;
import java.io.ByteArrayInputStream;
+import java.io.File;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.sql.SQLException;
+import java.util.HashMap;
+import java.util.Map;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream;
import org.dspace.content.BitstreamFormat;
import org.dspace.content.Bundle;
import org.dspace.content.Collection;
+import org.dspace.content.Community;
import org.dspace.content.DCValue;
+import org.dspace.content.DSpaceObject;
import org.dspace.content.FormatIdentifier;
+import org.dspace.content.InstallItem;
import org.dspace.content.Item;
+import org.dspace.content.WorkspaceItem;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.license.CreativeCommons;
+import org.dspace.workflow.WorkflowItem;
+import org.dspace.workflow.WorkflowManager;
/**
* Container class for code that is useful to many packagers.
@@ -65,7 +74,71 @@
public class PackageUtils
{
+
+ // Map of metadata elements for Communities and Collections
+ // Format is alternating key/value in a straight array; use this
+ // to initialize hash tables that convert to and from.
+ private final static String ccMetadataMap[] =
+ {
+ // getMetadata() -> DC element.term
+ "name", "dc.title",
+ "introductory_text", "dc.description",
+ "short_description", "dc.description.abstract",
+ "side_bar_text", "dc.description.tableofcontents",
+ "copyright_text", "dc.rights",
+ "provenance_description", "dc.provenance",
+ "license", "dc.rights.license"
+ };
+
+ // HashMaps to convert Community/Collection metadata to/from Dublin Core
+ // (useful when crosswalking Communities/Collections)
+ private final static Map
+ * e.g. "dc.title" would translate to the "name" database column
+ *
+ * This method is of use when crosswalking Community or Collection metadata for ingest,
+ * as most ingest Crosswalks tend to deal with translating to DC-based metadata.
+ *
+ * @param dcField The dublin core metadata field
+ * @return The Community or Collection DB column where this metadata info is stored.
+ */
+ public static String dcToContainerMetadata(String dcField)
+ {
+ return ccDCToMetadata.get(dcField);
+ }
+
+ /**
+ * Translate a Container's (Community or Collection) database column into
+ * a valid Dublin Core metadata field. This is the opposite of 'dcToContainerMetadata()'.
+ *
+ * e.g. the "name" database column would translate to "dc.title"
+ *
+ * This method is of use when crosswalking Community or Collection metadata for dissemination,
+ * as most dissemination Crosswalks tend to deal with translating from DC-based metadata.
+ *
+ *
+ * @param databaseField The Community or Collection DB column
+ * @return The Dublin Core metadata field that this metadata translates to.
+ */
+ public static String containerMetadataToDC(String databaseField)
+ {
+ return ccMetadataToDC.get(databaseField);
+ }
+
+ /**
* Test that item has adequate metadata.
* Check item for the minimal DC metadata required to ingest a
* new item, and throw a PackageValidationException if test fails.
@@ -73,7 +146,7 @@
*
* @param item - item to test.
*/
- public static void checkMetadata(Item item)
+ public static void checkItemMetadata(Item item)
throws PackageValidationException
{
DCValue t[] = item.getDC( "title", null, Item.ANY);
@@ -99,7 +172,20 @@
if (license == null)
license = collection.getLicense();
InputStream lis = new ByteArrayInputStream(license.getBytes());
- Bundle lb = item.createBundle(Constants.LICENSE_BUNDLE_NAME);
+
+ Bundle lb;
+ //If LICENSE bundle is missing, create it
+ Bundle[] bundles = item.getBundles(Constants.LICENSE_BUNDLE_NAME);
+ if(bundles==null || bundles.length==0)
+ {
+ lb = item.createBundle(Constants.LICENSE_BUNDLE_NAME);
+ }
+ else
+ {
+ lb = bundles[0];
+ }
+
+ //Create the License bitstream
Bitstream lbs = lb.createBitstream(lis);
lis.close();
BitstreamFormat bf = BitstreamFormat.findByShortDescription(context, "License");
@@ -247,6 +333,29 @@
String shortDesc, String MIMEType, String desc)
throws SQLException, AuthorizeException
{
+ return findOrCreateBitstreamFormat(context, shortDesc, MIMEType, desc, BitstreamFormat.KNOWN, false);
+ }
+
+ /**
+ * Find or create a bitstream format to match the given short
+ * description.
+ * Used by packager ingesters to obtain a special bitstream
+ * format for the manifest (and/or metadata) file.
+ *
+ * NOTE: When creating a new format, do NOT set any extensions, since
+ * we don't want any file with the same extension, which may be something
+ * generic like ".xml", to accidentally get set to this format.
+ * @param context - the context.
+ * @param shortDesc - short descriptive name, used to locate existing format.
+ * @param MIMEType - mime content-type
+ * @param desc - long description
+ * @param internal value for the 'internal' flag of a new format if created.
+ * @return BitstreamFormat object that was found or created. Never null.
+ */
+ public static BitstreamFormat findOrCreateBitstreamFormat(Context context,
+ String shortDesc, String MIMEType, String desc, int supportLevel, boolean internal)
+ throws SQLException, AuthorizeException
+ {
BitstreamFormat bsf = BitstreamFormat.findByShortDescription(context,
shortDesc);
// not found, try to create one
@@ -256,9 +365,376 @@
bsf.setShortDescription(shortDesc);
bsf.setMIMEType(MIMEType);
bsf.setDescription(desc);
- bsf.setSupportLevel(BitstreamFormat.KNOWN);
+ bsf.setSupportLevel(supportLevel);
+ bsf.setInternal(internal);
bsf.update();
}
return bsf;
}
+
+ /**
+ * Utility to find the license bitstream from an item
+ *
+ * @param context
+ * DSpace context
+ * @param item
+ * the item
+ * @return the license bitstream or null
+ *
+ * @throws IOException
+ * if the license bitstream can't be read
+ */
+ public static Bitstream findDepositLicense(Context context, Item item)
+ throws SQLException, IOException, AuthorizeException
+ {
+ // get license format ID
+ int licenseFormatId = -1;
+ BitstreamFormat bf = BitstreamFormat.findByShortDescription(context,
+ "License");
+ if (bf != null)
+ licenseFormatId = bf.getID();
+
+ Bundle[] bundles = item.getBundles(Constants.LICENSE_BUNDLE_NAME);
+ for (int i = 0; i < bundles.length; i++)
+ {
+ // Assume license will be in its own bundle
+ Bitstream[] bitstreams = bundles[i].getBitstreams();
+
+ for(int j=0; j < bitstreams.length; j++)
+ {
+ // The License should have a file format of "License"
+ if (bitstreams[j].getFormat().getID() == licenseFormatId)
+ {
+ //found a bitstream with format "License" -- return it
+ return bitstreams[j];
+ }
+ }
+
+ // If we couldn't find a bitstream with format = "License",
+ // we will just assume the first bitstream is the deposit license
+ // (usually a safe assumption as it is in the LICENSE bundle)
+ if(bitstreams.length>0)
+ return bitstreams[0];
+ }
+
+ // Oops! No license!
+ return null;
+ }
+
+
+ /*=====================================================
+ * Utility Methods -- may be useful for subclasses
+ *====================================================*/
+
+
+ /**
+ * Create the specified DSpace Object, based on the passed
+ * in Package Parameters (along with other basic info required
+ * to create the object)
+ *
+ * @param context DSpace Context
+ * @param parent Parent Object
+ * @param type Type of new Object
+ * @param handle Handle of new Object (may be null)
+ * @param params Properties-style list of options (interpreted by each packager).
+ * @return newly created DSpace Object (or null)
+ * @throws AuthorizeException
+ * @throws SQLException
+ * @throws IOException
+ */
+ public static DSpaceObject createDSpaceObject(Context context, DSpaceObject parent, int type, String handle, PackageParameters params)
+ throws AuthorizeException, SQLException, IOException
+ {
+ DSpaceObject dso = null;
+
+ switch (type)
+ {
+ case Constants.COLLECTION:
+ dso = ((Community)parent).createCollection(handle);
+ return dso;
+
+ case Constants.COMMUNITY:
+ // top-level community?
+ if (parent == null || parent.getType() == Constants.SITE)
+ dso = Community.create(null, context, handle);
+ else
+ dso = ((Community)parent).createSubcommunity(handle);
+ return dso;
+
+ case Constants.ITEM:
+ //Initialize a WorkspaceItem
+ //(Note: set submitter to currentUser for now -- we can change it later if manifest specifies someone else)
+ WorkspaceItem wsi = WorkspaceItem.create(context, (Collection)parent, params.useCollectionTemplate(), context.getCurrentUser(), handle);
+
+ // Finish creating item (this will either install item or start a workflow, based on params)
+ dso = finishCreateItem(context, wsi, handle, params);
+
+ return dso;
+ }
+
+ return null;
+ }
+
+ /**
+ * Perform any final tasks on a newly created WorkspaceItem in order to finish
+ * ingestion of an Item.
+ *
+ * This may include starting up a workflow for the new item, restoring it,
+ * or archiving it (based on params passed in)
+ *
+ * @param context DSpace Context
+ * @param wsi Workspace Item that requires finishing
+ * @param handle Handle to assign to item (may be null)
+ * @param params Properties-style list of options (interpreted by each packager).
+ * @return finished Item
+ * @throws IOException
+ * @throws SQLException
+ * @throws AuthorizeException
+ */
+ public static Item finishCreateItem(Context context, WorkspaceItem wsi, String handle, PackageParameters params)
+ throws IOException, SQLException, AuthorizeException
+ {
+ // restore existing object using the package (including attempting to restore the handle)
+ if (params.restoreModeEnabled())
+ {
+ InstallItem.restoreItem(context, wsi, handle);
+
+ //return newly restored item
+ return wsi.getItem();
+ }
+ // submit normally, passing along to workflow
+ else if (params.workflowEnabled())
+ {
+ // Start an item workflow
+ WorkflowItem wfi = WorkflowManager.startWithoutNotify(context, wsi);
+
+ // return item with workflow started
+ return wfi.getItem();
+ }
+
+ // skip workflow, but otherwise normal submission
+ else
+ {
+ InstallItem.installItem(context, wsi, handle);
+
+ // return newly installed item
+ return wsi.getItem();
+ }
+ }//end finishCreateItem
+
+
+ /**
+ * Commit all recent changes to DSpaceObject.
+ *
+ * This method is necessary as there is no generic 'update()' on a DSpaceObject
+ *
+ * @param dso DSpaceObject to update
+ */
+ public static void updateDSpaceObject(DSpaceObject dso)
+ throws AuthorizeException, SQLException, IOException
+ {
+ if (dso != null)
+ {
+ switch (dso.getType())
+ {
+ case Constants.BITSTREAM:
+ ((Bitstream)dso).update();
+ break;
+ case Constants.ITEM:
+ ((Item)dso).update();
+ break;
+ case Constants.COLLECTION:
+ ((Collection)dso).update();
+ break;
+ case Constants.COMMUNITY:
+ ((Community)dso).update();
+ break;
+ }
+ }
+ }
+
+
+ /**
+ * Utility method to retrieve the file extension off of a filename.
+ *
+ * @param filename Full filename
+ * @return text after the last '.' in the filename, or the whole filename if it contains no '.'
+ */
+ public static String getFileExtension(String filename)
+ {
+ // Extract the file extension off of a filename
+ String extension = filename;
+ int lastDot = filename.lastIndexOf('.');
+
+ if (lastDot != -1)
+ {
+ extension = filename.substring(lastDot + 1);
+ }
+
+ return extension;
+ }
+
+
+    /**
+     * Builds the file name of a dissemination information package (DIP)
+     * for a DSpace object.
+     *
+     * Format: [dspace-obj-type]@[handle-with-dashes].[fileExtension]
+     *    OR   [dspace-obj-type]@internal-id-[dspace-ID].[fileExtension]
+     *
+     * The second form is used when the object has no (or an empty) Handle.
+     * A leading "." is added to the extension only if it is not already there.
+     *
+     * @param dso  DSpace Object to create file name for
+     * @param fileExtension file extension of the output file, with or
+     *          without a leading "." (must not be null)
+     * @return filename of a DIP representing the DSpace Object
+     */
+    public static String getPackageName(DSpaceObject dso, String fileExtension)
+    {
+        String identifier = dso.getHandle();
+        if (identifier != null && !identifier.isEmpty())
+        {
+            // '/' is not usable in a file name, so the Handle separator becomes '-'
+            identifier = identifier.replace("/", "-");
+        }
+        else
+        {
+            // no Handle available: fall back to the internal database ID
+            identifier = "internal-id-" + dso.getID();
+        }
+
+        // textual name of the object type, looked up by its type constant
+        final String typeName = Constants.typeText[dso.getType()];
+
+        final String suffix = fileExtension.startsWith(".")
+                ? fileExtension
+                : "." + fileExtension;
+
+        // typeName@identifier.extension
+        return typeName + "@" + identifier + suffix;
+    }
+
+
+    /**
+     * Creates the specified file (along with any missing parent directories)
+     * if it doesn't already exist. If the file already exists, nothing happens.
+     *
+     * A file given as a bare relative name (e.g. <code>new File("pkg.zip")</code>)
+     * has no parent path component; in that case no directories are created
+     * and the file is created relative to the current working directory.
+     *
+     * @param file the file to create (must not be null)
+     * @return true if the file was newly created by this call, false if it
+     *          already existed
+     * @throws IOException if the file could not be created (for example
+     *          because its parent directories could not be created)
+     */
+    public static boolean createFile(File file)
+        throws IOException
+    {
+        // An existing file is left untouched and reported as "not created".
+        if (file.exists())
+        {
+            return false;
+        }
+
+        // getParentFile() returns null when the path names no parent
+        // directory; dereferencing it unconditionally would throw a
+        // NullPointerException for such paths.
+        File parentDir = file.getParentFile();
+        if (parentDir != null && !parentDir.exists())
+        {
+            // create the parent directory structure; a failure here
+            // surfaces as an IOException from createNewFile() below
+            parentDir.mkdirs();
+        }
+
+        // create actual file
+        return file.createNewFile();
+    }
+
+    /**
+     * Removes all bitstreams (files) associated with a DSpace object.
+     *
+     * For an Item, every Bundle is removed from the Item (which, per the
+     * original note here, in turn removes the bitstreams they contain).
+     * For a Community or Collection, the logo is cleared by setting it to
+     * null. Objects of any other type are left unchanged.
+     *
+     * This method is useful for replace functionality.
+     *
+     * @param dso The object to remove all bitstreams from (must not be null)
+     * @throws SQLException if raised by the underlying DSpace calls
+     * @throws IOException if raised by the underlying DSpace calls
+     * @throws AuthorizeException if raised by the underlying DSpace calls
+     */
+    public static void removeAllBitstreams(DSpaceObject dso)
+        throws SQLException, IOException, AuthorizeException
+    {
+        switch (dso.getType())
+        {
+            case Constants.ITEM:
+            {
+                Item targetItem = (Item) dso;
+                // dropping each bundle is what detaches the item's files
+                for (Bundle bundle : targetItem.getBundles())
+                {
+                    targetItem.removeBundle(bundle);
+                }
+                break;
+            }
+            case Constants.COLLECTION:
+                // a collection's only bitstream is its logo
+                ((Collection) dso).setLogo(null);
+                break;
+            case Constants.COMMUNITY:
+                // a community's only bitstream is its logo
+                ((Community) dso).setLogo(null);
+                break;
+            default:
+                // other object types: nothing to remove
+                break;
+        }
+    }
+
+
+    /**
+     * Removes all metadata associated with a DSpace object.
+     *
+     * For an Item, every metadata entry is cleared. For a Collection or
+     * Community, each field named by a key of the ccMetadataToDC map
+     * (defined privately in this class) is set to null; fields the object
+     * rejects with an IllegalArgumentException are skipped. Objects of any
+     * other type are left unchanged.
+     *
+     * This method is useful for replace functionality.
+     *
+     * @param dso The object to remove all metadata from (must not be null)
+     * @throws SQLException declared for callers; see underlying DSpace calls
+     * @throws IOException declared for callers; see underlying DSpace calls
+     * @throws AuthorizeException declared for callers; see underlying DSpace calls
+     */
+    public static void clearAllMetadata(DSpaceObject dso)
+        throws SQLException, IOException, AuthorizeException
+    {
+        switch (dso.getType())
+        {
+            case Constants.ITEM:
+                // wildcard on every position wipes all metadata entries
+                ((Item) dso).clearMetadata(Item.ANY, Item.ANY, Item.ANY, Item.ANY);
+                break;
+
+            case Constants.COLLECTION:
+            {
+                Collection targetCollection = (Collection) dso;
+                for (String fieldName : ccMetadataToDC.keySet())
+                {
+                    try
+                    {
+                        targetCollection.setMetadata(fieldName, null);
+                    }
+                    catch (IllegalArgumentException ignored)
+                    {
+                        // Deliberately skipped: the map covers both Communities and
+                        // Collections, which don't share the exact same fields, so
+                        // this just means the field doesn't exist for this object.
+                    }
+                }
+                break;
+            }
+
+            case Constants.COMMUNITY:
+            {
+                Community targetCommunity = (Community) dso;
+                for (String fieldName : ccMetadataToDC.keySet())
+                {
+                    try
+                    {
+                        targetCommunity.setMetadata(fieldName, null);
+                    }
+                    catch (IllegalArgumentException ignored)
+                    {
+                        // Deliberately skipped: the map covers both Communities and
+                        // Collections, which don't share the exact same fields, so
+                        // this just means the field doesn't exist for this object.
+                    }
+                }
+                break;
+            }
+
+            default:
+                // other object types carry no metadata handled here
+                break;
+        }
+    }
+
}
Index: PackageDisseminator.java
===================================================================
--- PackageDisseminator.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257)
+++ PackageDisseminator.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257)
@@ -38,9 +38,10 @@
package org.dspace.content.packager;
+import java.io.File;
import java.io.IOException;
-import java.io.OutputStream;
import java.sql.SQLException;
+import java.util.List;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.DSpaceObject;
@@ -82,28 +83,65 @@
* "package" on the indicated OutputStream. Package is any serialized
* representation of the item, at the discretion of the implementing
* class. It does not have to include content bitstreams.
- *
* Use the
* Throws an exception of the chosen object is not acceptable or there is
* a failure creating the package.
*
* @param context DSpace context.
* @param object DSpace object (item, collection, etc)
* @param params Properties-style list of options specific to this packager
- * @param out output stream on which to write package
+ * @param pkgFile File where export package should be written
* @throws PackageValidationException if package cannot be created or there is
* a fatal error in creating it.
*/
void disseminate(Context context, DSpaceObject object,
- PackageParameters params, OutputStream out)
+ PackageParameters params, File pkgFile)
throws PackageException, CrosswalkException,
AuthorizeException, SQLException, IOException;
/**
+ * Recursively export one or more DSpace Objects as a series of packages.
+ * This method will export the given DSpace Object as well as all referenced
+ * DSpaceObjects (e.g. child objects) into a series of packages. The
+ * initial object is exported to the location specified by the pkgFile.
+ * All other generated packages are recursively exported to the same directory.
+ *
+ * Package is any serialized representation of the item, at the discretion
+ * of the implementing class. It does not have to include content bitstreams.
+ *
+ * Use the
+ * Throws an exception if the initial object is not acceptable or there is
+ * a failure creating the packages.
+ *
+ * A packager may choose not to implement
* Package Parameters:
+ * Use the
+ * Throws an exception if the chosen object is not acceptable or there is
* a failure creating the package.
*
- * @param context - DSpace context.
- * @param dso - DSpace object (item, collection, etc)
- * @param pkg - output stream on which to write package
- * @throws PackageException if package cannot be created or there is
+ * @param context DSpace context.
+ * @param object DSpace object (item, collection, etc)
+ * @param params Properties-style list of options specific to this packager
+ * @param pkgFile File where export package should be written
+ * @throws PackageValidationException if package cannot be created or there is
* a fatal error in creating it.
*/
public void disseminate(Context context, DSpaceObject dso,
- PackageParameters params, OutputStream pkg)
+ PackageParameters params, File pkgFile)
throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException
{
- if (dso.getType() == Constants.ITEM)
+ try
{
- Item item = (Item)dso;
- long lmTime = item.getLastModified().getTime();
+ //Make sure our package file exists
+ if(!pkgFile.exists())
+ {
+ PackageUtils.createFile(pkgFile);
+ }
- // how to handle unauthorized bundle/bitstream:
- String unauth = (params == null) ? null : params.getProperty("unauthorized");
+ //Open up an output stream to write to package file
+ FileOutputStream outStream = new FileOutputStream(pkgFile);
+ // Everything that writes to the stream runs inside try/finally so the
+ // file handle is released even if building, validating or writing
+ // the package throws.
+ try
+ {
- if (params != null && params.getProperty("manifestOnly") != null)
+ // Generate a true manifest-only "package", no external files/data & no need to zip up
+ if (params != null && params.getBooleanProperty("manifestOnly", false))
{
- extraFiles = null;
- writeManifest(context, item, params, pkg);
+ Mets manifest = makeManifest(context, dso, params, null);
+ manifest.validate(new MetsValidator());
+ manifest.write(new MetsWriter(outStream));
}
else
{
- extraFiles = new HashMap();
- ZipOutputStream zip = new ZipOutputStream(pkg);
- zip.setComment("METS archive created by DSpace METSDisseminationCrosswalk");
+ // make a Zip-based package
+ writeZipPackage(context, dso, params, outStream);
+ }//end if/else
- // write manifest first.
- ZipEntry me = new ZipEntry(MANIFEST_FILE);
- me.setTime(lmTime);
- zip.putNextEntry(me);
- writeManifest(context, item, params, zip);
- zip.closeEntry();
+ }
+ finally
+ {
+ //Close stream / stop writing to file (also on failure)
+ outStream.close();
+ }
+ }//end try
+ catch (MetsException e)
+ {
+ // We don't pass up a MetsException, so callers don't need to
+ // know the details of the METS toolkit
+ log.error("METS error: ",e);
+ throw new PackageValidationException(e);
+ }
+ }
+
+
+ /**
+ * Make a Zipped up METS package for the given DSpace Object
+ *
+ * @param context DSpace Context
+ * @param dso The DSpace Object
+ * @param params Parameters to the Packager script
+ * @param pkg Package output stream
+ * @throws PackageValidationException
+ * @throws AuthorizeException
+ * @throws SQLException
+ * @throws IOException
+ */
+ protected void writeZipPackage(Context context, DSpaceObject dso, PackageParameters params, OutputStream pkg)
+ throws PackageValidationException, CrosswalkException, MetsException, AuthorizeException, SQLException, IOException
+ {
+ long lmTime = 0;
+ if (dso.getType() == Constants.ITEM)
+ lmTime = ((Item)dso).getLastModified().getTime();
+
+ // map of extra streams to put in Zip (these are located during makeManifest())
+ MdStreamCache extraStreams = new MdStreamCache();
+ ZipOutputStream zip = new ZipOutputStream(pkg);
+ zip.setComment("METS archive created by DSpace METSDisseminationCrosswalk");
+ Mets manifest = makeManifest(context, dso, params, extraStreams);
+
+ // copy extra (metadata, license, etc) bitstreams into zip, update manifest
+ if (extraStreams != null)
+ {
+ for (Map.Entry ment : extraStreams.getMap().entrySet())
+ {
+ MdRef ref = (MdRef)ment.getKey();
- // copy extra (meta?) bitstreams into zip
- Iterator fi = extraFiles.keySet().iterator();
- while (fi.hasNext())
+ // Both Deposit Licenses & CC Licenses which are referenced as "extra streams" may already be
+ // included in our Package (if their bundles are already included in the
+ * So, if they are being added by *both*, then we want to just link the rightsMD
+ * Defaults to 'true' if previously unset, as by default all
+ * DSpace Workflows should be enabled.
+ *
+ * @return boolean result
+ */
+    public boolean workflowEnabled()
+    {
+        // second argument is the value used when "useWorkflow" was never set
+        final boolean fallback = true;
+        return getBooleanProperty("useWorkflow", fallback);
+    }
+
+    /**
+     * Enables or disables workflow for Item ingestion by storing the flag
+     * under the "useWorkflow" parameter.
+     *
+     * @param value true = workflow enabled, false = workflow disabled
+     */
+    public void setWorkflowEnabled(boolean value)
+    {
+        final String flag = Boolean.toString(value);
+        addProperty("useWorkflow", flag);
+    }
+
+
+    /**
+     * Tells whether restore mode is enabled, based on the Packager parameters.
+     *
+     * Restore mode attempts to restore a missing/deleted object completely
+     * (including handle), based on contents of a package.
+     *
+     * NOTE: restore mode should throw an error if it attempts to restore an
+     * object which already exists. Use 'keep-existing' or 'replace' mode to
+     * either skip-over (keep) or replace existing objects.
+     *
+     * 'replace' mode and 'keep-existing' mode are special types of
+     * "restores", so this method also returns true whenever
+     * replaceModeEnabled() or keepExistingModeEnabled() is true.
+     *
+     * @return true if the "restoreMode" parameter is set to true, or if
+     *          replace or keep-existing mode is enabled; false otherwise
+     *          (including when none of them has been set)
+     */
+    public boolean restoreModeEnabled()
+    {
+        // evaluated left to right with short-circuiting, same order as before
+        return getBooleanProperty("restoreMode", false)
+                || replaceModeEnabled()
+                || keepExistingModeEnabled();
+    }
+
+    /**
+     * Enables or disables restore mode by storing the flag under the
+     * "restoreMode" parameter.
+     *
+     * Restore mode attempts to restore a missing/deleted object completely
+     * (including handle), based on a given package's contents.
+     *
+     * NOTE: restore mode should throw an error if it attempts to restore an
+     * object which already exists. Use 'keep-existing' or 'replace' mode to
+     * either skip-over (keep) or replace existing objects.
+     *
+     * @param value true = restore enabled, false = restore disabled
+     */
+    public void setRestoreModeEnabled(boolean value)
+    {
+        final String flag = Boolean.toString(value);
+        addProperty("restoreMode", flag);
+    }
+
+    /**
+     * Tells whether replace mode is enabled, based on the Packager parameters.
+     *
+     * Replace mode attempts to overwrite an existing object and replace it
+     * with the contents of a package. It is considered a special type of
+     * "restore", where the current object is being restored to a previous
+     * state.
+     *
+     * @return value of the "replaceMode" parameter; false if previously unset
+     */
+    public boolean replaceModeEnabled()
+    {
+        final boolean fallback = false;
+        return getBooleanProperty("replaceMode", fallback);
+    }
+
+    /**
+     * Enables or disables replace mode by storing the flag under the
+     * "replaceMode" parameter.
+     *
+     * Replace mode attempts to overwrite an existing object and replace it
+     * with the contents of a package. It is considered a special type of
+     * "restore", where the current object is being restored to a previous
+     * state.
+     *
+     * @param value true = replace enabled, false = replace disabled
+     */
+    public void setReplaceModeEnabled(boolean value)
+    {
+        final String flag = Boolean.toString(value);
+        addProperty("replaceMode", flag);
+    }
+
+    /**
+     * Tells whether 'keep-existing' mode is enabled, based on the Packager
+     * parameters.
+     *
+     * Keep-Existing mode is identical to 'restore' mode, except that it
+     * skips over any objects which are found to already exist. It
+     * essentially restores all missing objects, but keeps existing ones
+     * intact.
+     *
+     * @return value of the "keepExistingMode" parameter; false if
+     *          previously unset
+     */
+    public boolean keepExistingModeEnabled()
+    {
+        final boolean fallback = false;
+        return getBooleanProperty("keepExistingMode", fallback);
+    }
+
+    /**
+     * Enables or disables 'keep-existing' mode by storing the flag under the
+     * "keepExistingMode" parameter.
+     *
+     * Keep-Existing mode is identical to 'restore' mode, except that it
+     * skips over any objects which are found to already exist. It
+     * essentially restores all missing objects, but keeps existing ones
+     * intact.
+     *
+     * @param value true = keep-existing enabled, false = keep-existing disabled
+     */
+    public void setKeepExistingModeEnabled(boolean value)
+    {
+        final String flag = Boolean.toString(value);
+        addProperty("keepExistingMode", flag);
+    }
+
+    /**
+     * Tells whether Items should use a Collection's template when they
+     * are created.
+     *
+     * @return value of the "useCollectionTemplate" parameter; false if
+     *          previously unset
+     */
+    public boolean useCollectionTemplate()
+    {
+        final boolean fallback = false;
+        return getBooleanProperty("useCollectionTemplate", fallback);
+    }
+
+    /**
+     * Enables or disables use of the Collection Template for Item ingestion
+     * by storing the flag under the "useCollectionTemplate" parameter.
+     *
+     * When enabled, the Item will be installed using the parent collection's
+     * Item Template.
+     *
+     * @param value true = template enabled, false = template disabled
+     */
+    public void setUseCollectionTemplate(boolean value)
+    {
+        final String flag = Boolean.toString(value);
+        addProperty("useCollectionTemplate", flag);
+    }
+
+
+    /**
+     * Tells whether recursive mode is enabled, based on the Packager
+     * parameters.
+     *
+     * Recursive mode should be enabled anytime one of the *All() methods
+     * is called (e.g. ingestAll(), replaceAll() or disseminateAll()). It
+     * recursively performs the same action on all related objects.
+     *
+     * @return value of the "recursiveMode" parameter; false if previously
+     *          unset
+     */
+    public boolean recursiveModeEnabled()
+    {
+        final boolean fallback = false;
+        return getBooleanProperty("recursiveMode", fallback);
+    }
+
+    /**
+     * Enables or disables recursive mode by storing the flag under the
+     * "recursiveMode" parameter.
+     *
+     * Recursive mode should be enabled anytime one of the *All() methods
+     * is called (e.g. ingestAll(), replaceAll() or disseminateAll()). It
+     * recursively performs the same action on all related objects.
+     *
+     * @param value true = recursion enabled, false = recursion disabled
+     */
+    public void setRecursiveModeEnabled(boolean value)
+    {
+        final String flag = Boolean.toString(value);
+        addProperty("recursiveMode", flag);
+    }
+
+
}
Index: AbstractPackageIngester.java
===================================================================
--- AbstractPackageIngester.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 0)
+++ AbstractPackageIngester.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257)
@@ -0,0 +1,354 @@
+/**
+ * AbstractPackageIngester.java
+ *
+ * Version: $Revision$
+ *
+ * Date: $Date$
+ *
+ * Copyright (c) 2010, DuraSpace. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the DSpace Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+ * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+package org.dspace.content.packager;
+
+import java.io.File;
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.log4j.Logger;
+
+import org.dspace.authorize.AuthorizeException;
+import org.dspace.content.DSpaceObject;
+import org.dspace.content.crosswalk.CrosswalkException;
+import org.dspace.core.Constants;
+import org.dspace.core.Context;
+import org.dspace.core.LogManager;
+
+/**
+ * An abstract implementation of a DSpace Package Ingester, which
+ * implements a few helper/utility methods that most (all?) PackageIngesters
+ * may find useful.
+ *
+ * First, implements recursive functionality in ingestAll() and replaceAll()
+ * methods of the PackageIngester interface. These methods are setup to
+ * recursively call ingest() and replace() respectively.
+ *
+ * Finally, it also implements several utility methods (createDSpaceObject(),
+ * finishCreateItem(), updateDSpaceObject()) which subclasses may find useful.
+ * These methods allow subclasses to easily create/update objects without
+ * having to worry too much about normal DSpace submission workflows (which is
+ * taken care of in these utility methods).
+ *
+ * All Package ingesters should either extend this abstract class
+ * or implement
+ * For example, a scenario may be to create a Collection based on a
+ * collection-level package, and also create an Item for every item-level
+ * package referenced by the collection-level package.
+ *
+ * The output of this method is one or more newly created
+ * The packager may choose not to implement
+ * The deposit license (Only significant for Item) is passed
+ * explicitly as a string since there is no place for it in many
+ * package formats. It is optional and may be given as
+ *
+ * For example, a scenario may be to replace a Collection based on a
+ * collection-level package, and also replace *every* Item in that collection
+ * based on the item-level packages referenced by the collection-level package.
+ *
+ * Please note that since the
+ * The output of this method is one or more replaced
+ * The packager may choose not to implement
+ * This method collects all references to other packages, so that we
+ * can choose to recursively ingest them, as necessary, alongside the
+ * DSpaceObject created from the original SIP.
+ *
+ * References are collected based on the DSpaceObject created from the SIP
+ * (this way we keep the context of these references).
+ *
+ * @param dso DSpaceObject whose SIP referenced another package
+ * @param packageRef A reference to another package, which can be ingested after this one
+ */
+ public void addPackageReference(DSpaceObject dso, String packageRef)
+ {
+ List
+ * These references should detail where another package exists which
+ * should be ingested alongside the current DSpaceObject.
+ *
+ * The
+ * This list can be useful in reporting back to the user what content has
+ * been added or replaced. It's used by ingestAll() and replaceAll() to
+ * return this list of everything that was ingested/replaced.
+ *
+ * @return List of DSpaceObjects which have been added/replaced
+ */
+ protected List
@@ -109,11 +106,11 @@
* same GROUPID
+ * This ingester recognizes two distinct types of AIPs: "Manifest-Only" and "External".
+ * The Manifest-Only AIP, which is selected by specifying a PackageParameters
+ * key "manifestOnly" with the value "true", refers to all its contents by
+ * reference only. For Community or Collection AIPs this means all references to their
+ * child objects are just via Handles. For Item AIPs all Bitstreams are just
+ * referenced by their asset store location instead of finding them in the "package".
+ * The Manifest-Only AIP package format is simply a METS XML document serialized into a file.
+ *
+ * An "external" AIP (the default), is a conventional Zip-file based package
+ * that includes copies of all bitstreams referenced by the object as well
+ * as a serialized METS XML document in the path "mets.xml".
+ *
+ * Configuration keys:
+ *
+ * # instructs which xwalk plugin to use for a given type of metadata
+ * mets.dspaceAIP.ingest.crosswalk.{mdSecName} = {pluginName}
+ * mets.dspaceAIP.ingest.crosswalk.DC = QDC
+ * mets.dspaceAIP.ingest.crosswalk.DSpaceDepositLicense = NULLSTREAM
+ *
+ * # Option to save METS manifest in the item: (default is false)
+ * mets.default.ingest.preserveManifest = false
+ *
+ * @author Larry Stone
+ * @author Tim Donohue
+ * @version $Revision: 1.1 $
+ *
+ * @see AbstractMETSIngester
+ * @see AbstractPackageIngester
+ * @see PackageIngester
+ * @see org.dspace.content.packager.METSManifest
+ */
+public class DSpaceAIPIngester
+ extends AbstractMETSIngester
+{
+ /** log4j category */
+ private static Logger log = Logger.getLogger(DSpaceAIPIngester.class);
+
+    /**
+     * Ensures the manifest is an AIP generated by the complementary AIP
+     * disseminator, by checking its PROFILE value.
+     *
+     * @param manifest the METS manifest to check
+     * @throws MetadataValidationException if the manifest has no PROFILE,
+     *          or its PROFILE is not equal to
+     *          DSpaceAIPDisseminator.PROFILE_1_0
+     */
+    void checkManifest(METSManifest manifest)
+        throws MetadataValidationException
+    {
+        final String declaredProfile = manifest.getProfile();
+
+        // a manifest without any profile cannot be identified at all
+        if (declaredProfile == null)
+        {
+            throw new MetadataValidationException("Cannot accept METS with no PROFILE attribute!");
+        }
+
+        // only the profile written by the AIP disseminator is accepted
+        if (!declaredProfile.equals(DSpaceAIPDisseminator.PROFILE_1_0))
+        {
+            throw new MetadataValidationException("METS has unacceptable PROFILE attribute, profile="+declaredProfile);
+        }
+    }
+
+
+ /**
+ * Choose DMD section(s) to crosswalk.
+ *
+ * The algorithm is:
- * A package is a single data stream
- * containing enough information to construct an Item. It can be
- * anything from an archive like a Zip file with a manifest and metadata,
- * to a simple manifest containing external references to the content,
- * to a self-contained file such as a PDF. The interpretation
- * of the package is entirely at the discretion of the implementing class.
+ * A package is a single data stream containing enough information to
+ * construct an Object (i.e. an Item, Collection, or Community). It
+ * can be anything from an archive like a Zip file with a manifest and
+ * metadata, to a simple manifest containing external references to the
+ * content, to a self-contained file such as a PDF. The interpretation
+ * of the package is entirely at the discretion of the implementing
+ * class.
*
* The ingest methods are also given an attribute-value
* list of "parameters" which may modify their actions.
@@ -68,58 +68,151 @@
* understand different sets of parameters.
*
* @author Larry Stone
+ * @author Tim Donohue
* @version $Revision$
* @see PackageParameters
+ * @see AbstractPackageIngester
*/
public interface PackageIngester
{
/**
- * Create new Item out of the ingested package.
- * The item will belong to the indicated
- * collection. This creates a
- * The deposit license is passed explicitly as a string since there
- * is no place for it in many package formats. It is optional and may
- * be given as
+ * Use
+ * For example, a scenario may be to create a Collection based on a
+ * collection-level package, and also create an Item for every item-level
+ * package referenced by the collection-level package.
+ *
+ * The output of this method is one or more newly created
+ * The packager may choose not to implement
+ * The deposit license (Only significant for Item) is passed
+ * explicitly as a string since there is no place for it in many
+ * package formats. It is optional and may be given as
+ *
+ * Use
+ * For example, a scenario may be to replace a Collection based on a
+ * collection-level package, and also replace *every* Item in that collection
+ * based on the item-level packages referenced by the collection-level package.
+ *
+ * Please note that since the
+ * The output of this method is one or more replaced
+ * The packager may choose not to implement
* This is a generic packager framework intended to be subclassed to create
* ingesters for more specific METS "profiles". METS is an
* abstract and flexible framework that can encompass many
* different kinds of metadata and inner package structures.
+ *
*
- * Configuration:
- * If the property
+ * Configuration Properties:
+ *
- * Initialize it with the DSpace Bundle containing all of the
+ * Initialize it with the Content (ORIGINAL) Bundle containing all of the
* metadata bitstreams. Match an mdRef by finding the bitstream
* with the same name.
*/
protected class MdrefManager
implements METSManifest.Mdref
{
- private Bundle mdBundle = null;
-
- // constructor initializes metadata bundle.
- private MdrefManager(Bundle mdBundle)
+ private File packageFile = null;
+ private PackageParameters params;
+
+ // constructor initializes from package file
+ private MdrefManager(File packageFile, PackageParameters params)
{
super();
- this.mdBundle = mdBundle;
+ this.packageFile = packageFile;
+ this.params = params;
}
/**
- * Find the local Bitstream referenced in
- * an
+ * This method is similar to ingest(), except that if the object already exists in
+ * DSpace, it is deleted & replaced. The METS-based package is then used to ingest a new
+ * object in its place.
+ *
+ * In order to attempt to avoid data loss, this method first ingests a new object (with same parent)
+ * based on the METS manifest. Assuming that succeeds, the existing object is removed,
+ * and its handle is reassigned to the new object. If the ingest fails, the new object
+ * is removed and the existing object is left intact.
+ *
+ * @param context DSpace Context
+ * @param dsoToReplace DSpace Object to be replaced (may be null if it will be specified in the METS manifest itself)
+ * @param pkgFile The package file to ingest
+ * @param params Parameters passed from the packager script
+ * @return DSpaceObject created by ingest.
+ * @throws PackageValidationException if package is unacceptable or there is
+ * a fatal error turning it into a DSpace Object.
+ * @throws IOException
+ * @throws SQLException
+ * @throws AuthorizeException
+ * @throws CrosswalkException
+ */
+ public DSpaceObject replace(Context context, DSpaceObject dsoToReplace,
+ File pkgFile, PackageParameters params)
+ throws PackageValidationException, CrosswalkException,
+ AuthorizeException, SQLException, IOException
+ {
+ //parsed out METS Manifest from the file.
+ METSManifest manifest = null;
+
+ //resulting DSpace Object
+ DSpaceObject dso = null;
+
+ try
+ {
+ log.info(LogManager.getHeader(context, "package_parse",
+ "Parsing package for replace, file=" + pkgFile.getName()));
+
+ //Parse our ingest package, extracting out the METS manifest in the package
+ manifest = parsePackage(context, pkgFile, params);
+
+ //must have a METS Manifest to replace anything
+ if (manifest == null)
+ throw new PackageValidationException("No METS Manifest found (filename="+METSManifest.MANIFEST_FILE+"). Package is unacceptable!");
+
+ //it's possible that the object to replace will be passed in as null
+ // Let's determine the handle of the object to replace
+ if(dsoToReplace==null)
{
- Bitstream pbs = (Bitstream)fileIdToBitstream.get(pbsFile.getAttributeValue("ID"));
- if (pbs == null)
- log.error("Got Primary Bitstream file ID="+pbsFile.getAttributeValue("ID")+
- ", but found no corresponding bitstream.");
- else
+ // since we don't know what we are replacing, we'll have to
+ // try to determine it from the parsed manifest
+
+ // Handle of object described by METS should be in OBJID
+ String handleURI = manifest.getObjID();
+ String handle = decodeHandleURN(handleURI);
+ try
{
- Bundle bn[] = pbs.getBundles();
- if (bn.length > 0)
- bn[0].setPrimaryBitstreamID(pbs.getID());
- else
- log.error("Sanity check, got primary bitstream without any parent bundle.");
+ // Attempt to resolve this handle to an existing object
+ dsoToReplace = HandleManager.resolveToObject(context, handle);
}
+ catch(IllegalStateException ie)
+ {
+ //we don't care if this errors out -- we can continue whether or not an object exists with this handle
+ }
}
+ // NOTE: At this point, it's still possible we don't have an object to replace
+ // This could happen when there is actually no existing object in DSpace using that handle
+ // (In which case, we're actually just doing a "restore" -- so we aren't going to throw an error or complain)
- // have subclass manage license since it may be extra package file.
- addLicense(context, collection, item, manifest, callback, license );
+ // If we were unable to find the object to replace, then assume we are restoring it
+ if(dsoToReplace==null)
+ {
+ //In order to restore an object, we must first figure out which parent it belongs to
+ DSpaceObject parent = null;
+ // Let's try to figure out the parent using the Manifest
+ // look for a Parent Object link in manifest
+ * This is a default implementation which assumes the handle of the
+ * DSpace Object can be found in the
+ * If the packager is set to 'manifest-only' (i.e. pkgFile is just a manifest),
+ * we assume the file is available for download via a URL.
+ *
+ * Otherwise, the pkgFile is a Zip, so the file should be retrieved from
+ * within that Zip package.
+ *
+ * @param pkgFile the full package file (which may include content files if a zip)
+ * @param params Parameters passed to METSIngester
+ * @param path the File path (either path in Zip package or a URL)
+ * @return the InputStream for the file
+ */
+ protected static InputStream getFileInputStream(File pkgFile, PackageParameters params, String path)
+ throws MetadataValidationException, IOException
+ {
+ //If this is a manifest only package (i.e. not a zip file)
+ if (params.getBooleanProperty("manifestOnly", false))
+ {
+ //NOTE: since we are only dealing with a METS manifest,
+ // we will assume all external files are available via URLs.
+ try
+ {
+ //attempt to open a connection to given URL
+ URL fileURL = new URL(path);
+ URLConnection connection = fileURL.openConnection();
+
+ //open stream to access file contents
+ return connection.getInputStream();
+ }
+ catch(IOException io)
+ {
+ log.error("Unable to retrieve external file from URL '" + path + "' for manifest-only METS package. All externally referenced files must be retrievable via URLs.");
+ //pass exception upwards
+ throw io;
+ }
+ }
+ else
+ {
+ //open the Zip package
+ ZipFile zipPackage = new ZipFile(pkgFile);
+
+ //Retrieve the manifest file entry by name
+ ZipEntry manifestEntry = zipPackage.getEntry(path);
+
+ //Get inputStream associated with this file
+ return zipPackage.getInputStream(manifestEntry);
+ }
+ }
+
+
+ /**
* Profile-specific tests to validate manifest. The implementation
* can access the METS document through the
- * The
- * The
- * If either of the Sets
* This framework does not add any licenses by default.
+ *
+ * Note that crosswalking rightsMD sections can also add a deposit or CC
+ * license to the object.
*
* @param context the DSpace context
* @param collection DSpace Collection to which the item is being submitted.
* @param license optional user-supplied Deposit License text (may be null)
*/
- abstract public void addLicense(Context context, Collection collection,
- Item item, METSManifest manifest,
- MdrefManager callback, String license)
- throws PackageValidationException, CrosswalkException,
+ abstract public void addLicense(Context context, Item item, String license,
+ Collection collection, PackageParameters params)
+ throws PackageValidationException,
AuthorizeException, SQLException, IOException;
/**
- * Hook for final "finishing" operations on the new Item.
- * This method is called when the new Item is otherwise complete and
+ * Hook for final "finishing" operations on the new Object.
+ * This method is called when the new Object is otherwise complete and
* ready to be returned. The implementation should use this
* opportunity to make whatever final checks and modifications are
* necessary.
*
* @param context the DSpace context
*/
- abstract public void finishItem(Context context, Item item)
+ abstract public void finishObject(Context context, DSpaceObject dso)
throws PackageValidationException, CrosswalkException,
AuthorizeException, SQLException, IOException;
+ /**
+ * Determines what type of DSpace object is represented in this METS doc.
+ * @return one of the object types in Constants.
+ */
+ abstract public int getObjectType(METSManifest manifest)
+ throws PackageValidationException;
+
+ /**
+ * Subclass-dependent final processing on a Bitstream; could include
+ * fixing up the name, bundle, other attributes.
+ */
+ abstract public void finishBitstream(Context context,
+ Bitstream bs,
+ Element mfile,
+ METSManifest manifest,
+ PackageParameters params)
+ throws MetadataValidationException, SQLException, AuthorizeException, IOException;
+
+
+ /**
+ * Returns keyword that makes the configuration keys of this subclass
+ * unique, e.g. if it returns NAME, the key would be:
+ * "mets.NAME.ingest.preserveManifest = true"
+ */
+ abstract public String getConfigurationName();
+
}
PackageDisseminator
to better suit their needs.
+ *
+ * @author Tim Donohue
+ * @see PackageDisseminator
+ */
+public abstract class AbstractPackageDisseminator
+ implements PackageDisseminator
+{
+ /** log4j category */
+ private static Logger log = Logger.getLogger(AbstractPackageDisseminator.class);
+
+ /** List of all successfully disseminated package files */
+ private List
+ * Use the params
parameter list to adjust the way the
+ * package is made, e.g. including a "metadataOnly
"
+ * parameter might make the package a bare manifest in XML
+ * instead of a Zip file including manifest and contents.
+ *
+ * Throws an exception if the initial object is not acceptable or there is
+ * a failure creating the package.
+ *
+ * @param context DSpace context.
+ * @param dso initial DSpace object
+ * @param params Properties-style list of options specific to this packager
+ * @param pkgFile File where initial package should be written. All other
+ * packages will be written to the same directory as this File.
+ * @throws PackageValidationException if package cannot be created or there is
+ * a fatal error in creating it.
+ */
+ public List
+ * params
parameter list to adjust the way the
* package is made, e.g. including a "metadataOnly
"
* parameter might make the package a bare manifest in XML
* instead of a Zip file including manifest and contents.
- *
+ * params
parameter list to adjust the way the
+ * package is made, e.g. including a "metadataOnly
"
+ * parameter might make the package a bare manifest in XML
+ * instead of a Zip file including manifest and contents.
+ * disseminateAll
,
+ * or simply forward the call to disseminate
if it is unable to
+ * support recursive dissemination.
+ *
+ * @param context DSpace context.
+ * @param dso initial DSpace object
+ * @param params Properties-style list of options specific to this packager
+ * @param pkgFile File where initial package should be written. All other
+ * packages will be written to the same directory as this File.
+ * @return List of all package Files which were successfully disseminated
+ * @throws PackageValidationException if package cannot be created or there is
+ * a fatal error in creating it.
+ */
+ List"application/zip"
.
* Required when sending the package via HTTP, to
* provide the Content-Type header.
Index: AbstractMETSDisseminator.java
===================================================================
--- AbstractMETSDisseminator.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257)
+++ AbstractMETSDisseminator.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257)
@@ -38,64 +38,75 @@
package org.dspace.content.packager;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.sql.SQLException;
-import java.util.ArrayList;
-import java.util.Date;
import java.util.HashMap;
-import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
-import org.apache.log4j.Logger;
-import org.dspace.authorize.AuthorizeException;
-import org.dspace.authorize.AuthorizeManager;
-import org.dspace.content.Bitstream;
-import org.dspace.content.Bundle;
-import org.dspace.content.DSpaceObject;
-import org.dspace.content.Item;
-import org.dspace.content.crosswalk.CrosswalkException;
-import org.dspace.content.crosswalk.DisseminationCrosswalk;
-import org.dspace.core.ConfigurationManager;
-import org.dspace.core.Constants;
-import org.dspace.core.Context;
-import org.dspace.core.PluginManager;
-import org.dspace.core.Utils;
-import org.jdom.Namespace;
-import org.jdom.output.Format;
-import org.jdom.output.XMLOutputter;
-
-import edu.harvard.hul.ois.mets.Agent;
import edu.harvard.hul.ois.mets.AmdSec;
+import edu.harvard.hul.ois.mets.BinData;
import edu.harvard.hul.ois.mets.Checksumtype;
import edu.harvard.hul.ois.mets.Div;
import edu.harvard.hul.ois.mets.DmdSec;
+import edu.harvard.hul.ois.mets.MdRef;
import edu.harvard.hul.ois.mets.FLocat;
import edu.harvard.hul.ois.mets.FileGrp;
import edu.harvard.hul.ois.mets.FileSec;
import edu.harvard.hul.ois.mets.Fptr;
+import edu.harvard.hul.ois.mets.Mptr;
import edu.harvard.hul.ois.mets.Loctype;
import edu.harvard.hul.ois.mets.MdWrap;
import edu.harvard.hul.ois.mets.Mdtype;
import edu.harvard.hul.ois.mets.Mets;
import edu.harvard.hul.ois.mets.MetsHdr;
-import edu.harvard.hul.ois.mets.Name;
-import edu.harvard.hul.ois.mets.Role;
import edu.harvard.hul.ois.mets.StructMap;
import edu.harvard.hul.ois.mets.TechMD;
-import edu.harvard.hul.ois.mets.Type;
+import edu.harvard.hul.ois.mets.SourceMD;
+import edu.harvard.hul.ois.mets.DigiprovMD;
+import edu.harvard.hul.ois.mets.RightsMD;
+import edu.harvard.hul.ois.mets.helper.MdSec;
import edu.harvard.hul.ois.mets.XmlData;
+import edu.harvard.hul.ois.mets.helper.Base64;
import edu.harvard.hul.ois.mets.helper.MetsElement;
import edu.harvard.hul.ois.mets.helper.MetsException;
import edu.harvard.hul.ois.mets.helper.MetsValidator;
import edu.harvard.hul.ois.mets.helper.MetsWriter;
-import edu.harvard.hul.ois.mets.helper.PCData;
import edu.harvard.hul.ois.mets.helper.PreformedXML;
+import java.io.File;
+import java.io.FileOutputStream;
+import org.apache.log4j.Logger;
+
+import org.dspace.authorize.AuthorizeException;
+import org.dspace.authorize.AuthorizeManager;
+import org.dspace.content.Bitstream;
+import org.dspace.content.Bundle;
+import org.dspace.content.Community;
+import org.dspace.content.Collection;
+import org.dspace.content.DSpaceObject;
+import org.dspace.content.Item;
+import org.dspace.content.ItemIterator;
+import org.dspace.content.crosswalk.CrosswalkException;
+import org.dspace.content.crosswalk.CrosswalkObjectNotSupported;
+import org.dspace.content.crosswalk.DisseminationCrosswalk;
+import org.dspace.content.crosswalk.StreamDisseminationCrosswalk;
+import org.dspace.core.Constants;
+import org.dspace.core.Context;
+import org.dspace.core.PluginManager;
+import org.dspace.core.Utils;
+import org.dspace.license.CreativeCommons;
+import org.jdom.Element;
+import org.jdom.Namespace;
+import org.jdom.output.Format;
+import org.jdom.output.XMLOutputter;
+
/**
* Base class for disseminator of
* METS (Metadata Encoding & Transmission Standard) Package.
@@ -107,32 +118,33 @@
* different kinds of metadata and inner package structures.
*
- * manifestOnly
-- if true, generate a standalone XML
+ *
+ *
*
* @author Larry Stone
* @author Robert Tansley
+ * @author Tim Donohue
* @version $Revision$
*/
public abstract class AbstractMETSDisseminator
- implements PackageDisseminator
+ extends AbstractPackageDisseminator
{
/** log4j category */
private static Logger log = Logger.getLogger(AbstractMETSDisseminator.class);
- /** Filename of manifest, relative to package toplevel. */
- public static final String MANIFEST_FILE = "mets.xml";
-
// JDOM xml output writer - indented format for readability.
private static XMLOutputter outputter = new XMLOutputter(Format.getPrettyFormat());
@@ -140,157 +152,294 @@
private int idCounter = 1;
/**
- * Table of files to add to package, such as mdRef'd metadata.
- * Key is relative pathname of file, value is manifestOnly
-- if true, generate a standalone XML
* document of the METS manifest instead of a complete package. Any
* other metadata (such as licenses) will be encoded inline.
- * Default is false
.
+ * Default is false
.unauthorized
-- this determines what is done when the
- * packager encounters a Bundle or Bitstream it is not authorized to
- * read. By default, it just quits with an AuthorizeException.
+ * unauthorized
-- this determines what is done when the
+ * packager encounters a Bundle or Bitstream it is not authorized to
+ * read. By default, it just quits with an AuthorizeException.
* If this option is present, it must be one of the following values:
- * skip
-- simply exclude unreadable content from package.
- * zero
-- include unreadable bitstreams as 0-length files;
- * unreadable Bundles will still cause authorize errors.
+ *
+ *
skip
-- simply exclude unreadable content from package.zero
-- include unreadable bitstreams as 0-length files;
+ * unreadable Bundles will still cause authorize errors.InputStream
- * with contents to put in it.
- * New map is created by disseminate().
+ * Wrapper for a table of streams to add to the package, such as
+ * mdRef'd metadata. Key is relative pathname of file, value is
+ * <code>InputStream</code> with contents to put in it. Some
+ * superclasses will put streams in this table when adding an mdRef
+ * element to e.g. a rightsMD segment.
*/
- protected Map extraFiles = null;
+ protected class MdStreamCache
+ {
+ private Mapparams
parameter list to adjust the way the
+ * package is made, e.g. including a "metadataOnly
"
+ * parameter might make the package a bare manifest in XML
+ * instead of a Zip file including manifest and contents.
+ * getDmdTypes()
above.
+ * object and each Bitstream in an Item. The type string may be a
+ * simple name or colon-separated compound as specified for
+ * <code>getDmdTypes()</code> above.
* @param params the PackageParameters passed to the disseminator.
* @return array of metadata type strings, never null.
*/
- abstract public String getTechMdType(PackageParameters params)
+ abstract public String[] getTechMdTypes(Context context, DSpaceObject dso, PackageParameters params)
throws SQLException, IOException, AuthorizeException;
/**
- * Add Rights metadata for the Item, in the form of
- * (rightsMd
elements) to the given metadata section.
+ * Get the type string of the source metadata to create for each
+ * object and each Bitstream in an Item. The type string may be a
+ * simple name or colon-separated compound as specified for
+ * <code>getDmdTypes()</code> above.
+ * @param params the PackageParameters passed to the disseminator.
+ * @return array of metadata type strings, never null.
+ */
+ abstract public String[] getSourceMdTypes(Context context, DSpaceObject dso, PackageParameters params)
+ throws SQLException, IOException, AuthorizeException;
+
+ /**
+ * Get the type string of the "digiprov" (digital provenance)
+ * metadata to create for each object and each Bitstream in an Item.
+ * The type string may be a simple name or colon-separated compound
+ * as specified for <code>getDmdTypes()</code> above.
*
+ * @param params the PackageParameters passed to the disseminator.
+ * @return array of metadata type strings, never null.
*/
- abstract public void addRightsMd(Context context, Item item, AmdSec amdSec)
- throws SQLException, IOException, AuthorizeException, MetsException;
+ abstract public String[] getDigiprovMdTypes(Context context, DSpaceObject dso, PackageParameters params)
+ throws SQLException, IOException, AuthorizeException;
/**
+ * Get the type string of the "rights" (permission and/or license)
+ * metadata to create for each object and each Bitstream in an Item.
+ * The type string may be a simple name or colon-separated compound
+ * as specified for <code>getDmdTypes()</code> above.
+ *
+ * @param params the PackageParameters passed to the disseminator.
+ * @return array of metadata type strings, never null.
+ */
+ abstract public String[] getRightsMdTypes(Context context, DSpaceObject dso, PackageParameters params)
+ throws SQLException, IOException, AuthorizeException;
+
+ /**
* Add any additional <code>structMap</code> elements to the
* METS document, as required by this subclass. A simple default
* structure map which fulfills the minimal DSpace METS DIP/SIP
* requirements is already present, so this does not need to do anything.
* @param mets the METS document to which to add structMaps
*/
- abstract public void addStructMap(Context context, Item item,
+ abstract public void addStructMap(Context context, DSpaceObject dso,
PackageParameters params, Mets mets)
throws SQLException, IOException, AuthorizeException, MetsException;
+
+ /**
+ * @return true when this bundle should be included as "content"
+ * in the package, e.g. the DSpace SIP does not include metadata bundles.
+ */
+ abstract public boolean includeBundle(Bundle bundle);
}
Index: PackageParameters.java
===================================================================
--- PackageParameters.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257)
+++ PackageParameters.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257)
@@ -161,4 +161,203 @@
stringValue.equalsIgnoreCase("on") ||
stringValue.equalsIgnoreCase("yes");
}
+
+
+ /**
+ * Utility method to tell if workflow is enabled for Item ingestion.
+ * Checks the Packager parameters.
+ * PackageIngester
to better suit their needs.
+ *
+ * @author Tim Donohue
+ * @see PackageIngester
+ */
+public abstract class AbstractPackageIngester
+ implements PackageIngester
+{
+ /** log4j category */
+ private static Logger log = Logger.getLogger(AbstractPackageIngester.class);
+
+ /**
+ * References to other packages -- these are the next packages to ingest recursively
+ * Key = DSpace Object just ingested, Value = List of all packages relating to a DSpaceObject
+ **/
+ private MapDspaceObject
s.
+ *
ingestAll
,
+ * or simply forward the call to ingest
if it is unable to support
+ * recursive ingestion.
+ * null
.
+ *
+ * @param context DSpace context.
+ * @param parent parent under which to create the initial object
+ * (may be null -- in which case ingester must determine parent from package
+ * or throw an error).
+ * @param pkgFile The initial package file to ingest
+ * @param params Properties-style list of options (interpreted by each packager).
+ * @param license may be null, which takes default license.
+ * @return List of DSpaceObjects created
+ *
+ * @throws PackageValidationException if initial package (or any referenced package)
+ * is unacceptable or there is a fatal error in creating a DSpaceObject
+ * @throws UnsupportedOperationException if this packager does not
+ * implement ingestAll
+ */
+ public Listdso
. All other
+ * objects are replaced based on information provided in the referenced packages.
+ * dso
input only specifies the
+ * initial object to replace, any additional objects to replace must be
+ * determined based on the referenced packages (or initial package itself).
+ * DspaceObject
s.
+ *
replaceAll
,
+ * since it somewhat contradicts the archival nature of DSpace. It also
+ * may choose to forward the call to replace
if it is unable to
+ * support recursive replacement.
+ *
+ * @param context DSpace context.
+ * @param dso initial existing DSpace Object to be replaced, may be null
+ * if object to replace can be determined from package
+ * @param pkgFile The package file to ingest.
+ * @param params Properties-style list of options specific to this packager
+ * @return List of DSpaceObjects replaced
+ *
+ * @throws PackageValidationException if initial package (or any referenced package)
+ * is unacceptable or there is a fatal error in creating a DSpaceObject
+ * @throws UnsupportedOperationException if this packager does not
+ * implement replaceAll
+ */
+ public ListAbstractPackageIngester
or an equivalent SIP handler is expected
+ * to understand how to deal with these package references.
+ *
+ * @param dso DSpaceObject whose SIP referenced other SIPs
+ * @return List of Strings which are the references to external submission ingestion packages
+ * (may be null if no SIPs were referenced)
+ */
+ public List
* 4. Crosswalk remaining DMDs not eliminated already.
*/
- public void chooseItemDmd(Context context, Item item,
+ public void crosswalkObjectDmd(Context context, DSpaceObject dso,
METSManifest manifest,
AbstractMETSIngester.MdrefManager callback,
Element dmds[], PackageParameters params)
- throws CrosswalkException,
+ throws CrosswalkException, PackageValidationException,
AuthorizeException, SQLException, IOException
{
int found = -1;
@@ -152,7 +149,7 @@
String groupID = null;
if (found >= 0)
{
- manifest.crosswalkItem(context, item, dmds[found], callback);
+ manifest.crosswalkItemDmd(context, dso, dmds[found], callback);
groupID = dmds[found].getAttributeValue("GROUPID");
if (groupID != null)
@@ -161,7 +158,7 @@
{
String g = dmds[i].getAttributeValue("GROUPID");
if (g != null && !g.equals(groupID))
- manifest.crosswalkItem(context, item, dmds[i], callback);
+ manifest.crosswalkItemDmd(context, dso, dmds[i], callback);
}
}
}
@@ -171,7 +168,7 @@
else
{
if (dmds.length > 0)
- manifest.crosswalkItem(context, item, dmds[0], callback);
+ manifest.crosswalkItemDmd(context, dso, dmds[0], callback);
}
}
@@ -182,52 +179,86 @@
* default deposit license.
* For Creative Commons, look for a rightsMd containing a CC license.
*/
- public void addLicense(Context context, Collection collection,
- Item item, METSManifest manifest,
- AbstractMETSIngester.MdrefManager callback,
- String license)
- throws PackageValidationException, CrosswalkException,
+ public void addLicense(Context context, Item item, String license,
+ Collection collection, PackageParameters params)
+ throws PackageValidationException,
AuthorizeException, SQLException, IOException
{
+ if (PackageUtils.findDepositLicense(context, item) == null)
PackageUtils.addDepositLicense(context, license, item, collection);
+ }
- // If package includes a Creative Commons license, add that:
- Element rmds[] = manifest.getItemRightsMD();
- for (int i = 0; i < rmds.length; ++i)
+ public void finishObject(Context context, DSpaceObject dso)
+ throws PackageValidationException, CrosswalkException,
+ AuthorizeException, SQLException, IOException
+ {
+ // nothing to do: this implementation makes no final checks or changes to the object.
+ }
+
+ public int getObjectType(METSManifest manifest)
+ throws PackageValidationException
+ {
+ return Constants.ITEM; // always reports an Item; the manifest argument is not inspected
+ }
+
+ // return name of derived file as if MediaFilter created it, or null
+ // only needed when importing a SIP without canonical DSpace derived file naming.
+ private String makeDerivedFilename(String bundleName, String origName)
+ {
+ // get the MediaFilter that would create this bundle:
+ String mfNames[] = PluginManager.getAllPluginNames(MediaFilter.class);
+
+ for (int i = 0; i < mfNames.length; ++i)
+ {
+ MediaFilter mf = (MediaFilter)PluginManager.getNamedPlugin(MediaFilter.class, mfNames[i]);
+ if (bundleName.equals(mf.getBundleName()))
+ return mf.getFilteredName(origName);
+ }
+ return null;
+ }
+
+ /**
+ * Take a second pass over files to correct names of derived files
+ * (e.g. thumbnails, extracted text) to what DSpace expects:
+ */
+ public void finishBitstream(Context context,
+ Bitstream bs,
+ Element mfile,
+ METSManifest manifest,
+ PackageParameters params)
+ throws MetadataValidationException, SQLException, AuthorizeException, IOException
+ {
+ String bundleName = METSManifest.getBundleName(mfile);
+ if (!bundleName.equals(Constants.CONTENT_BUNDLE_NAME))
{
- String type = manifest.getMdType(rmds[i]);
- if (type != null && type.equals("Creative Commons"))
+ String opath = manifest.getOriginalFilePath(mfile);
+ if (opath != null)
{
- log.debug("Got Creative Commons license in rightsMD");
- CreativeCommons.setLicense(context, item,
- manifest.getMdContentAsStream(rmds[i], callback),
- manifest.getMdContentMimeType(rmds[i]));
+ // String ofileId = origFile.getAttributeValue("ID");
+ // Bitstream obs = (Bitstream)fileIdToBitstream.get(ofileId);
- // if there was a bitstream, get rid of it, since
- // it's just an artifact now that the CC license is installed.
- Element mdRef = rmds[i].getChild("mdRef", METSManifest.metsNS);
- if (mdRef != null)
+ String newName = makeDerivedFilename(bundleName, opath);
+
+ if (newName != null)
{
- Bitstream bs = callback.getBitstreamForMdRef(mdRef);
- if (bs != null)
- {
- Bundle parent[] = bs.getBundles();
- if (parent.length > 0)
- {
- parent[0].removeBitstream(bs);
- parent[0].update();
- }
- }
+ //String mfileId = mfile.getAttributeValue("ID");
+ //Bitstream bs = (Bitstream)fileIdToBitstream.get(mfileId);
+ bs.setName(newName);
+ bs.update();
}
}
}
}
- // last change to fix up Item.
- public void finishItem(Context context, Item item)
- throws PackageValidationException, CrosswalkException,
- AuthorizeException, SQLException, IOException
+
+ public String getConfigurationName()
{
- // nothing to do.
+ return "dspaceSIP";
}
+
+
+ /**
+  * Probing is not supported by this ingester.
+  *
+  * @param context DSpace context (unused)
+  * @param in stream holding the candidate package (never read)
+  * @param params packager parameters (unused)
+  * @return never returns normally
+  * @throws UnsupportedOperationException always
+  */
+ public boolean probe(Context context, InputStream in, PackageParameters params)
+ {
+     // NOTE(review): the message used to name the "PDF package ingester",
+     // which looks copied from another packager; this class reports the
+     // configuration name "dspaceSIP" -- confirm the wording.
+     throw new UnsupportedOperationException("METS SIP package ingester does not implement probe()");
+ }
}
Index: DSpaceAIPIngester.java
===================================================================
--- DSpaceAIPIngester.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 0)
+++ DSpaceAIPIngester.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257)
@@ -0,0 +1,281 @@
+/*
+ * DSpaceAIPIngester
+ *
+ * Version: $Revision: 1.1 $
+ *
+ * Date: $Date: 2006/03/17 00:04:38 $
+ *
+ * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
+ * Institute of Technology. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Hewlett-Packard Company nor the name of the
+ * Massachusetts Institute of Technology nor the names of their
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+ * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+package org.dspace.content.packager;
+
+import java.io.IOException;
+import java.sql.SQLException;
+
+import org.jdom.Element;
+
+import org.apache.log4j.Logger;
+import org.dspace.authorize.AuthorizeException;
+import org.dspace.content.Bitstream;
+import org.dspace.content.Collection;
+import org.dspace.content.DSpaceObject;
+import org.dspace.content.Item;
+import org.dspace.content.crosswalk.CrosswalkException;
+import org.dspace.content.crosswalk.MetadataValidationException;
+import org.dspace.core.Context;
+import org.dspace.core.Constants;
+
+/**
+ * Subclass of the METS packager framework to ingest a DSpace
+ * Archival Information Package (AIP). The AIP is intended to be, foremost,
+ * a _complete_ and _accurate_ representation of one object in the DSpace
+ * object model. An AIP contains all of the information needed to restore
+ * the object precisely in another DSpace archive instance.
+ *
+ * 1. Use whatever the <code>dmd</code> parameter specifies as the primary DMD.
+ * 2. If (1) is unspecified, find DIM (preferably) or MODS as primary DMD.
+ * 3. If (1) or (2) succeeds, crosswalk it and ignore all other DMDs with
+ * same GROUPID
+ * 4. Crosswalk remaining DMDs not eliminated already.
+ */
+ public void crosswalkObjectDmd(Context context, DSpaceObject dso,
+ METSManifest manifest,
+ AbstractMETSIngester.MdrefManager callback,
+ Element dmds[], PackageParameters params)
+ throws CrosswalkException, PackageValidationException,
+ AuthorizeException, SQLException, IOException
+ {
+ int found = -1;
+
+ // Check to see what dmdSec the user specified in the 'dmd' parameter
+ String userDmd = null;
+ if (params != null)
+ userDmd = params.getProperty("dmd");
+ if (userDmd != null && userDmd.length() > 0)
+ {
+ for (int i = 0; i < dmds.length; ++i)
+ if (userDmd.equalsIgnoreCase(manifest.getMdType(dmds[i])))
+ found = i;
+ }
+
+ // DIM is preferred, if nothing specified by user
+ if (found == -1)
+ {
+ // DIM is preferred for AIP
+ for (int i = 0; i < dmds.length; ++i)
+ //NOTE: METS standard actually says this should be DIM (all uppercase). But,
+ // just in case, we're going to be a bit more forgiving.
+ if ("DIM".equalsIgnoreCase(manifest.getMdType(dmds[i])))
+ found = i;
+ }
+
+ // MODS is acceptable otehrwise..
+ if (found == -1)
+ {
+ for (int i = 0; i < dmds.length; ++i)
+ //NOTE: METS standard actually says this should be MODS (all uppercase). But,
+ // just in case, we're going to be a bit more forgiving.
+ if ("MODS".equalsIgnoreCase(manifest.getMdType(dmds[i])))
+ found = i;
+ }
+
+ String groupID = null;
+ if (found >= 0)
+ {
+ manifest.crosswalkItemDmd(context, dso, dmds[found], callback);
+ groupID = dmds[found].getAttributeValue("GROUPID");
+
+ if (groupID != null)
+ {
+ for (int i = 0; i < dmds.length; ++i)
+ {
+ String g = dmds[i].getAttributeValue("GROUPID");
+ if (g != null && !g.equals(groupID))
+ manifest.crosswalkItemDmd(context, dso, dmds[i], callback);
+ }
+ }
+ }
+
+ // otherwise take the first. Don't xwalk more than one because
+ // each xwalk _adds_ metadata, and could add duplicate fields.
+ else if (dmds.length > 0)
+ {
+ manifest.crosswalkItemDmd(context, dso, dmds[0], callback);
+ }
+
+ // it's an error if there is nothing to crosswalk:
+ else
+ throw new MetadataValidationException("DSpaceAIPIngester: Could not find an acceptable object-wide DMD section in manifest.");
+ }
+
+
+ /**
+  * Add a deposit license to the ingested Item when one is called for.
+  * <p>
+  * A license is added in either of two situations:
+  * <ul>
+  * <li>the package is not being restored/replaced, in which case the AIP
+  *     is treated like a SIP and always receives a new license;</li>
+  * <li>the package is not a "manifestOnly" AIP and the Item has no
+  *     deposit license yet.</li>
+  * </ul>
+  * Normally the rightsMD crosswalks should already have provided a license.
+  *
+  * @param context DSpace context.
+  * @param item the Item that may receive a license.
+  * @param license license text handed to
+  *        <code>PackageUtils.addDepositLicense</code>; presumably a null
+  *        value selects the collection's default license -- confirm there.
+  * @param collection collection handed to
+  *        <code>PackageUtils.addDepositLicense</code>.
+  * @param params packager options; consulted for restore mode and the
+  *        "manifestOnly" flag.
+  */
+ public void addLicense(Context context, Item item, String license,
+                        Collection collection, PackageParameters params)
+     throws PackageValidationException,
+            AuthorizeException, SQLException, IOException
+ {
+     // Not a restore/replace: handle like a SIP, which always gets a license.
+     boolean treatedAsSip = !params.restoreModeEnabled();
+
+     // Otherwise a license is still needed when the object has none,
+     // unless this is a "manifestOnly" AIP.
+     boolean lacksLicense =
+         !params.getBooleanProperty("manifestOnly", false)
+             && PackageUtils.findDepositLicense(context, item) == null;
+
+     if (treatedAsSip || lacksLicense)
+     {
+         PackageUtils.addDepositLicense(context, license, item, collection);
+     }
+ }
+
+ /**
+  * Last chance to adjust a DSpace Object once it has been ingested.
+  * This ingester has no adjustments to make, so the body is empty.
+  *
+  * @param context DSpace context (unused).
+  * @param dso the ingested object (unused).
+  */
+ public void finishObject(Context context, DSpaceObject dso)
+     throws PackageValidationException,
+            CrosswalkException,
+            AuthorizeException,
+            SQLException,
+            IOException
+ {
+     // Intentionally empty.
+ }
+
+ /**
+  * Post-ingest hook for a single bitstream.  This ingester needs no
+  * further processing of bitstreams, so the body is empty.
+  *
+  * @param context DSpace context (unused).
+  * @param bs the ingested bitstream (unused).
+  * @param mfile the manifest element passed for this bitstream (unused).
+  * @param manifest the package's METS manifest (unused).
+  * @param params packager options (unused).
+  */
+ public void finishBitstream(Context context, Bitstream bs, Element mfile,
+                             METSManifest manifest, PackageParameters params)
+     throws MetadataValidationException, SQLException, AuthorizeException, IOException
+ {
+     // Intentionally empty.
+ }
+
+ /**
+  * Determine which kind of DSpaceObject the package holds, as stated by
+  * the TYPE attribute of the manifest's root mets element.  A leading
+  * "DSpace " in the attribute value is dropped before the name is
+  * looked up through <code>Constants.getTypeID</code>.
+  *
+  * @param manifest the package's METS manifest.
+  * @return the type identifier reported by <code>Constants.getTypeID</code>.
+  * @throws PackageValidationException if the TYPE attribute is absent or
+  *         empty, or if its value maps to a negative type identifier.
+  */
+ public int getObjectType(METSManifest manifest)
+     throws PackageValidationException
+ {
+     final String dspacePrefix = "DSpace ";
+
+     String declared = manifest.getMets().getAttributeValue("TYPE");
+     if (declared == null || declared.length() == 0)
+     {
+         throw new PackageValidationException("Manifest is missing the required mets@TYPE attribute.");
+     }
+
+     String typeName = declared.startsWith(dspacePrefix)
+         ? declared.substring(dspacePrefix.length())
+         : declared;
+
+     int typeId = Constants.getTypeID(typeName);
+     if (typeId >= 0)
+     {
+         return typeId;
+     }
+     throw new PackageValidationException("Manifest has unrecognized value in mets@TYPE attribute: "+typeName);
+ }
+
+ /**
+  * Supply the name under which DSpace configuration entries for this
+  * particular subclass are distinguished from those of other ingesters.
+  *
+  * @return the fixed string "dspaceAIP".
+  */
+ public String getConfigurationName()
+ {
+     return "dspaceAIP";
+ }
+
+}
Index: PackageIngester.java
===================================================================
--- PackageIngester.java (.../dspace/trunk/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257)
+++ PackageIngester.java (.../sandbox/aip-external-1_6-prototype/dspace-api/src/main/java/org/dspace/content/packager) (revision 5257)
@@ -38,28 +38,28 @@
package org.dspace.content.packager;
+import java.io.File;
import java.io.IOException;
-import java.io.InputStream;
import java.sql.SQLException;
+import java.util.List;
import org.dspace.authorize.AuthorizeException;
-import org.dspace.content.Collection;
-import org.dspace.content.Item;
-import org.dspace.content.WorkspaceItem;
+import org.dspace.content.DSpaceObject;
import org.dspace.content.crosswalk.CrosswalkException;
import org.dspace.core.Context;
-
+
/**
* Plugin Interface to interpret a Submission Information Package (SIP)
- * and create (or replace) a DSpace item from its contents.
+ * and create (or replace) a DSpace Object from its contents.
* WorkspaceItem
, so it is
- * up to the caller to decide whether to install it or submit
- * it to normal DSpace Workflow.
+ * Create new DSpaceObject out of the ingested package. The object
+ * is created under the indicated parent. This creates a
+ * <code>DSpaceObject</code>. For Items, it is up to the caller to
+ * decide whether to install it or submit it to normal DSpace Workflow.
* null
.
+ * The deposit license (Only significant for Item) is passed
+ * explicitly as a string since there is no place for it in many
+ * package formats. It is optional and may be given as
+ * <code>null</code>.
+ * ingestAll
method to perform a recursive ingest of all
+ * packages which are referenced by an initial package.
*
* @param context DSpace context.
- * @param collection collection under which to create new item.
- * @param in input stream containing package to ingest.
+ * @param parent parent under which to create new object
+ * (may be null -- in which case ingester must determine parent from package
+ * or throw an error).
+ * @param pkgFile The package file to ingest
* @param params Properties-style list of options (interpreted by each packager).
* @param license may be null, which takes default license.
- * @return workspace item created by ingest.
+ * @return DSpaceObject created by ingest.
*
* @throws PackageValidationException if package is unacceptable or there is
- * a fatal error turning it into an Item.
+ * a fatal error turning it into a DSpaceObject.
*/
- WorkspaceItem ingest(Context context, Collection collection, InputStream in,
+ DSpaceObject ingest(Context context, DSpaceObject parent, File pkgFile,
PackageParameters params, String license)
throws PackageException, CrosswalkException,
AuthorizeException, SQLException, IOException;
+
/**
- * Replace an existing Item with contents of the ingested package.
+ * Recursively create one or more DSpace Objects out of the contents
+ * of the ingested package (and all other referenced packages).
+ * The initial object is created under the indicated parent. All other
+ * objects are created based on their relationship to the initial object.
+ * DspaceObject
s.
+ *
ingestAll
,
+ * or simply forward the call to ingest
if it is unable to support
+ * recursive ingestion.
+ * null
.
+ *
+ * @param context DSpace context.
+ * @param parent parent under which to create the initial object
+ * (may be null -- in which case ingester must determine parent from package
+ * or throw an error).
+ * @param pkgFile The initial package file to ingest
+ * @param params Properties-style list of options (interpreted by each packager).
+ * @param license may be null, which takes default license.
+ * @return List of DSpaceObjects created
+ *
+ * @throws PackageValidationException if initial package (or any referenced package)
+ * is unacceptable or there is a fatal error in creating a DSpaceObject
+ * @throws UnsupportedOperationException if this packager does not
+ * implement ingestAll
+ */
+ Listreplace
,
* since it somewhat contradicts the archival nature of DSpace.
* The exact function of this method is highly implementation-dependent.
+ * replaceAll
method to perform a recursive replace of
+ * objects referenced by a set of packages.
*
* @param context DSpace context.
- * @param item existing item to be replaced
- * @param in input stream containing package to ingest.
+ * @param dso existing DSpace Object to be replaced, may be null
+ * if object to replace can be determined from package
+ * @param pkgFile The package file to ingest.
* @param params Properties-style list of options specific to this packager
- * @return item re-created by ingest.
+ * @return DSpaceObject with contents replaced
*
* @throws PackageValidationException if package is unacceptable or there is
* a fatal error turning it into an Item.
* @throws UnsupportedOperationException if this packager does not
* implement <code>replace</code>.
*/
- Item replace(Context context, Item item, InputStream in,
- PackageParameters params)
+ DSpaceObject replace(Context context, DSpaceObject dso,
+ File pkgFile, PackageParameters params)
throws PackageException, UnsupportedOperationException,
CrosswalkException, AuthorizeException,
SQLException, IOException;
-
+
+ /**
+ * Recursively replace one or more DSpace Objects out of the contents
+ * of the ingested package (and all other referenced packages).
+ * The initial object to replace is indicated by dso
. All other
+ * objects are replaced based on information provided in the referenced packages.
+ * dso
input only specifies the
+ * initial object to replace, any additional objects to replace must be
+ * determined based on the referenced packages (or initial package itself).
+ * DspaceObject
s.
+ *
replaceAll
,
+ * since it somewhat contradicts the archival nature of DSpace. It also
+ * may choose to forward the call to replace
if it is unable to
+ * support recursive replacement.
+ *
+ * @param context DSpace context.
+ * @param dso initial existing DSpace Object to be replaced, may be null
+ * if object to replace can be determined from package
+ * @param pkgFile The package file to ingest.
+ * @param params Properties-style list of options specific to this packager
+ * @return List of DSpaceObjects replaced
+ *
+ * @throws PackageValidationException if initial package (or any referenced package)
+ * is unacceptable or there is a fatal error in creating a DSpaceObject
+ * @throws UnsupportedOperationException if this packager does not
+ * implement replaceAll
+ */
+ List
+ * METS (Metadata Encoding & Transmission Standard) Packages.
* See http://www.loc.gov/standards/mets/
* mets.submission.preserveManifest
is true,
+ * Package Parameters:
+ *
+ * <ul>
+ * <li><code>validate</code> -- true/false attempt to schema-validate the METS manifest.</li>
+ * <li><code>manifestOnly</code> -- package consists only of a manifest document.</li>
+ * <li><code>ignoreHandle</code> -- true/false, ignore AIP's idea of handle when ingesting.</li>
+ * <li><code>ignoreParent</code> -- true/false, ignore AIP's idea of parent when ingesting.</li>
+ * </ul>
+ *
* @author Larry Stone
+ * @author Tim Donohue
* @version $Revision$
* @see org.dspace.content.packager.METSManifest
+ * @see AbstractPackageIngester
+ * @see PackageIngester
*/
public abstract class AbstractMETSIngester
- implements PackageIngester
+ extends AbstractPackageIngester
{
/** log4j category */
private static Logger log = Logger.getLogger(AbstractMETSIngester.class);
- /** Filename of manifest, relative to package toplevel. */
- public static final String MANIFEST_FILE = "mets.xml";
-
- // bitstream format name of magic METS SIP format..
- private static final String MANIFEST_BITSTREAM_FORMAT =
- "DSpace METS SIP";
-
- // value of mets.submission.preserveManifest config key
- private static final boolean preserveManifest =
- ConfigurationManager.getBooleanProperty("mets.submission.preserveManifest", false);
-
- // value of mets.submission.useCollectionTemplate config key
- private static final boolean useTemplate =
- ConfigurationManager.getBooleanProperty("mets.submission.useCollectionTemplate", false);
-
/**
- * An instance of MdrefManager holds the state needed to
- * retrieve the contents (or bitstream corresponding to) an
- * external metadata stream referenced by an mets.CONFIGNAME.ingest.preserveManifest
- if true,
* the METS manifest itself is preserved in a bitstream named
* mets.xml
in the METADATA
bundle. If it is
- * false (the default), the manifest is discarded after ingestion.
+ * false (the default), the manifest is discarded after ingestion.mets.CONFIGNAME.ingest.manifestBitstreamFormat
- short name of
+ * the bitstream format to apply to the manifest; MUST be specified when
+ * preserveManifest is true.mets.default.ingest.crosswalk.MD_SEC_NAME
= PLUGIN_NAME
+ * Establishes a default crosswalk plugin for the given type of metadata
+ * in a METS mdSec (e.g. "DC", "MODS"). The plugin may be either a stream
+ * or XML-oriented ingestion crosswalk. Subclasses can override the
+ * default mapping with their own, substituting their configurationName
+ * for "default" in the configuration property key above.mets.CONFIGNAME.ingest.useCollectionTemplate
- if true,
+ * when an item is created, use the collection template. If it is
+ * false (the default), any existing collection template is ignored.mdRef
- * element in the METS manifest.
+ * An instance of ZipMdrefManager holds the state needed to
+ * retrieve the contents of an external metadata stream
+ * referenced by an mdRef
+ * element in a Zipped up METS manifest.
* mdRef
element.
- * @param mdref the METS mdRef element to locate the bitstream for.
- * @return bitstream or null if none found.
- */
- public Bitstream getBitstreamForMdRef(Element mdref)
- throws MetadataValidationException, IOException, SQLException, AuthorizeException
- {
- String path = METSManifest.getFileName(mdref);
- if (mdBundle == null)
- throw new MetadataValidationException("Failed referencing mdRef element, because there were no metadata files.");
- return mdBundle.getBitstreamByName(path);
- }
-
- /**
* Make the contents of an external resource mentioned in
* an mdRef
element available as an InputStream
.
* See the METSManifest.MdRef
interface for details.
+ *
* @param mdref the METS mdRef element to locate the input for.
* @return the input stream of its content.
+ * @see METSManifest
*/
public InputStream getInputStream(Element mdref)
- throws MetadataValidationException, IOException, SQLException, AuthorizeException
+ throws MetadataValidationException, IOException
{
- Bitstream mdbs = getBitstreamForMdRef(mdref);
- if (mdbs == null)
- throw new MetadataValidationException("Failed dereferencing bitstream for mdRef element="+mdref.toString());
- return mdbs.retrieve();
+ String path = METSManifest.getFileName(mdref);
+ if (packageFile == null)
+ throw new MetadataValidationException("Failed referencing mdRef element, because there is no package specified.");
+
+ //Use the 'getFileInputStream()' method from the AbstractMETSIngester to retrieve
+ // the inputstream for the referenced external metadata file.
+ return AbstractMETSIngester.getFileInputStream(packageFile, params, path);
}
- }
+ }//end MdrefManager class
/**
- * Create a new DSpace item out of a METS content package.
+ * Create a new DSpace object out of a METS content package.
* All contents are dictated by the METS manifest.
- * Package is a ZIP archive, all files relative to top level
+ * Package is a ZIP archive (or optionally bare manifest XML document).
+ * In a Zip, all files relative to top level
* and the manifest (as per spec) in mets.xml.
*
- * @param context - DSpace context.
- * @param collection - collection under which to create new item.
- * @param pkg - input stream containing package to ingest.
- * @param license - may be null, which takes default license.
- * @return workspace item created by ingest.
+ * @param context DSpace context.
+ * @param parent parent under which to create new object
+ * (may be null -- in which case ingester must determine parent from package
+ * or throw an error).
+ * @param pkgFile The package file to ingest
+ * @param params Properties-style list of options (interpreted by each packager).
+ * @param license may be null, which takes default license.
+ * @return DSpaceObject created by ingest.
+ *
* @throws PackageValidationException if package is unacceptable or there is
- * a fatal error turning it into an Item.
+ * a fatal error turning it into a DSpaceObject.
+ * @throws CrosswalkException
+ * @throws AuthorizeException
+ * @throws SQLException
+ * @throws IOException
*/
- public WorkspaceItem ingest(Context context, Collection collection,
- InputStream pkg, PackageParameters params,
+ public DSpaceObject ingest(Context context, DSpaceObject parent,
+ File pkgFile, PackageParameters params,
String license)
throws PackageValidationException, CrosswalkException,
AuthorizeException, SQLException, IOException
{
- ZipInputStream zip = new ZipInputStream(pkg);
- HashMap fileIdToBitstream = new HashMap();
- WorkspaceItem wi = null;
- boolean success = false;
- HashSet packageFiles = new HashSet();
+ //parsed out METS Manifest from the file.
+ METSManifest manifest = null;
- boolean validate = params.getBooleanProperty("validate", true);
+ //new DSpace object created
+ DSpaceObject dso = null;
try
{
- /* 1. Read all the files in the Zip into bitstreams first,
- * because we only get to take one pass through a Zip input
- * stream. Give them temporary bitstream names corresponding
- * to the same names they had in the Zip, since those MUST
- * match the URL references in manifest
* variable, an instance of METSManifest
.
@@ -519,42 +1156,12 @@
throws MetadataValidationException;
/**
- * Hook for subclass to modify the test of the package's
- * integrity, and add other tests. E.g. evaluate a PGP signature of
- * the manifest in a separate file.
- * packageFiles
contains "extra" files that were in
- * the package but were not referenced by the METS manifest (either as
- * content or metadata (mdRefs)).
- * The implementation of this method should look for any "extra" files
- * uses (e.g. a checksum or cryptographic signature for the manifest
- * itself) and remove them from the Set.
- * missingFiles
set is for
- * any files
- * referenced by the manifest but not found in the package.
- * The implementation can check it for "false positives", or add
- * other missing files it knows of.
- * missingFiles
- * or packageFiles
- * is not empty, the ingest will fail.
- *
- * @param packageFiles files in package but not referenced by METS
- * @param missingFiles files referenced by manifest but not in package
- *
- */
- abstract public void checkPackageFiles(Set packageFiles, Set missingFiles,
- METSManifest manifest)
- throws PackageValidationException, CrosswalkException;
-
- /**
* Select the dmdSec
element(s) to apply to the
* Item. The implementation is responsible for choosing which
* (if any) of the metadata sections to crosswalk to get the
* descriptive metadata for the item being ingested. It is
* responsible for calling the crosswalk, using the manifest's helper
- * i.e. manifest.crosswalkItem(context,item,dmdElement,callback);
+ * i.e. manifest.crosswalkItemDmd(context,item,dmdElement,callback);
* (The callback
argument is a reference to itself since the
* class also implements the METSManifest.MdRef
interface
* to fetch package files referenced by mdRef elements.)
@@ -569,10 +1176,10 @@
* @param dmds array of Elements, each a METS dmdSec
that applies to the Item as a whole.
* @param params any user parameters passed to the Packager script
*/
- abstract public void chooseItemDmd(Context context, Item item,
+ abstract public void crosswalkObjectDmd(Context context, DSpaceObject dso,
METSManifest manifest, MdrefManager callback,
Element dmds[], PackageParameters params)
- throws CrosswalkException,
+ throws CrosswalkException, PackageValidationException,
AuthorizeException, SQLException, IOException;
/**
@@ -587,28 +1194,56 @@
* information of interest, e.g. a Creative Commons license.
*