Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

The table below shows the meaning of the prefixes:

| Prefix | Description |
|---|---|
| extract- | Script prefix for the data extraction step |
| transform- | Script prefix for the data transformation step |
| load- | Script prefix for the data loading step |
| func- | Generic functions |
| map- | Scripts for mapping DSpace data to the VIVO vocabulary. These scripts contain the SPARQL CONSTRUCT queries needed for the mapping |
| produce- | Production scripts for the various lists needed for the ETL processes |

Specific scripts

The directory also contains scripts dedicated to specific actions.

...

Code Block
languagebash
title00-env.sh content
linenumberstrue
collapsetrue
#!/bin/bash

###################################################################
# Script Name   : 00-env.sh
# Description   : This file is used to define the environment variables 
#                 needed to run the extract/transform/load (ETL) 
#                 process of dspace2vivo.
#                 It is meant to be sourced, not executed: every
#                 location is derived from the directory holding
#                 this file (via BASH_SOURCE), not from the caller's
#                 current directory.
# Args          : 
# Author        : Michel Héon PhD
# Institution   : Université du Québec à Montréal
# Copyright     : Université du Québec à Montréal (c) 2022
# Email         : heon.michel@uqam.ca
###################################################################

###################################################################
# Private helper: _d2v_export_dir NAME PATH
#   Exports NAME as the physical (symlink-free) absolute form of PATH.
#   If PATH cannot be entered, cd's diagnostic is left on stderr and
#   NAME falls back to PATH exactly as written. The previous
#   "cd PATH ; pwd -P" form silently produced the caller's current
#   directory in that case, which pointed every later step at the
#   wrong place.
#   The helper is removed again at the end of this file.
_d2v_export_dir() {
  local resolved
  if ! resolved="$(cd "$2" && pwd -P)"; then
    resolved="$2"
  fi
  export "$1=$resolved"
}

###################################################################
# Scripts root directory
export LOC_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd -P)"

###################################################################
# Root installation directory of the different dspace2vivo packages
_d2v_export_dir INSTALLER_DIR "$LOC_SCRIPT_DIR/../../../releng/org.vivoweb.dspacevivo.installer"

###################################################################
# Project root variables
# shellcheck source=/dev/null
source "$INSTALLER_DIR/00-env.sh"

###################################################################
# Executable and script path needed to run dspace2VIVO
export PATH="$LOC_SCRIPT_DIR:$PATH"

###################################################################
# Working directory of scripts
_d2v_export_dir WORKDIR "$LOC_SCRIPT_DIR/.."

###################################################################
# Directory of resources needed to configure the expected operation of the scripts
_d2v_export_dir RESSOURCESDIR "$WORKDIR/src/main/resources"

###################################################################
# Directory containing the correspondence files between DSpace values and VIVO values
_d2v_export_dir MAPPING_DATA_DIR "$RESSOURCESDIR/mapping_data"

###################################################################
# Resource directories after compilation. This directory is modified at each compilation (Do not edit)
_d2v_export_dir RESSOURCES_TARGET_DIR "$WORKDIR/target/classes"

###################################################################
# Directory containing the queries necessary for the execution of SPARQL
_d2v_export_dir QUERY_DIR "$RESSOURCESDIR/query"

###################################################################
# Repositories containing transient data from the extract/transform/load process
_d2v_export_dir DATA_DIR "$WORKDIR/data"
_d2v_export_dir DATA_DEMO6_DIR "$WORKDIR/data_src_dspace6"
_d2v_export_dir DATA_DEMO7_DIR "$WORKDIR/data_src_dspace7"

###################################################################
# Data transition sub-directories for each step of the ETL process
# (extract/transform are plain path concatenations; the suffixed
# transform directories are resolved like the directories above)
export ETL_DIR_EXTRACT="$DATA_DIR/extract"
export ETL_DIR_TRANSFORM="$DATA_DIR/transform"
_d2v_export_dir ETL_DIR_TRANSFORM_DOC_TYPE "${ETL_DIR_TRANSFORM}_doc_type"
_d2v_export_dir ETL_DIR_TRANSFORM_PERSON "${ETL_DIR_TRANSFORM}_person"
_d2v_export_dir ETL_DIR_TRANSFORM_EXPERTISES "${ETL_DIR_TRANSFORM}_expertises"
_d2v_export_dir ETL_DIR_TRANSFORM_PERSON_EXPERTISES "${ETL_DIR_TRANSFORM}_person_expertises"

###################################################################
# Do not leak the private helper into the shell that sourced this file
unset -f _d2v_export_dir

ETL-migration-DSpace-VIVO.sh

Code Block
languagebash
titleETL-migration-DSpace-VIVO
linenumberstrue
collapsetrue
#!/bin/bash

###################################################################
# Script Name   : ETL-migration-DSpace-VIVO.sh
# Description   : This script encapsulates the function calls allowing
#                 the migration of DSpace Demo (6 & 7) data into VIVO.
#                 It loads the environment from 00-env.sh, flushes and
#                 re-extracts the demo data, produces the lists, then
#                 runs each transform/load step.
# Args          : 
# Author        : Michel Héon PhD
# Institution   : Université du Québec à Montréal
# Copyright     : Université du Québec à Montréal (c) 2022
# Email         : heon.michel@uqam.ca
###################################################################
export SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd -P)"

# Without the environment file none of the directory variables used below
# exist, so stop before any cp/flush runs against empty paths.
if [[ ! -r "$SCRIPT_DIR/00-env.sh" ]]; then
  echo "Cannot read $SCRIPT_DIR/00-env.sh" >&2
  exit 1
fi
# shellcheck source=/dev/null
source "$SCRIPT_DIR/00-env.sh"

# The extract steps are invoked with relative paths (./extract-*.sh), so the
# script must really be running from its own directory.
cd "$SCRIPT_DIR" || { echo "Cannot cd to $SCRIPT_DIR" >&2; exit 1; }

###################################################################
# Clean and setup up data directories and properties
# ${VAR:?} aborts if a directory variable is unset or empty, so the globs
# can never expand from the filesystem root.
cp -- "${RESSOURCESDIR:?}"/*.conf "${RESSOURCES_TARGET_DIR:?}"
# stderr of the flush scripts is discarded, as in the original
# NOTE(review): presumably to hide "nothing to delete" noise — confirm
flush_data_dspace.sh 2>/dev/null
flush_data_dspace6.sh 2>/dev/null
flush_data_dspace7.sh 2>/dev/null

###################################################################
# Extract dspace(6-7) demo data
./extract-dspace6.sh
./extract-dspace7.sh
# Merge both demo data sets into the common working data directory
cp -r -- "${DATA_DEMO6_DIR:?}"/* "${DATA_DEMO7_DIR:?}"/* "${DATA_DIR:?}"

###################################################################
# Produce all list
echo run produce-list-of-expertise.sh
produce-list-of-expertise.sh

###########################
echo run produce-list-of-itemtype.sh
produce-list-of-itemtype.sh

###########################
echo run produce-list-of-persons.sh
produce-list-of-persons.sh

###################################################################
# Process transformation and load to VIVO
# After each intermediate load the index recomputation is started in the
# background so that the next transformation can proceed meanwhile.
load-data-to-vivo.sh
transform-map-vivo-doc-type.sh
load-data-doc_type-to-vivo.sh ; vivo-recomputeIndex.sh &

transform-map-vivo-person.sh
load-data-person-to-vivo.sh ; vivo-recomputeIndex.sh &

transform-map-vivo-expertises.sh
load-data-expertises-to-vivo.sh ; vivo-recomputeIndex.sh &

transform-map-expertise-and-item-to-a-person-to-vivo.sh
load-data-person-expertise-to-vivo.sh ; vivo-recomputeIndex.sh

# Block until every background index recomputation has finished, so that
# "Done!" is only reported once no job started by this script is running.
wait

###################################################################
# Done ETL Process
echo "Done!"