Page tree

Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.
Comment: tools: dspace-xmlui-lang-util

...

Given the English original (messages.xml or Messages.properties) and the corresponding translation file, this script will report both strings missing in the translation file and extra strings that shouldn't be in the translation. Tested on Python 2.7; uses ElementTree (xml.etree.ElementTree, part of the Python standard library) for checking XML and/or the third-party jprops module for checking .properties files.

Here's also a slightly longer version which preserves the order of strings in the output: dspace-l10n-check.py
Code Block
languagepython
titledspace-l10n-check.py
#!/usr/bin/env python

import sys

class MessagesXmlParser():
        """Collect the message keys of an XML message catalogue.

        After construction, ``self.keys`` holds the value of the ``key``
        attribute of every direct child of the document root, in document
        order (duplicates are kept).
        """
        def __init__(self, filename):
                """Parse *filename* and populate ``self.keys``.

                Raises ``KeyError`` if a direct child of the root element
                has no ``key`` attribute; parse errors from ElementTree
                propagate unchanged.
                """
                from xml.etree import ElementTree

                document_root = ElementTree.parse(filename).getroot()
                # Only the root's direct children are inspected.
                self.keys = [child.attrib['key'] for child in document_root]

class MessagesPropertiesParser():
        """Collect the property names of a Java ``.properties`` file.

        Parsing is delegated to the third-party ``jprops`` module, which is
        imported only when this class is instantiated. After construction,
        ``self.keys`` holds the property names in the order
        ``jprops.iter_properties`` yields them.
        """
        def __init__(self, filename):
                """Read *filename* and populate ``self.keys``.

                Prints an installation hint and exits with status 2 when
                ``jprops`` cannot be imported. Errors from opening or
                parsing the file propagate unchanged.
                """
                try:
                        import jprops
                except ImportError:
                        # Only a failed import means "module is missing";
                        # anything else (e.g. KeyboardInterrupt) must not be
                        # reported as a missing dependency.
                        print('Error: jprops module for parsing .properties files is missing. Download and follow installation instructions from http://mgood.github.com/jprops/')
                        sys.exit(2)

                with open(filename) as fp:
                        # The property values are irrelevant for the key comparison.
                        self.keys = [key for key, _value in jprops.iter_properties(fp)]

if __name__ == "__main__":
        # Exactly two file arguments are expected: the English original
        # followed by the translation that should be checked against it.
        if len(sys.argv) != 3:
                print("Usage:")
                print("       %s messages.xml messages_XX.xml" % (sys.argv[0]))
                print("or")
                print("       %s Messages.properties Messages_XX.properties" % (sys.argv[0]))
                sys.exit(1)

        # Sniff the first line of the original to choose the parser. The
        # file is read as bytes and compared with a bytes literal so the
        # check works on both Python 2 and Python 3; the handle is closed
        # as soon as the line has been read.
        with open(sys.argv[1], 'rb') as testfile:
                is_xml = b'<?xml' in testfile.readline()

        if is_xml:
                # xml file detected, assume messages.xml
                messages_tmpl = MessagesXmlParser(sys.argv[1])
                messages_in   = MessagesXmlParser(sys.argv[2])
        else:
                # assume Messages.properties
                messages_tmpl = MessagesPropertiesParser(sys.argv[1])
                messages_in   = MessagesPropertiesParser(sys.argv[2])

        # Compare as sets: duplicates collapse and the output order is
        # whatever set iteration yields.
        keys_tmpl = set(messages_tmpl.keys)
        keys_in = set(messages_in.keys)

        # print(...) with a single argument behaves the same on Python 2
        # and Python 3.
        print("Present in %s but missing in %s:" % (sys.argv[1], sys.argv[2]))
        for i in keys_tmpl - keys_in:
                print(i)
        print("\nPresent in %s but missing in %s:" % (sys.argv[2], sys.argv[1]))
        for i in keys_in - keys_tmpl:
                print(i)
 

 

Newer version of the script which allows comparison and dumping in columnar TSV of several messages files simultaneously: dspace-i10n-check2.py. Usage example:

Code Block
languagebash
titleUsage examples
./dspace-i10n-check2.py --compare file1 file2
./dspace-i10n-check2.py --dump file1
./dspace-i10n-check2.py --dump-tsv-for-translation /dspace/webapps/xmlui/i18n/messages.xml /dspace/webapps/xmlui/i18n/messages_es.xml /dspace/webapps/xmlui/i18n/messages_it.xml

 

Python script to prepare a list of untranslated XMLUI strings when updating an older translation

A python script to generate a messages_xx.xml file by merging existing translations from an older committed translation to the latest messages.xml file. Thus, you get a file with the latest keys, where you only need to look for the untranslated strings.

https://github.com/evelthon/dspace-xmlui-lang-util

PHP Script to check for missing/extra strings

Given the English original (messages.xml) and the corresponding translation file, this script will report both strings missing in the translation file and extra strings that shouldn't be in the translation.

Code Block
languagephp
titlecheckkeys.php
<?php
// The script name plus exactly two file paths are required.
if ($argc !== 3) {
    echo "Usage: checkkeys.php <master> <tocheck>\n";
    exit(1);
}

// Both key lists are loaded before anything is printed, so an unreadable
// file aborts the run before any report header appears.
$masterKeys  = getKeys($argv[1]);
$toCheckKeys = getKeys($argv[2]);

// First report section: master keys are passed as the reference list.
echo "IN $argv[1] BUT NOT IN $argv[2]:\n\n";
printMissing($masterKeys, $toCheckKeys);

// Second report section: the roles of the two key lists are swapped.
echo "\n\n\nIN $argv[2] BUT NOT IN $argv[1]:\n\n";
printMissing($toCheckKeys, $masterKeys);

/**
 * Print every entry of $reference that does not occur in $test.
 *
 * Each missing entry is written on its own line, followed by a space and a
 * newline. Callers print an "IN <reference file> BUT NOT IN <test file>"
 * header before calling this, so the loop has to walk $reference and look
 * each entry up in $test.
 *
 * @param array $reference Keys that are expected to exist.
 * @param array $test      Keys that are actually present.
 *
 * @return void
 */
function printMissing($reference, $test)
{
    // Flip once so every membership check is a hash lookup instead of a
    // linear in_array() scan. This also avoids in_array()'s loose
    // comparison, under which numeric-looking strings such as "1e1" and
    // "10" would be considered equal.
    $present = array_flip($test);

    foreach ($reference as $value) {
        if (! isset($present[$value])) {
            echo "$value \n";
        }
    }
}

/**
 * Read a file into an array of lines, aborting the script on failure.
 *
 * @param string $path Path of the file to read.
 *
 * @return array The lines of the file as returned by file(); an empty array
 *               for an empty file.
 */
function readFileMessage($path)
{
    // @ silences PHP's own warning; the failure is reported below instead.
    $fileContent = @file($path);

    // file() signals failure with false. An empty but readable file yields
    // an empty array, which is also falsy, so the check must be strict.
    if ($fileContent === false) {
        echo "Can't open $path \n";
        exit(1);
    }

    return $fileContent;
}

/**
 * Return the message keys found in the file at $path.
 *
 * The file is loaded with readFileMessage() and its lines are handed to
 * readKeys().
 *
 * @param string $path Path of the messages file.
 *
 * @return array List of keys as produced by readKeys().
 */
function getKeys($path)
{
    return readKeys(readFileMessage($path));
}

/**
 * Extract the message key from every line that contains a message tag.
 *
 * Lines without the text '<message key="' are ignored; every other line is
 * passed to getKey() and the result is appended to the returned list.
 *
 * @param array $file Lines of a messages file.
 *
 * @return array Sequentially indexed list of keys, in line order.
 */
function readKeys($file)
{
    $found = [];

    foreach ($file as $row) {
        if (strpos($row, '<message key="') !== false) {
            $found[] = getKey($row);
        }
    }

    return $found;
}

/**
 * Extract the value of the key attribute from a line holding a message tag.
 *
 * The key is the text between '<message key="' and the next double quote.
 * The tag may be preceded by whitespace or other markup, and may carry
 * further attributes or whitespace after the key.
 *
 * @param string $line One line of a messages file.
 *
 * @return string The key, or an empty string when the line has no
 *                '<message key="' or the attribute value is not terminated.
 */
function getKey($line)
{
    $marker = '<message key="';

    // Locate the tag instead of assuming it starts at column 0.
    $start = strpos($line, $marker);
    if ($start === false) {
        return '';
    }
    $start += strlen($marker);

    // The attribute value ends at the next double quote, whatever follows
    // it ('>', ' >', '/>' or another attribute).
    $end = strpos($line, '"', $start);
    if ($end === false) {
        return '';
    }

    return substr($line, $start, $end - $start);
}
Code Block
languagebash
titleUsage examples - checkkeys.php
php ./checkkeys.php file1 file2
php ./checkkeys.php /dspace/webapps/xmlui/i18n/messages.xml /dspace/webapps/xmlui/i18n/messages_es.xml

 

...