diff --git a/tests/tmgmt_test.info b/tests/tmgmt_test.info index 7be778b..471a8ec 100644 --- a/tests/tmgmt_test.info +++ b/tests/tmgmt_test.info @@ -7,4 +7,3 @@ files[] = tmgmt_test.plugin.source.inc files[] = tmgmt_test.plugin.html_source.inc files[] = tmgmt_test.plugin.translator.inc files[] = tmgmt_test.ui.translator.inc -files[] = tmgmt_test.text_processor.inc diff --git a/translators/file/tmgmt_file.format.xliff.inc b/translators/file/tmgmt_file.format.xliff.inc index d94a765..7107ec7 100644 --- a/translators/file/tmgmt_file.format.xliff.inc +++ b/translators/file/tmgmt_file.format.xliff.inc @@ -2,6 +2,46 @@ /** * Export to XLIFF format. + * + * The XLIFF processor follows this specification: + * @link http://docs.oasis-open.org/xliff/v1.2/xliff-profile-html/xliff-profile-html-1.2-cd02.html + * + * The purpose of this class is to mask or process HTML elements in the source + * and target elements so that translation tools are able to understand which + * content needs to be translated and which is to be ignored. + * + * On the other hand we need to properly unmask the XLIFF markup back to HTML on + * the translation import. So the process is bidirectional and prior to running + * the unmasking process we try to validate the integrity in the + * validateJobTranslationUponImport() method. Currently the integrity check + * involves only a counter of XLIFF elements that have been created during + * source processing and has to match the number of XLIFF elements being imported + * with the translation. + * + * To process the content a DOMDocument object is used due to its ability to + * read broken HTML. This also implies that if broken HTML is in the source + * content the translation content will be fixed to the extent of DOMDocument's + * abilities. + * + * Following is implemented: + * - All pair tags get escaped using <bpt><ept> markup. + * -
<br> tags are marked with <x ctype="lb">. + * - <img> tags are marked with <ph ctype="image"> tags. The title and alt + * attributes should have been extracted into <sub> elements, however are not + * as Trados studio triggers a fatal error in case there are two <sub> + * elements at the same level. + * + * Not implemented: + * - Attributes of <img> element are written only as attributes of <ph> element + * instead of using x-html: prefix. This results in conflict with own <ph> + * element's attributes such as "id". The reason why x-html prefix has not + * been used is that Trados studio triggered fatal error on xml validation. + * - Translatable attributes like title and alt. + * @link http://docs.oasis-open.org/xliff/v1.2/xliff-profile-html/xliff-profile-html-1.2-cd02.html#elem_img + * - Forms - this is big part + * @link http://docs.oasis-open.org/xliff/v1.2/xliff-profile-html/xliff-profile-html-1.2-cd02.html#HTMLForms + * -
<pre> elements
+ *   @link http://docs.oasis-open.org/xliff/v1.2/xliff-profile-html/xliff-profile-html-1.2-cd02.html#Elem_preformatted
  */
 class TMGMTFileformatXLIFF extends XMLWriter implements TMGMTFileFormatInterface {
 
@@ -136,7 +176,7 @@ class TMGMTFileformatXLIFF extends XMLWriter implements TMGMTFileFormatInterface
   }
 
   /**
-   * Validates imported xliff file.
+   * Validates imported XLIFF file.
    *
    * Checks:
    * - Job ID.
@@ -206,7 +246,7 @@ class TMGMTFileformatXLIFF extends XMLWriter implements TMGMTFileFormatInterface
       // https://bugs.php.net/bug.php?id=61469
       $xml_string = file_get_contents($imported_file);
       $this->importedXML = simplexml_load_string($xml_string);
-      // Register the xliff namespace, required for xpath.
+      // Register the XLIFF namespace, required for xpath.
       $this->importedXML->registerXPathNamespace('xliff', 'urn:oasis:names:tc:xliff:document:1.2');
     }
 
@@ -223,7 +263,7 @@ class TMGMTFileformatXLIFF extends XMLWriter implements TMGMTFileFormatInterface
       foreach ($this->importedXML->xpath('//xliff:trans-unit') as $unit) {
         $reader->XML($unit->target->asXML());
         $reader->read();
-        $this->importedTransUnits[(string) $unit['id']]['#text'] = $this->processForImport($reader->readInnerXml());
+        $this->importedTransUnits[(string) $unit['id']]['#text'] = $this->processForImport($reader->readInnerXML());
       }
     }
 
@@ -297,7 +337,6 @@ class TMGMTFileformatXLIFF extends XMLWriter implements TMGMTFileFormatInterface
     $writer->startElement('wrapper');
 
     $tray = array();
-    $id = 0;
     $non_pair_tags = array('br', 'img');
     $elements_count = 0;
 
@@ -309,9 +348,9 @@ class TMGMTFileformatXLIFF extends XMLWriter implements TMGMTFileFormatInterface
       }
 
       if ($node->nodeType === XML_ELEMENT_NODE) {
-        // We have a new tag, increment the ID and the elements count.
-        $id++;
+        // Increment the elements count and compose element id.
         $elements_count++;
+        $id = $tjiid . '_' . $elements_count;
 
         $is_pair_tag = !in_array($node->nodeName, $non_pair_tags);
 
@@ -369,7 +408,7 @@ class TMGMTFileformatXLIFF extends XMLWriter implements TMGMTFileFormatInterface
     $reader = new XMLReader();
     $reader->XML($writer->outputMemory());
     $reader->read();
-    return $reader->readInnerXml();
+    return $reader->readInnerXML();
   }
 
   /**
@@ -482,9 +521,9 @@ class TMGMTFileformatXLIFF extends XMLWriter implements TMGMTFileFormatInterface
   }
 
   /**
-   * Convert critical characters to html entities.
+   * Convert critical characters to HTML entities.
    *
-   * DOMDocument will convert html entities to its actual characters. This can
+   * DOMDocument will convert HTML entities to its actual characters. This can
    * lead into situation when not allowed characters will appear in the content.
    *
    * @param string $string
diff --git a/translators/file/tmgmt_file.test b/translators/file/tmgmt_file.test
index 9de4ab8..c457801 100644
--- a/translators/file/tmgmt_file.test
+++ b/translators/file/tmgmt_file.test
@@ -25,7 +25,7 @@ class TMGMTFileTestCase extends TMGMTBaseTestCase {
   }
 
   /**
-   * Test the content processing for xliff export and import.
+   * Test the content processing for XLIFF export and import.
    */
   function testXLIFFTextProcessing() {
     $translator = $this->createTranslator();
@@ -101,7 +101,7 @@ class TMGMTFileTestCase extends TMGMTBaseTestCase {
     // Get the xml content and remove the element representing <br>
. This will // result in different element counts in the source and target and should // trigger an error and not import the translation. - $content = str_replace('', '', file_get_contents($message->variables['!link'])); + $content = str_replace('', '', file_get_contents($message->variables['!link'])); $xml = simplexml_load_string($content); $translated_file = 'public://tmgmt_file/translated.xlf'; $xml->asXML($translated_file); @@ -132,7 +132,7 @@ class TMGMTFileTestCase extends TMGMTBaseTestCase { } /** - * Gets trans-unit content from the xliff file that has been exported for the + * Gets trans-unit content from the XLIFF file that has been exported for the * given job as last. */ protected function getTransUnitsContent(TMGMTJob $job) { @@ -151,10 +151,10 @@ class TMGMTFileTestCase extends TMGMTBaseTestCase { foreach ($xml->xpath('//xliff:trans-unit') as $unit) { $reader->XML($unit->source->asXML()); $reader->read(); - $data[$i]['source'] = $reader->readInnerXml(); + $data[$i]['source'] = $reader->readInnerXML(); $reader->XML($unit->target->asXML()); $reader->read(); - $data[$i]['target'] = $reader->readInnerXml(); + $data[$i]['target'] = $reader->readInnerXML(); $i++; } @@ -201,7 +201,7 @@ class TMGMTFileTestCase extends TMGMTBaseTestCase { } /** - * Tests import and export for the xliff format. + * Tests import and export for the XLIFF format. */ function testXLIFF() { $translator = $this->createTranslator();