diff --git a/tests/testing_html/sample.html b/tests/testing_html/sample.html index a853422..9fadd13 100644 --- a/tests/testing_html/sample.html +++ b/tests/testing_html/sample.html @@ -1,5 +1,6 @@

one paragraph with special characters: äöüľščťžýáíéäňú©«®™»

one paragraph with a
break line

+

one paragraph with html entities: &amp;&lt;&gt;

and here we have some link break line

one paragraph with an not existing imageimage

hello world this is simple html

nested 1
nested 2
nested 3
diff --git a/translators/file/tmgmt_file.test b/translators/file/tmgmt_file.test index 82d9a60..e4a162e 100644 --- a/translators/file/tmgmt_file.test +++ b/translators/file/tmgmt_file.test @@ -111,17 +111,15 @@ class TMGMTFileTestCase extends TMGMTBaseTestCase { $reader->XML('' . $text . ''); while ($reader->read()) { // There should not be any other elements then those listed. - if (!in_array($reader->name, array('bpt', 'ept', 'ph', 'x', '#text', 'content'))) { - debug($reader->name); - } - $this->assertTrue(in_array($reader->name, array('bpt', 'ept', 'ph', 'x', '#text', 'content'))); + $this->assertTrue(in_array($reader->name, array('bpt', 'ept', 'ph', 'x', '#text', '#cdata-section', 'content'))); } $job = $this->importTranslatedFile($job); $text = $this->findTranslationText($job->getData()); - // Make sure we have non malformed special characters present in the - // translation text. + // Make sure we have non malformed special characters and html entities + // present in the translation text. $this->assertTrue(strpos($text, 'äöüľščťžýáíéäňú©«®™»') !== FALSE); + $this->assertTrue(strpos($text, '&amp;&lt;&gt;') !== FALSE); $reader = new XMLReader(); $reader->XML('' . $text . ''); while ($reader->read()) { diff --git a/translators/file/tmgmt_file.text_processor.xliff_mask_html.inc b/translators/file/tmgmt_file.text_processor.xliff_mask_html.inc index b429202..c33d523 100644 --- a/translators/file/tmgmt_file.text_processor.xliff_mask_html.inc +++ b/translators/file/tmgmt_file.text_processor.xliff_mask_html.inc @@ -126,7 +126,6 @@ class TMGMTFileXLIFFMaskHTMLProcessor implements TMGMTFileTextProcessorInterface */ protected function processTranslation(&$data) { if (isset($data['#translation'])) { - $reader = new XMLReader(); $reader->XML('' . $data['#translation']['#text'] . ''); $text = ''; @@ -139,7 +138,7 @@ class TMGMTFileXLIFFMaskHTMLProcessor implements TMGMTFileTextProcessorInterface } // If the current element is text append it to the result text. 
- if ($reader->name == '#text') { + if ($reader->name == '#text' || $reader->name == '#cdata-section') { $text .= $reader->value; } elseif ($reader->name == 'x') { @@ -160,7 +159,6 @@ class TMGMTFileXLIFFMaskHTMLProcessor implements TMGMTFileTextProcessorInterface } } } - $data['#translation']['#text'] = $text; } else { @@ -238,7 +236,7 @@ class TMGMTFileXLIFFMaskHTMLProcessor implements TMGMTFileTextProcessorInterface // The current node is a text. elseif ($node->nodeName == '#text') { // Add the node value to the text output. - $writer->text($node->nodeValue); + $writer->writeCdata($this->toEntities($node->nodeValue)); foreach ($tray as &$info) { $info['built_text'] .= $node->nodeValue; } @@ -378,4 +376,20 @@ class TMGMTFileXLIFFMaskHTMLProcessor implements TMGMTFileTextProcessorInterface // } $writer->endElement(); } + + /** + * Convert critical characters to html entities. + * + * DOMDocument converts html entities to their actual characters. This can + * lead to a situation where disallowed characters appear in the content. + * + * @param string $string + * String to escape. + * + * @return string + * Escaped string. + */ + protected function toEntities($string) { + return str_replace(array('&', '>', '<'), array('&amp;', '&gt;', '&lt;'), $string); + } }