From 1cf86c3050d14e6584ca669bb2f603a529a55889 Mon Sep 17 00:00:00 2001 From: Joao Inacio Date: Tue, 26 Jul 2016 15:52:11 +0100 Subject: [PATCH 1/3] EZP-26058: update script for non-breaking spaces in ezxmltext --- update/common/scripts/updatenbxmlcontents.php | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 update/common/scripts/updatenbxmlcontents.php diff --git a/update/common/scripts/updatenbxmlcontents.php b/update/common/scripts/updatenbxmlcontents.php new file mode 100644 index 00000000000..e7b8db92ffc --- /dev/null +++ b/update/common/scripts/updatenbxmlcontents.php @@ -0,0 +1,117 @@ + "Updates non-break space encoding in ezxml contents. See issue EZP-18220\n", + 'use-session' => true, + 'use-modules' => false, + 'use-extensions' => true + ) +); +$script->startup(); + +$options = $script->getOptions( + "[dry-run][n][v][iteration-sleep:][iteration-limit:]", + "", + array( + 'dry-run' => 'Dry run', + 'iteration-sleep' => 'Sleep duration between batches, in seconds (default: 1)', + 'iteration-limit' => 'Batch size (default: 100)', + 'n' => 'Do not wait 30 seconds before starting', + ) +); +$optDryRun = (bool)$options['dry-run']; +$optIterationSleep = (int)$options['iteration-sleep'] ?: 1; +$optIterationLimit = (int)$options['iteration-limit'] ?: 100; +$verboseLevel = $script->verboseOutputLevel(); + +$limit = array( + "offset" => 0, + "limit" => $optIterationLimit, +); + +$script->initialize(); +$db = eZDB::instance(); + +if ( $optDryRun ) { + $cli->warning( "dry-run mode" ); +} + +/** + * Updates non-breaking spaces from existing "&nbsp;" to proper "\xC2\xA0" + * + * @param array $attribute + */ +function updateEzxmlNonbreakSpaces( $attribute, $optDryRun, $verbose ) +{ + $id = $attribute['id']; + $contentId = $attribute['contentobject_id']; + $version = $attribute['version']; + $xmlData = $attribute['data_text']; + + $pattern = '/(]*\>)(.*)&nbsp;(.*)(<\/paragraph>)/'; + $replace = "\\1\\2\xC2\xA0\\3\\4"; + do { + $xmlData = preg_replace( $pattern, $replace, $xmlData, -1, $countReplaced ); + } while ($countReplaced > 0); + + if ( $verbose ) { + eZCLI::instance()->output( "Updating data for content #$contentId (ver. $version) ..." ); + } + if ( !$optDryRun ) { + eZDB::instance()->query( "UPDATE ezcontentobject_attribute SET data_text='$xmlData' WHERE id='$id'" ); + } +} + +if ( !$options['n'] ) +{ + $cli->output(); + $cli->warning( "You have 30 seconds to break the script before actual processing starts (press Ctrl-C)." ); + $cli->warning( "Execute the script with '-n' switch to skip this delay." ); + sleep( 30 ); +} + +$attributeCount = $db->arrayQuery( + "SELECT count(id) as count " . + "FROM ezcontentobject_attribute attr " . + "WHERE data_type_string='ezxmltext' AND data_text LIKE '%&nbsp;%' " +); +$attributeCount = $attributeCount[0]['count']; + +$cli->output( "Number of xml attributes to update: " . $attributeCount ); + +// main loop +do { + $rows = $db->arrayQuery( + "SELECT id, contentobject_id, version, data_text " . + "FROM ezcontentobject_attribute attr " . + "WHERE data_type_string='ezxmltext' AND data_text LIKE '%&nbsp;%' ", + $limit + ); + + $db->begin(); + foreach ( $rows as $attribute ) + { + updateEzxmlNonbreakSpaces( $attribute, $optDryRun, $verboseLevel ); + } + $db->commit(); + + $cli->output("."); + + $limit["offset"] += $optIterationLimit; + sleep( $optIterationSleep ); +} while ( count($rows) == $optIterationLimit ); + +$cli->output( "Update has been completed." ); + +$script->shutdown(); From d41a123176b6ffefd2bdfa632bed3dc4eeb7049e Mon Sep 17 00:00:00 2001 From: Joao Inacio Date: Thu, 28 Jul 2016 15:15:45 +0100 Subject: [PATCH 2/3] fixup! EZP-26058: update script for non-breaking spaces in ezxmltext --- update/common/scripts/updatenbxmlcontents.php | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/update/common/scripts/updatenbxmlcontents.php b/update/common/scripts/updatenbxmlcontents.php index e7b8db92ffc..3bb76b380d8 100644 --- a/update/common/scripts/updatenbxmlcontents.php +++ b/update/common/scripts/updatenbxmlcontents.php @@ -31,8 +31,8 @@ ) ); $optDryRun = (bool)$options['dry-run']; -$optIterationSleep = (int)$options['iteration-sleep'] ?: 1; -$optIterationLimit = (int)$options['iteration-limit'] ?: 100; +$optIterationSleep = $options['iteration-sleep'] ? (int)$options['iteration-sleep'] : 1; +$optIterationLimit = $options['iteration-limit'] ? (int)$options['iteration-limit'] : 100; $verboseLevel = $script->verboseOutputLevel(); $limit = array( @@ -43,7 +43,8 @@ $script->initialize(); $db = eZDB::instance(); -if ( $optDryRun ) { +if ( $optDryRun ) + $cli->warning( "dry-run mode" ); } From 25bac64394e96f22def07fd6d634fb922e6edc3a Mon Sep 17 00:00:00 2001 From: Joao Inacio Date: Thu, 28 Jul 2016 15:33:36 +0100 Subject: [PATCH 3/3] fixup! EZP-26058: update script for non-breaking spaces in ezxmltext --- update/common/scripts/updatenbxmlcontents.php | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/update/common/scripts/updatenbxmlcontents.php b/update/common/scripts/updatenbxmlcontents.php index 3bb76b380d8..603a3eefee1 100644 --- a/update/common/scripts/updatenbxmlcontents.php +++ b/update/common/scripts/updatenbxmlcontents.php @@ -44,7 +44,7 @@ $db = eZDB::instance(); if ( $optDryRun ) - +{ $cli->warning( "dry-run mode" ); } @@ -60,8 +60,10 @@ function updateEzxmlNonbreakSpaces( $attribute, $optDryRun, $verbose ) $version = $attribute['version']; $xmlData = $attribute['data_text']; - $pattern = '/(]*\>)(.*)&nbsp;(.*)(<\/paragraph>)/'; - $replace = "\\1\\2\xC2\xA0\\3\\4"; + $matchTags = implode('|', array( 'paragraph', 'header') ); + $pattern = '/(<(?' . $matchTags . ')[^>]*\>)(.*)&nbsp;(.*)(<\/(?P=tag)>)/'; + $replace = "\\1\\3\xC2\xA0\\4\\5"; + do { $xmlData = preg_replace( $pattern, $replace, $xmlData, -1, $countReplaced ); } while ($countReplaced > 0);