From 1ce0dabba83ef463684366fb36f0117f3a5a2ab9 Mon Sep 17 00:00:00 2001 From: David Grudl Date: Sat, 25 May 2024 13:36:57 +0200 Subject: [PATCH] Strings::matchAll(): added option 'lazy' --- src/Utils/Strings.php | 32 +++++++++--- tests/Utils/Strings.matchAll.lazy().phpt | 63 ++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 6 deletions(-) create mode 100644 tests/Utils/Strings.matchAll.lazy().phpt diff --git a/src/Utils/Strings.php b/src/Utils/Strings.php index 1615b3e86..4c21578a8 100644 --- a/src/Utils/Strings.php +++ b/src/Utils/Strings.php @@ -589,6 +589,7 @@ public static function match( /** * Searches the string for all occurrences matching the regular expression and * returns an array of arrays containing the found expression and each subexpression. + * @return ($lazy is true ? \Generator : array[]) */ public static function matchAll( string $subject, @@ -599,21 +600,41 @@ public static function matchAll( bool $unmatchedAsNull = false, bool $patternOrder = false, bool $utf8 = false, - ): array + bool $lazy = false, + ): array|\Generator { - $flags = is_int($captureOffset) // back compatibility - ? $captureOffset - : ($captureOffset ? PREG_OFFSET_CAPTURE : 0) | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0) | ($patternOrder ? PREG_PATTERN_ORDER : 0); - if ($utf8) { $offset = strlen(self::substring($subject, 0, $offset)); $pattern .= 'u'; } + if ($lazy) { + $flags = PREG_OFFSET_CAPTURE | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0); + return (function () use ($utf8, $captureOffset, $flags, $subject, $pattern, $offset) { + $counter = 0; + while ( + $offset <= strlen($subject) + && self::pcre('preg_match', [$pattern, $subject, &$m, $flags, $offset]) + ) { + $offset = $m[0][1] + strlen($m[0][0]); + if (!$captureOffset) { + $m = array_map(fn($item) => $item[0], $m); + } elseif ($utf8) { + $m = self::bytesToChars($subject, [$m])[0]; + } + yield $counter++ => $m; + } + })(); + } + if ($offset > strlen($subject)) { return []; } + $flags = is_int($captureOffset) // back compatibility + ? $captureOffset + : ($captureOffset ? PREG_OFFSET_CAPTURE : 0) | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0) | ($patternOrder && !$lazy ? PREG_PATTERN_ORDER : 0); + self::pcre('preg_match_all', [ $pattern, $subject, &$m, ($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER), @@ -622,7 +643,6 @@ public static function matchAll( return $utf8 && $captureOffset ? self::bytesToChars($subject, $m) : $m; - } diff --git a/tests/Utils/Strings.matchAll.lazy().phpt b/tests/Utils/Strings.matchAll.lazy().phpt new file mode 100644 index 000000000..5a68aab20 --- /dev/null +++ b/tests/Utils/Strings.matchAll.lazy().phpt @@ -0,0 +1,63 @@ +