diff --git a/src/Data/Text.hs b/src/Data/Text.hs index 86cfd84f..480a2cb0 100644 --- a/src/Data/Text.hs +++ b/src/Data/Text.hs @@ -141,7 +141,9 @@ module Data.Text , breakOn , breakOnEnd , break + , breakEnd , span + , spanEnd , group , groupBy , inits @@ -221,7 +223,7 @@ import qualified Data.Text.Internal.Fusion as S import qualified Data.Text.Internal.Fusion.Common as S import Data.Text.Encoding (decodeUtf8', encodeUtf8) import Data.Text.Internal.Fusion (stream, reverseStream, unstream) -import Data.Text.Internal.Private (span_) +import Data.Text.Internal.Private (span_, spanEnd_) import Data.Text.Internal (Text(..), empty, firstf, mul, safe, text) import Data.Text.Show (singleton, unpack, unpackCString#) import qualified Prelude as P @@ -1333,6 +1335,15 @@ span p t = case span_ p t of (# hd,tl #) -> (hd,tl) {-# INLINE span #-} +-- | /O(n)/ Similar to 'span', but searches from the end of the +-- string. +-- +-- >>> T.spanEnd (=='0') "AB000" +-- ("AB","000") +spanEnd :: (Char -> Bool) -> Text -> (Text, Text) +spanEnd p t = case spanEnd_ p t of (# hd,tl #) -> (hd,tl) +{-# inline spanEnd #-} + -- | /O(n)/ 'break' is like 'span', but the prefix returned is -- over elements that fail the predicate @p@. -- @@ -1342,6 +1353,15 @@ break :: (Char -> Bool) -> Text -> (Text, Text) break p = span (not . p) {-# INLINE break #-} +-- | /O(n)/ Similar to 'break', but searches from the end of the +-- string. +-- +-- >>> T.breakEnd (=='0') "180cm" +-- ("180","cm") +breakEnd :: (Char -> Bool) -> Text -> (Text, Text) +breakEnd p = spanEnd (not . p) +{-# inline breakEnd #-} + -- | /O(n)/ Group characters in a string according to a predicate. groupBy :: (Char -> Char -> Bool) -> Text -> [Text] groupBy p = loop diff --git a/src/Data/Text/Internal/Private.hs b/src/Data/Text/Internal/Private.hs index 7e6fe230..e7df6363 100644 --- a/src/Data/Text/Internal/Private.hs +++ b/src/Data/Text/Internal/Private.hs @@ -13,11 +13,12 @@ module Data.Text.Internal.Private ( runText , span_ + , spanEnd_ ) where import Control.Monad.ST (ST, runST) import Data.Text.Internal (Text(..), text) -import Data.Text.Unsafe (Iter(..), iter) +import Data.Text.Unsafe (Iter(..), iter, reverseIter) import qualified Data.Text.Array as A span_ :: (Char -> Bool) -> Text -> (# Text, Text #) @@ -30,6 +31,16 @@ span_ p t@(Text arr off len) = (# hd,tl #) where Iter c d = iter t i {-# INLINE span_ #-} +spanEnd_ :: (Char -> Bool) -> Text -> (# Text, Text #) +spanEnd_ p t@(Text arr off len) = (# hd,tl #) + where hd = text arr off (k+1) + tl = text arr (off+k+1) (len-(k+1)) + !k = loop (len-1) + loop !i | i >= 0 && p c = loop (i+d) + | otherwise = i + where (c,d) = reverseIter t i +{-# INLINE spanEnd_ #-} + runText :: (forall s. (A.MArray s -> Int -> ST s Text) -> ST s Text) -> Text runText act = runST (act $ \ !marr !len -> do arr <- A.unsafeFreeze marr diff --git a/src/Data/Text/Lazy.hs b/src/Data/Text/Lazy.hs index 606c538b..2dd360d9 100644 --- a/src/Data/Text/Lazy.hs +++ b/src/Data/Text/Lazy.hs @@ -1,5 +1,5 @@ {-# OPTIONS_GHC -fno-warn-orphans #-} -{-# LANGUAGE BangPatterns, MagicHash, CPP, TypeFamilies #-} +{-# LANGUAGE BangPatterns, MagicHash, CPP, OverloadedStrings, TypeFamilies #-} {-# LANGUAGE Trustworthy #-} {-# LANGUAGE TemplateHaskellQuotes #-} @@ -145,9 +145,11 @@ module Data.Text.Lazy , stripEnd , splitAt , span + , spanEnd , breakOn , breakOnEnd , break + , breakEnd , group , groupBy , inits @@ -1365,6 +1367,22 @@ break p t0 = break' t0 | otherwise -> let (a,b) = T.splitAt n t in (Chunk a Empty, Chunk b ts) +-- | /O(n)/ Similar to 'break', but searches from the end of the string. +-- +-- >>> T.breakEnd (=='0') "180cm" +-- ("180","cm") +breakEnd :: (Char -> Bool) -> Text -> (Text, Text) +breakEnd p src = breakEnd' (reverseSpine src) where + reverseSpine = go Empty where + go res Empty = res + go res (Chunk t ts) = go (Chunk t res) ts + breakEnd' = go Empty where + go r Empty = (empty, r) + go r (Chunk t ts) = case T.breakEnd p t of + ("", _) -> go (Chunk t r) ts + (l, r') -> (reverseSpine (Chunk l ts), Chunk r' r) +{-# INLINE breakEnd #-} + -- | /O(n)/ 'span', applied to a predicate @p@ and text @t@, returns -- a pair whose first element is the longest prefix (possibly empty) -- of @t@ of elements that satisfy @p@, and whose second is the @@ -1376,6 +1394,14 @@ span :: (Char -> Bool) -> Text -> (Text, Text) span p = break (not . p) {-# INLINE span #-} +-- | /O(n)/ Similar to 'span', but searches from the end of the string. +-- +-- >>> T.spanEnd Data.Char.isAlpha "000AB" +-- ("000","AB") +spanEnd :: (Char -> Bool) -> Text -> (Text, Text) +spanEnd p = breakEnd (not . p) +{-# INLINE spanEnd #-} + -- | The 'group' function takes a 'Text' and returns a list of 'Text's -- such that the concatenation of the result is equal to the argument. -- Moreover, each sublist in the result contains only equal elements. diff --git a/tests/Tests/Properties/Substrings.hs b/tests/Tests/Properties/Substrings.hs index 279ad328..392ad88f 100644 --- a/tests/Tests/Properties/Substrings.hs +++ b/tests/Tests/Properties/Substrings.hs @@ -87,8 +87,14 @@ t_strip = T.dropAround isSpace `eq` T.strip tl_strip = TL.dropAround isSpace `eq` TL.strip t_splitAt n = L.splitAt n `eqP` (unpack2 . T.splitAt n) tl_splitAt n = L.splitAt n `eqP` (unpack2 . TL.splitAt (fromIntegral n)) -t_span p = L.span p `eqP` (unpack2 . T.span p) -tl_span p = L.span p `eqP` (unpack2 . TL.span p) +t_span p = L.span p `eqP` (unpack2 . T.span p) +tl_span p = L.span p `eqP` (unpack2 . TL.span p) +t_spanEnd p = spanEnd p `eqP` (unpack2 . T.spanEnd p) +tl_spanEnd p = spanEnd p `eqP` (unpack2 . TL.spanEnd p) + +spanEnd :: (a -> Bool) -> [a] -> ([a], [a]) +spanEnd p l = case span p $ reverse l of + (s, e) -> (reverse e, reverse s) t_breakOn_id s = squid `eq` (uncurry T.append . T.breakOn s) where squid t | T.null s = error "empty" @@ -110,6 +116,13 @@ tl_breakOnEnd_end (NotEmpty s) t = in k `TL.isSuffixOf` t && (TL.null m || s `TL.isSuffixOf` m) t_break p = L.break p `eqP` (unpack2 . T.break p) tl_break p = L.break p `eqP` (unpack2 . TL.break p) +t_breakEnd p = breakEnd p `eqP` (unpack2 . T.breakEnd p) +tl_breakEnd p = breakEnd p `eqP` (unpack2 . TL.breakEnd p) + +breakEnd :: (a -> Bool) -> [a] -> ([a], [a]) +breakEnd p l = case break p $ reverse l of + (s, e) -> (reverse e, reverse s) + t_group = L.group `eqP` (map unpackS . T.group) tl_group = L.group `eqP` (map unpackS . TL.group) t_groupBy p = L.groupBy p `eqP` (map unpackS . T.groupBy p) @@ -275,6 +288,8 @@ testSubstrings = testProperty "tl_splitAt" tl_splitAt, testProperty "t_span" t_span, testProperty "tl_span" tl_span, + testProperty "t_spanEnd" t_spanEnd, + testProperty "tl_spanEnd" tl_spanEnd, testProperty "t_breakOn_id" t_breakOn_id, testProperty "tl_breakOn_id" tl_breakOn_id, testProperty "t_breakOn_start" t_breakOn_start, @@ -283,6 +298,8 @@ testSubstrings = testProperty "tl_breakOnEnd_end" tl_breakOnEnd_end, testProperty "t_break" t_break, testProperty "tl_break" tl_break, + testProperty "t_breakEnd" t_breakEnd, + testProperty "tl_breakEnd" tl_breakEnd, testProperty "t_group" t_group, testProperty "tl_group" tl_group, testProperty "t_groupBy" t_groupBy,