Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add spanEnd and breakEnd to Data.Text #312

Draft
wants to merge 6 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion src/Data/Text.hs
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,9 @@ module Data.Text
, breakOn
, breakOnEnd
, break
, breakEnd
, span
, spanEnd
, group
, groupBy
, inits
Expand Down Expand Up @@ -221,7 +223,7 @@ import qualified Data.Text.Internal.Fusion as S
import qualified Data.Text.Internal.Fusion.Common as S
import Data.Text.Encoding (decodeUtf8', encodeUtf8)
import Data.Text.Internal.Fusion (stream, reverseStream, unstream)
import Data.Text.Internal.Private (span_)
import Data.Text.Internal.Private (span_, spanEnd_)
import Data.Text.Internal (Text(..), empty, firstf, mul, safe, text)
import Data.Text.Show (singleton, unpack, unpackCString#)
import qualified Prelude as P
Expand Down Expand Up @@ -1333,6 +1335,15 @@ span p t = case span_ p t of
(# hd,tl #) -> (hd,tl)
{-# INLINE span #-}

-- | /O(n)/ The mirror image of 'span': the predicate is tested starting
-- at the last character and working backwards. The second component of
-- the result is the longest suffix whose characters all satisfy the
-- predicate; the first component is everything that precedes it.
--
-- >>> T.spanEnd (=='0') "AB000"
-- ("AB","000")
spanEnd :: (Char -> Bool) -> Text -> (Text, Text)
spanEnd p t =
    case spanEnd_ p t of
      -- Repackage the unboxed pair produced by the worker as a lifted tuple.
      (# prefix, suffix #) -> (prefix, suffix)
{-# INLINE spanEnd #-}

-- | /O(n)/ 'break' is like 'span', but the prefix returned is
-- over elements that fail the predicate @p@.
--
Expand All @@ -1342,6 +1353,15 @@ break :: (Char -> Bool) -> Text -> (Text, Text)
break p = span (not . p)
{-# INLINE break #-}

-- | /O(n)/ The mirror image of 'break': the predicate is tested starting
-- at the last character and working backwards. The second component of
-- the result is the longest suffix whose characters all /fail/ the
-- predicate; the first component is everything that precedes it.
--
-- >>> T.breakEnd (=='0') "180cm"
-- ("180","cm")
breakEnd :: (Char -> Bool) -> Text -> (Text, Text)
breakEnd p = spanEnd (\c -> not (p c))
{-# INLINE breakEnd #-}

-- | /O(n)/ Group characters in a string according to a predicate.
groupBy :: (Char -> Char -> Bool) -> Text -> [Text]
groupBy p = loop
Expand Down
13 changes: 12 additions & 1 deletion src/Data/Text/Internal/Private.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@ module Data.Text.Internal.Private
(
runText
, span_
, spanEnd_
) where

import Control.Monad.ST (ST, runST)
import Data.Text.Internal (Text(..), text)
import Data.Text.Unsafe (Iter(..), iter)
import Data.Text.Unsafe (Iter(..), iter, reverseIter)
import qualified Data.Text.Array as A

span_ :: (Char -> Bool) -> Text -> (# Text, Text #)
Expand All @@ -30,6 +31,16 @@ span_ p t@(Text arr off len) = (# hd,tl #)
where Iter c d = iter t i
{-# INLINE span_ #-}

-- | Worker for @spanEnd@: splits a 'Text' into everything before its
-- longest predicate-satisfying suffix, and that suffix itself, returned
-- as an unboxed pair.
spanEnd_ :: (Char -> Bool) -> Text -> (# Text, Text #)
spanEnd_ p t@(Text arr off len) = (# prefix, suffix #)
  where
    prefix = text arr off cut
    suffix = text arr (off + cut) (len - cut)
    -- Length of the prefix: one past the index at which the backwards
    -- scan stopped (0 when the scan ran off the front of the text).
    !cut = scan (len - 1) + 1
    -- Walk backwards from index @i@ while the character there satisfies
    -- @p@; the result is the index where the scan stopped, or a negative
    -- value if every character matched.
    scan !i
      | i < 0     = i
      | p c       = scan (i + d)
      | otherwise = i
      -- 'reverseIter' yields the character at @i@ and the delta to add to
      -- reach the preceding one (presumably negative -- confirm against
      -- Data.Text.Unsafe). The pattern is lazy, so nothing is read when
      -- @i < 0@.
      where (c, d) = reverseIter t i
{-# INLINE spanEnd_ #-}

runText :: (forall s. (A.MArray s -> Int -> ST s Text) -> ST s Text) -> Text
runText act = runST (act $ \ !marr !len -> do
arr <- A.unsafeFreeze marr
Expand Down
28 changes: 27 additions & 1 deletion src/Data/Text/Lazy.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE BangPatterns, MagicHash, CPP, TypeFamilies #-}
{-# LANGUAGE BangPatterns, MagicHash, CPP, OverloadedStrings, TypeFamilies #-}
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE TemplateHaskellQuotes #-}

Expand Down Expand Up @@ -145,9 +145,11 @@ module Data.Text.Lazy
, stripEnd
, splitAt
, span
, spanEnd
, breakOn
, breakOnEnd
, break
, breakEnd
, group
, groupBy
, inits
Expand Down Expand Up @@ -1365,6 +1367,22 @@ break p t0 = break' t0
| otherwise -> let (a,b) = T.splitAt n t
in (Chunk a Empty, Chunk b ts)

-- | /O(n)/ Similar to 'break', but searches from the end of the string.
--
-- The second component of the result is the longest suffix whose
-- characters all fail the predicate; the first component is everything
-- that precedes it.
--
-- >>> T.breakEnd (=='0') "180cm"
-- ("180","cm")
breakEnd :: (Char -> Bool) -> Text -> (Text, Text)
breakEnd p src = breakEnd' (reverseSpine src)
  where
    -- Reverse the order of the chunks (not their contents) so the scan
    -- can start from the last chunk.
    reverseSpine = go Empty
      where
        go res Empty        = res
        go res (Chunk t ts) = go (Chunk t res) ts
    -- Walk the reversed spine, accumulating in @r@ (in original order)
    -- the chunks that contain no character satisfying @p@.
    breakEnd' = go Empty
      where
        -- No character satisfied @p@: the whole input is the suffix.
        go r Empty = (empty, r)
        go r (Chunk t ts) = case T.breakEnd p t of
          (l, r')
            -- Entire chunk fails @p@; keep it in the suffix and continue.
            -- 'T.null' is used instead of matching on a @""@ literal so
            -- the module does not need OverloadedStrings.
            | T.null l  -> go (Chunk t r) ts
            -- @l@ is non-empty here, so 'Chunk' is safe on the left.
            -- @r'@ is empty when the chunk's last character satisfies
            -- @p@, so the smart constructor 'chunk' is required to avoid
            -- putting an empty chunk into the result.
            | otherwise -> (reverseSpine (Chunk l ts), chunk r' r)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks good, but please do not enable OverloadedStrings in this module.

Would it be possible to implement via foldrChunks? Something like

breakEnd :: (Char -> Bool) -> Text -> (Text, Text)
breakEnd p = foldrChunks go (empty, empty) 
    where 
        go x (ys, zs) 
            | null ys   = let (y, z) = T.breakEnd p x in (chunk y empty, chunk z zs)
            | otherwise = (chunk x ys, zs)

{-# INLINE breakEnd #-}

-- | /O(n)/ 'span', applied to a predicate @p@ and text @t@, returns
-- a pair whose first element is the longest prefix (possibly empty)
-- of @t@ of elements that satisfy @p@, and whose second is the
Expand All @@ -1376,6 +1394,14 @@ span :: (Char -> Bool) -> Text -> (Text, Text)
span p = break (not . p)
{-# INLINE span #-}

-- | /O(n)/ The mirror image of 'span': the predicate is tested starting
-- at the last character and working backwards. The second component of
-- the result is the longest suffix whose characters all satisfy the
-- predicate; the first component is everything that precedes it.
--
-- >>> T.spanEnd Data.Char.isAlpha "000AB"
-- ("000","AB")
spanEnd :: (Char -> Bool) -> Text -> (Text, Text)
spanEnd p = breakEnd (\c -> not (p c))
{-# INLINE spanEnd #-}

-- | The 'group' function takes a 'Text' and returns a list of 'Text's
-- such that the concatenation of the result is equal to the argument.
-- Moreover, each sublist in the result contains only equal elements.
Expand Down
21 changes: 19 additions & 2 deletions tests/Tests/Properties/Substrings.hs
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,14 @@ t_strip = T.dropAround isSpace `eq` T.strip
-- Properties relating strict (T) and lazy (TL) Text operations to a
-- reference implementation. `eq`, `eqP` and `unpack2` are helpers defined
-- elsewhere; presumably they compare the two sides on equivalent inputs
-- after unpacking -- confirm against the test utilities.
tl_strip = TL.dropAround isSpace `eq` TL.strip
t_splitAt n = L.splitAt n `eqP` (unpack2 . T.splitAt n)
tl_splitAt n = L.splitAt n `eqP` (unpack2 . TL.splitAt (fromIntegral n))
-- NOTE(review): t_span / tl_span appear twice below; this looks like the
-- old and new side of a whitespace-only diff -- keep only one pair.
t_span p = L.span p `eqP` (unpack2 . T.span p)
tl_span p = L.span p `eqP` (unpack2 . TL.span p)
t_span p = L.span p `eqP` (unpack2 . T.span p)
tl_span p = L.span p `eqP` (unpack2 . TL.span p)
-- spanEnd on Text is checked against the list-based reference `spanEnd`
-- defined in this module.
t_spanEnd p = spanEnd p `eqP` (unpack2 . T.spanEnd p)
tl_spanEnd p = spanEnd p `eqP` (unpack2 . TL.spanEnd p)

-- | List reference model for the Text @spanEnd@ functions: splits a list
-- into everything before its longest predicate-satisfying suffix, and
-- that suffix.
spanEnd :: (a -> Bool) -> [a] -> ([a], [a])
spanEnd p l = restore (span p (reverse l))
  where
    -- 'span' ran on the reversed list, so its matching prefix is our
    -- suffix; un-reverse both halves and swap them back into order.
    restore (matched, rest) = (reverse rest, reverse matched)

t_breakOn_id s = squid `eq` (uncurry T.append . T.breakOn s)
where squid t | T.null s = error "empty"
Expand All @@ -110,6 +116,13 @@ tl_breakOnEnd_end (NotEmpty s) t =
in k `TL.isSuffixOf` t && (TL.null m || s `TL.isSuffixOf` m)
-- break / breakEnd on strict (T) and lazy (TL) Text, checked against list
-- references: Data.List's break and the list-based `breakEnd` defined in
-- this module. `eqP` and `unpack2` are helpers defined elsewhere;
-- presumably they compare both sides after unpacking -- confirm.
t_break p = L.break p `eqP` (unpack2 . T.break p)
tl_break p = L.break p `eqP` (unpack2 . TL.break p)
t_breakEnd p = breakEnd p `eqP` (unpack2 . T.breakEnd p)
tl_breakEnd p = breakEnd p `eqP` (unpack2 . TL.breakEnd p)

-- | List reference model for the Text @breakEnd@ functions: splits a list
-- into everything up to and including the last element satisfying the
-- predicate, and the predicate-failing suffix that follows it.
breakEnd :: (a -> Bool) -> [a] -> ([a], [a])
breakEnd p l = restore (break p (reverse l))
  where
    -- 'break' ran on the reversed list, so its non-matching prefix is our
    -- suffix; un-reverse both halves and swap them back into order.
    restore (failing, rest) = (reverse rest, reverse failing)

-- group / groupBy on Text checked against Data.List's versions. `eqP` and
-- `unpackS` are helpers defined elsewhere; presumably each resulting Text
-- is unpacked before comparison -- confirm against the test utilities.
t_group = L.group `eqP` (map unpackS . T.group)
tl_group = L.group `eqP` (map unpackS . TL.group)
t_groupBy p = L.groupBy p `eqP` (map unpackS . T.groupBy p)
Expand Down Expand Up @@ -275,6 +288,8 @@ testSubstrings =
testProperty "tl_splitAt" tl_splitAt,
testProperty "t_span" t_span,
testProperty "tl_span" tl_span,
testProperty "t_spanEnd" t_spanEnd,
testProperty "tl_spanEnd" tl_spanEnd,
testProperty "t_breakOn_id" t_breakOn_id,
testProperty "tl_breakOn_id" tl_breakOn_id,
testProperty "t_breakOn_start" t_breakOn_start,
Expand All @@ -283,6 +298,8 @@ testSubstrings =
testProperty "tl_breakOnEnd_end" tl_breakOnEnd_end,
testProperty "t_break" t_break,
testProperty "tl_break" tl_break,
testProperty "t_breakEnd" t_breakEnd,
testProperty "tl_breakEnd" tl_breakEnd,
testProperty "t_group" t_group,
testProperty "tl_group" tl_group,
testProperty "t_groupBy" t_groupBy,
Expand Down