diff --git a/ChangeLog.md b/ChangeLog.md index 4a61680..5576c72 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,3 +1,6 @@ +## 0.3.0.2 +* Fix handling of concatenated bzip2 files + ## 0.3.0.1 * Reduce dep to `data-default-class` [#6](https://github.com/snoyberg/bzlib-conduit/pull/6) diff --git a/src/Data/Conduit/BZlib.hs b/src/Data/Conduit/BZlib.hs index 755a025..36efcff 100644 --- a/src/Data/Conduit/BZlib.hs +++ b/src/Data/Conduit/BZlib.hs @@ -1,6 +1,7 @@ {-# LANGUAGE RecordWildCards #-} module Data.Conduit.BZlib ( compress, + decompress1, decompress, bzip2, @@ -131,6 +132,7 @@ compress CompressParams {..} = do yields ptr c'BZ_FINISH loop where + yields :: MonadIO m => Ptr C'bz_stream -> CInt -> ConduitT S.ByteString S.ByteString m () yields ptr action = do cont <- liftIO $ throwIfMinus "bzCompress" $ c'BZ2_bzCompress ptr action mbout <- liftIO $ getAvailOut ptr @@ -140,12 +142,14 @@ compress CompressParams {..} = do when (availIn > 0 || action == c'BZ_FINISH && cont /= c'BZ_STREAM_END) $ yields ptr action --- | Decompress a stream of ByteStrings. -decompress +-- | Decompress a stream of ByteStrings. Note that this will only decompress +-- the first compressed stream in the input and leave the rest for further +-- processing. See 'decompress'. +decompress1 :: MonadResource m => DecompressParams -- ^ Decompress parameter -> ConduitT S.ByteString S.ByteString m () -decompress DecompressParams {..} = do +decompress1 DecompressParams {..} = do (ptr, inbuf) <- lift $ allocateStream _ <- lift $ allocate (throwIfMinus_ "bzDecompressInit" $ @@ -172,13 +176,41 @@ decompress DecompressParams {..} = do yield $ fromJust mbout availIn <- liftIO $ peek $ p'bz_stream'avail_in ptr if availIn > 0 - then yields ptr + then + -- bzip2 files can contain multiple concatenated streams, but the + -- API requires that we close the stream and start a new + -- decompression session. 
+ if ret == c'BZ_STREAM_END + then do + dataIn <- liftIO $ peek $ p'bz_stream'next_in ptr + unread <- liftIO $ S.packCStringLen (dataIn, fromIntegral availIn) + leftover unread + return False + else yields ptr else return $ ret == c'BZ_OK +-- | Decompress all the compressed bzip2 streams in the input, as the bzip2 +-- command line tool does. +decompress + :: MonadResource m + => DecompressParams -- ^ Decompress parameter + -> ConduitT S.ByteString S.ByteString m () +decompress params = do + next <- await + case next of + Nothing -> return () + Just bs + | S.null bs -> decompress params + | otherwise -> do + leftover bs + decompress1 params + decompress params -- | bzip2 compression with default parameters. bzip2 :: MonadResource m => ConduitT S.ByteString S.ByteString m () bzip2 = compress def --- | bzip2 decompression with default parameters. +-- | bzip2 decompression with default parameters. This will decompress all the +-- streams in the input. bunzip2 :: MonadResource m => ConduitT S.ByteString S.ByteString m () bunzip2 = decompress def +