X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=Network%2FHTTP%2FLucu%2FParser%2FHttp.hs;h=e3fbf3501b1cc50800bf1af90f88b123beee0030;hb=9668dc27a02b59d7bfb1e9e40af3d2619700ad69;hp=65ba8b27ccb1ff66f52d6bd83a6b2af86f3980be;hpb=9bb89434103e9a22f100d6ecb7e65a5d461e0454;p=Lucu.git diff --git a/Network/HTTP/Lucu/Parser/Http.hs b/Network/HTTP/Lucu/Parser/Http.hs index 65ba8b2..e3fbf35 100644 --- a/Network/HTTP/Lucu/Parser/Http.hs +++ b/Network/HTTP/Lucu/Parser/Http.hs @@ -1,6 +1,6 @@ {-# LANGUAGE - BangPatterns - , OverloadedStrings + OverloadedStrings + , ScopedTypeVariables , UnicodeSyntax #-} -- |This is an auxiliary parser utilities for parsing things related @@ -27,19 +27,27 @@ module Network.HTTP.Lucu.Parser.Http , qvalue , atMost + , manyCharsTill ) where import Control.Applicative -import Control.Applicative.Unicode +import Control.Applicative.Unicode hiding ((∅)) import Control.Monad.Unicode import Data.Ascii (Ascii) import qualified Data.Ascii as A -import Data.Attoparsec.Char8 as P +import Data.Attoparsec.Char8 as P hiding (scan) import qualified Data.Attoparsec.FastSet as FS import qualified Data.ByteString.Char8 as BS +import qualified Data.ByteString.Lazy.Char8 as LS +import qualified Data.ByteString.Lazy.Internal as LS +import Data.Foldable +import Data.Monoid +import Data.Monoid.Unicode +import qualified Data.Sequence as S +import Data.Sequence.Unicode hiding ((∅)) import Prelude.Unicode --- |@'isCtl' c@ is 'Prelude.False' iff @0x20 <= c < 0x7F@. +-- |@'isCtl' c@ returns 'False' iff @0x20 <= c < 0x7F@. isCtl ∷ Char → Bool {-# INLINE isCtl #-} isCtl c @@ -52,7 +60,7 @@ isText ∷ Char → Bool {-# INLINE isText #-} isText = (¬) ∘ isCtl --- |@'isSeparator' c@ is 'Prelude.True' iff c is one of HTTP +-- |@'isSeparator' c@ returns 'True' iff c is one of the HTTP -- separators. isSeparator ∷ Char → Bool {-# INLINE isSeparator #-} @@ -61,30 +69,27 @@ isSeparator = flip FS.memberChar set {-# NOINLINE set #-} set = FS.charClass "()<>@,;:\\\"/[]?={}\x20\x09" --- |@'isChar' c@ is 'Prelude.True' iff @c <= 0x7f@. +-- |@'isChar' c@ returns 'True' iff @c <= 0x7f@. isChar ∷ Char → Bool {-# INLINE isChar #-} isChar = (≤ '\x7F') --- |@'isToken' c@ is equivalent to @not ('isCtl' c || 'isSeparator' +-- |@'isToken' c@ is equivalent to @not ('isCtl' c '||' 'isSeparator' -- c)@ isToken ∷ Char → Bool {-# INLINE isToken #-} -isToken !c - = (¬) (isCtl c ∨ isSeparator c) +isToken c = (¬) (isCtl c ∨ isSeparator c) -- |@'listOf' p@ is similar to @'sepBy' p ('char' \',\')@ but it -- allows any occurrences of 'lws' before and after each tokens. listOf ∷ Parser a → Parser [a] {-# INLINEABLE listOf #-} -listOf p - = try $ - do skipMany lws - sepBy p $ do skipMany lws - _ <- char ',' - skipMany lws - --- |'token' is similar to @'takeWhile1' 'isToken'@ +listOf p = do skipMany lws + sepBy p $ do skipMany lws + _ ← char ',' + skipMany lws + +-- |'token' is almost the same as @'takeWhile1' 'isToken'@ token ∷ Parser Ascii {-# INLINE token #-} token = A.unsafeFromByteString <$> takeWhile1 isToken @@ -92,20 +97,19 @@ token = A.unsafeFromByteString <$> takeWhile1 isToken -- |The CRLF: 0x0D 0x0A. crlf ∷ Parser () {-# INLINE crlf #-} -crlf = string "\x0D\x0A" ≫ return () +crlf = string "\x0D\x0A" *> return () -- |The SP: 0x20. sp ∷ Parser () {-# INLINE sp #-} -sp = char '\x20' ≫ return () +sp = char '\x20' *> return () -- |HTTP LWS: crlf? (sp | ht)+ lws ∷ Parser () {-# INLINEABLE lws #-} -lws = try $ - do option () crlf - _ ← satisfy isSPHT - skipWhile isSPHT +lws = do option () crlf + _ ← takeWhile1 isSPHT + return () -- |Returns 'True' for SP and HT. isSPHT ∷ Char → Bool @@ -114,7 +118,7 @@ isSPHT '\x20' = True isSPHT '\x09' = True isSPHT _ = False --- |@'separators'@ is similar to @'takeWhile1' 'isSeparator'@. +-- |@'separators'@ is almost the same as @'takeWhile1' 'isSeparator'@. separators ∷ Parser Ascii {-# INLINE separators #-} separators = A.unsafeFromByteString <$> takeWhile1 isSeparator @@ -135,7 +139,7 @@ quotedStr = try $ quotedPair ∷ Parser Char {-# INLINE quotedPair #-} - quotedPair = char '\\' ≫ satisfy isChar + quotedPair = char '\\' *> satisfy isChar -- |'qvalue' accepts a so-called qvalue. qvalue ∷ Parser Double @@ -154,11 +158,69 @@ qvalue = do x ← char '0' return (y:ys) return $ read (x:xs) --- |@'atMost' n v@ is like @'P.many' v@ but applies the given action --- at most @n@ times. +-- |@'atMost' n v@ is like @'P.many' v@ but accumulates @v@ at most +-- @n@ times. atMost ∷ Alternative f ⇒ Int → f a → f [a] {-# INLINE atMost #-} atMost 0 _ = pure [] atMost n v = ( (:) <$> v ⊛ atMost (n-1) v ) <|> pure [] + +data CharAccumState + = CharAccumState { + casChunks ∷ !(S.Seq BS.ByteString) + , casLastChunk ∷ !(S.Seq Char) + } + +instance Monoid CharAccumState where + mempty + = CharAccumState { + casChunks = (∅) + , casLastChunk = (∅) + } + mappend a b + = b { + casChunks = (casChunks a ⊳ lastChunk a) ⋈ casChunks b + } + +lastChunk ∷ CharAccumState → BS.ByteString +{-# INLINE lastChunk #-} +lastChunk = BS.pack ∘ toList ∘ casLastChunk + +snoc ∷ CharAccumState → Char → CharAccumState +{-# INLINEABLE snoc #-} +snoc cas c + | S.length (casLastChunk cas) ≥ LS.defaultChunkSize + = cas { + casChunks = casChunks cas ⊳ lastChunk cas + , casLastChunk = S.singleton c + } + | otherwise + = cas { + casLastChunk = casLastChunk cas ⊳ c + } + +finish ∷ CharAccumState → LS.ByteString +{-# INLINEABLE finish #-} +finish cas + = let chunks = toList $ casChunks cas ⊳ lastChunk cas + str = LS.fromChunks chunks + in + str + +-- |@'manyCharsTill' p end@ takes as many characters untill @p@ +-- succeeds. +manyCharsTill ∷ ∀m b. (Monad m, Alternative m) + ⇒ m Char + → m b + → m LS.ByteString +{-# INLINEABLE manyCharsTill #-} +manyCharsTill p end = scan (∅) + where + scan ∷ CharAccumState → m LS.ByteString + {-# INLINE scan #-} + scan s + = (end *> pure (finish s)) + <|> + (scan =≪ (snoc s <$> p))