{-# LANGUAGE BangPatterns , OverloadedStrings , ScopedTypeVariables , UnicodeSyntax #-} -- |This is an auxiliary parser utilities for parsing things related -- on HTTP protocol. -- -- In general you don't have to use this module directly. module Network.HTTP.Lucu.Parser.Http ( isCtl , isText , isSeparator , isChar , isToken , isSPHT , listOf , crlf , sp , lws , token , separators , quotedStr , qvalue , atMost , manyCharsTill ) where import Control.Applicative import Control.Applicative.Unicode hiding ((∅)) import Control.Monad.Unicode import Data.Ascii (Ascii) import qualified Data.Ascii as A import Data.Attoparsec.Char8 as P hiding (scan) import qualified Data.Attoparsec.FastSet as FS import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Lazy.Char8 as LS import qualified Data.ByteString.Lazy.Internal as LS import qualified Data.Foldable as F import Data.Monoid import Data.Monoid.Unicode import qualified Data.Sequence as S import Data.Sequence.Unicode hiding ((∅)) import Prelude.Unicode -- |@'isCtl' c@ is 'Prelude.False' iff @0x20 <= c < 0x7F@. isCtl ∷ Char → Bool {-# INLINE isCtl #-} isCtl c | c ≤ '\x1f' = True | c > '\x7f' = True | otherwise = False -- |@'isText'@ is equivalent to @'not' '.' 'isCtl'@ isText ∷ Char → Bool {-# INLINE isText #-} isText = (¬) ∘ isCtl -- |@'isSeparator' c@ is 'Prelude.True' iff c is one of HTTP -- separators. isSeparator ∷ Char → Bool {-# INLINE isSeparator #-} isSeparator = flip FS.memberChar set where {-# NOINLINE set #-} set = FS.charClass "()<>@,;:\\\"/[]?={}\x20\x09" -- |@'isChar' c@ is 'Prelude.True' iff @c <= 0x7f@. isChar ∷ Char → Bool {-# INLINE isChar #-} isChar = (≤ '\x7F') -- |@'isToken' c@ is equivalent to @not ('isCtl' c || 'isSeparator' -- c)@ isToken ∷ Char → Bool {-# INLINE isToken #-} isToken !c = (¬) (isCtl c ∨ isSeparator c) -- |@'listOf' p@ is similar to @'sepBy' p ('char' \',\')@ but it -- allows any occurrences of 'lws' before and after each tokens. listOf ∷ Parser a → Parser [a] {-# INLINEABLE listOf #-} listOf p = try $ do skipMany lws sepBy p $ do skipMany lws _ <- char ',' skipMany lws -- |'token' is similar to @'takeWhile1' 'isToken'@ token ∷ Parser Ascii {-# INLINE token #-} token = A.unsafeFromByteString <$> takeWhile1 isToken -- |The CRLF: 0x0D 0x0A. crlf ∷ Parser () {-# INLINE crlf #-} crlf = string "\x0D\x0A" ≫ return () -- |The SP: 0x20. sp ∷ Parser () {-# INLINE sp #-} sp = char '\x20' ≫ return () -- |HTTP LWS: crlf? (sp | ht)+ lws ∷ Parser () {-# INLINEABLE lws #-} lws = try $ do option () crlf _ ← satisfy isSPHT skipWhile isSPHT -- |Returns 'True' for SP and HT. isSPHT ∷ Char → Bool {-# INLINE isSPHT #-} isSPHT '\x20' = True isSPHT '\x09' = True isSPHT _ = False -- |@'separators'@ is similar to @'takeWhile1' 'isSeparator'@. separators ∷ Parser Ascii {-# INLINE separators #-} separators = A.unsafeFromByteString <$> takeWhile1 isSeparator -- |'quotedStr' accepts a string surrounded by double quotation -- marks. Quotes can be escaped by backslashes. quotedStr ∷ Parser Ascii {-# INLINEABLE quotedStr #-} quotedStr = try $ do _ ← char '"' xs ← P.many (qdtext <|> quotedPair) _ ← char '"' return $ A.unsafeFromByteString $ BS.pack xs where qdtext ∷ Parser Char {-# INLINE qdtext #-} qdtext = satisfy (\c → c ≢ '"' ∧ (¬) (isCtl c)) quotedPair ∷ Parser Char {-# INLINE quotedPair #-} quotedPair = char '\\' ≫ satisfy isChar -- |'qvalue' accepts a so-called qvalue. qvalue ∷ Parser Double {-# INLINEABLE qvalue #-} qvalue = do x ← char '0' xs ← option "" $ do y ← char '.' ys ← atMost 3 digit return (y:ys) return $ read (x:xs) <|> do x ← char '1' xs ← option "" $ do y ← char '.' ys ← atMost 3 (char '0') return (y:ys) return $ read (x:xs) -- |@'atMost' n v@ is like @'P.many' v@ but applies the given action -- at most @n@ times. atMost ∷ Alternative f ⇒ Int → f a → f [a] {-# INLINE atMost #-} atMost 0 _ = pure [] atMost n v = ( (:) <$> v ⊛ atMost (n-1) v ) <|> pure [] data CharAccumState = CharAccumState { casChunks ∷ !(S.Seq BS.ByteString) , casLastChunk ∷ !(S.Seq Char) } instance Monoid CharAccumState where mempty = CharAccumState { casChunks = (∅) , casLastChunk = (∅) } mappend a b = b { casChunks = (casChunks a ⊳ lastChunk a) ⋈ casChunks b } lastChunk ∷ CharAccumState → BS.ByteString {-# INLINE lastChunk #-} lastChunk = BS.pack ∘ F.toList ∘ casLastChunk snoc ∷ CharAccumState → Char → CharAccumState {-# INLINEABLE snoc #-} snoc cas c | S.length (casLastChunk cas) ≥ LS.defaultChunkSize = cas { casChunks = casChunks cas ⊳ lastChunk cas , casLastChunk = S.singleton c } | otherwise = cas { casLastChunk = casLastChunk cas ⊳ c } finish ∷ CharAccumState → LS.ByteString {-# INLINEABLE finish #-} finish cas = let chunks = F.toList $ casChunks cas ⊳ lastChunk cas str = LS.fromChunks chunks in str manyCharsTill ∷ ∀m b. (Monad m, Alternative m) ⇒ m Char → m b → m LS.ByteString {-# INLINEABLE manyCharsTill #-} manyCharsTill p end = scan (∅) where scan ∷ CharAccumState → m LS.ByteString {-# INLINE scan #-} scan s = (end *> pure (finish s)) <|> (scan =≪ (snoc s <$> p))