{-# LANGUAGE BangPatterns , OverloadedStrings , ScopedTypeVariables , UnicodeSyntax #-} -- |This is an auxiliary parser utilities for parsing things related -- on HTTP protocol. -- -- In general you don't have to use this module directly. module Network.HTTP.Lucu.Parser.Http ( isCtl , isText , isSeparator , isChar , isToken , isSPHT , listOf , crlf , sp , lws , token , separators , quotedStr , qvalue , atMost , manyCharsTill ) where import Control.Applicative import Control.Applicative.Unicode hiding ((∅)) import Control.Monad.Unicode import Data.Ascii (Ascii) import qualified Data.Ascii as A import Data.Attoparsec.Char8 as P hiding (scan) import qualified Data.Attoparsec.FastSet as FS import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Lazy.Char8 as LS import qualified Data.ByteString.Lazy.Internal as LS import Data.Foldable import Data.Monoid import Data.Monoid.Unicode import qualified Data.Sequence as S import Data.Sequence.Unicode hiding ((∅)) import Prelude.Unicode -- |@'isCtl' c@ is 'Prelude.False' iff @0x20 <= c < 0x7F@. isCtl ∷ Char → Bool {-# INLINE isCtl #-} isCtl c | c ≤ '\x1f' = True | c > '\x7f' = True | otherwise = False -- |@'isText'@ is equivalent to @'not' '.' 'isCtl'@ isText ∷ Char → Bool {-# INLINE isText #-} isText = (¬) ∘ isCtl -- |@'isSeparator' c@ is 'Prelude.True' iff c is one of HTTP -- separators. isSeparator ∷ Char → Bool {-# INLINE isSeparator #-} isSeparator = flip FS.memberChar set where {-# NOINLINE set #-} set = FS.charClass "()<>@,;:\\\"/[]?={}\x20\x09" -- |@'isChar' c@ is 'Prelude.True' iff @c <= 0x7f@. isChar ∷ Char → Bool {-# INLINE isChar #-} isChar = (≤ '\x7F') -- |@'isToken' c@ is equivalent to @not ('isCtl' c || 'isSeparator' -- c)@ isToken ∷ Char → Bool {-# INLINE isToken #-} isToken !c = (¬) (isCtl c ∨ isSeparator c) -- |@'listOf' p@ is similar to @'sepBy' p ('char' \',\')@ but it -- allows any occurrences of 'lws' before and after each tokens. listOf ∷ Parser a → Parser [a] {-# INLINEABLE listOf #-} listOf p = do skipMany lws sepBy p $ do skipMany lws _ <- char ',' skipMany lws -- |'token' is similar to @'takeWhile1' 'isToken'@ token ∷ Parser Ascii {-# INLINE token #-} token = A.unsafeFromByteString <$> takeWhile1 isToken -- |The CRLF: 0x0D 0x0A. crlf ∷ Parser () {-# INLINE crlf #-} crlf = string "\x0D\x0A" ≫ return () -- |The SP: 0x20. sp ∷ Parser () {-# INLINE sp #-} sp = char '\x20' ≫ return () -- |HTTP LWS: crlf? (sp | ht)+ lws ∷ Parser () {-# INLINEABLE lws #-} lws = do option () crlf _ ← takeWhile1 isSPHT return () -- |Returns 'True' for SP and HT. isSPHT ∷ Char → Bool {-# INLINE isSPHT #-} isSPHT '\x20' = True isSPHT '\x09' = True isSPHT _ = False -- |@'separators'@ is similar to @'takeWhile1' 'isSeparator'@. separators ∷ Parser Ascii {-# INLINE separators #-} separators = A.unsafeFromByteString <$> takeWhile1 isSeparator -- |'quotedStr' accepts a string surrounded by double quotation -- marks. Quotes can be escaped by backslashes. quotedStr ∷ Parser Ascii {-# INLINEABLE quotedStr #-} quotedStr = try $ do _ ← char '"' xs ← P.many (qdtext <|> quotedPair) _ ← char '"' return $ A.unsafeFromByteString $ BS.pack xs where qdtext ∷ Parser Char {-# INLINE qdtext #-} qdtext = satisfy (\c → c ≢ '"' ∧ (¬) (isCtl c)) quotedPair ∷ Parser Char {-# INLINE quotedPair #-} quotedPair = char '\\' ≫ satisfy isChar -- |'qvalue' accepts a so-called qvalue. qvalue ∷ Parser Double {-# INLINEABLE qvalue #-} qvalue = do x ← char '0' xs ← option "" $ do y ← char '.' ys ← atMost 3 digit return (y:ys) return $ read (x:xs) <|> do x ← char '1' xs ← option "" $ do y ← char '.' ys ← atMost 3 (char '0') return (y:ys) return $ read (x:xs) -- |@'atMost' n v@ is like @'P.many' v@ but applies the given action -- at most @n@ times. atMost ∷ Alternative f ⇒ Int → f a → f [a] {-# INLINE atMost #-} atMost 0 _ = pure [] atMost n v = ( (:) <$> v ⊛ atMost (n-1) v ) <|> pure [] data CharAccumState = CharAccumState { casChunks ∷ !(S.Seq BS.ByteString) , casLastChunk ∷ !(S.Seq Char) } instance Monoid CharAccumState where mempty = CharAccumState { casChunks = (∅) , casLastChunk = (∅) } mappend a b = b { casChunks = (casChunks a ⊳ lastChunk a) ⋈ casChunks b } lastChunk ∷ CharAccumState → BS.ByteString {-# INLINE lastChunk #-} lastChunk = BS.pack ∘ toList ∘ casLastChunk snoc ∷ CharAccumState → Char → CharAccumState {-# INLINEABLE snoc #-} snoc cas c | S.length (casLastChunk cas) ≥ LS.defaultChunkSize = cas { casChunks = casChunks cas ⊳ lastChunk cas , casLastChunk = S.singleton c } | otherwise = cas { casLastChunk = casLastChunk cas ⊳ c } finish ∷ CharAccumState → LS.ByteString {-# INLINEABLE finish #-} finish cas = let chunks = toList $ casChunks cas ⊳ lastChunk cas str = LS.fromChunks chunks in str manyCharsTill ∷ ∀m b. (Monad m, Alternative m) ⇒ m Char → m b → m LS.ByteString {-# INLINEABLE manyCharsTill #-} manyCharsTill p end = scan (∅) where scan ∷ CharAccumState → m LS.ByteString {-# INLINE scan #-} scan s = (end *> pure (finish s)) <|> (scan =≪ (snoc s <$> p))