6 -- |This is an auxiliary parser utilities for parsing things related
9 -- In general you don't have to use this module directly.
10 module Network.HTTP.Lucu.Parser.Http
33 import Control.Applicative
34 import Control.Applicative.Unicode hiding ((∅))
35 import Control.Monad.Unicode
36 import Data.Ascii (Ascii)
37 import qualified Data.Ascii as A
38 import Data.Attoparsec.Char8 as P hiding (scan)
39 import qualified Data.Attoparsec.FastSet as FS
40 import qualified Data.ByteString.Char8 as BS
41 import qualified Data.ByteString.Lazy.Char8 as LS
42 import qualified Data.ByteString.Lazy.Internal as LS
45 import Data.Monoid.Unicode
46 import qualified Data.Sequence as S
47 import Data.Sequence.Unicode hiding ((∅))
48 import Prelude.Unicode
50 -- |@'isCtl' c@ returns 'False' iff @0x20 <= c < 0x7F@.
58 -- |@'isText'@ is equivalent to @'not' '.' 'isCtl'@
63 -- |@'isSeparator' c@ returns 'True' iff c is one of the HTTP
65 isSeparator ∷ Char → Bool
66 {-# INLINE isSeparator #-}
67 isSeparator = flip FS.memberChar set
70 set = FS.charClass "()<>@,;:\\\"/[]?={}\x20\x09"
72 -- |@'isChar' c@ returns 'True' iff @c <= 0x7f@.
77 -- |@'isToken' c@ is equivalent to @not ('isCtl' c '||' 'isSeparator'
80 {-# INLINE isToken #-}
81 isToken c = (¬) (isCtl c ∨ isSeparator c)
83 -- |@'listOf' p@ is similar to @'sepBy' p ('char' \',\')@ but it
84 -- allows any occurrences of 'lws' before and after each tokens.
85 listOf ∷ Parser a → Parser [a]
86 {-# INLINEABLE listOf #-}
87 listOf p = do skipMany lws
88 sepBy p $ do skipMany lws
92 -- |'token' is almost the same as @'takeWhile1' 'isToken'@
95 token = A.unsafeFromByteString <$> takeWhile1 isToken
97 -- |The CRLF: 0x0D 0x0A.
100 crlf = string "\x0D\x0A" *> return ()
105 sp = char '\x20' *> return ()
107 -- |HTTP LWS: crlf? (sp | ht)+
109 {-# INLINEABLE lws #-}
110 lws = do option () crlf
111 _ ← takeWhile1 isSPHT
114 -- |Returns 'True' for SP and HT.
116 {-# INLINE isSPHT #-}
121 -- |@'separators'@ is almost the same as @'takeWhile1' 'isSeparator'@.
122 separators ∷ Parser Ascii
123 {-# INLINE separators #-}
124 separators = A.unsafeFromByteString <$> takeWhile1 isSeparator
126 -- |'quotedStr' accepts a string surrounded by double quotation
127 -- marks. Quotes can be escaped by backslashes.
128 quotedStr ∷ Parser Ascii
129 {-# INLINEABLE quotedStr #-}
132 xs ← P.many (qdtext <|> quotedPair)
134 return $ A.unsafeFromByteString $ BS.pack xs
137 {-# INLINE qdtext #-}
138 qdtext = satisfy (\c → c ≢ '"' ∧ (¬) (isCtl c))
140 quotedPair ∷ Parser Char
141 {-# INLINE quotedPair #-}
142 quotedPair = char '\\' *> satisfy isChar
144 -- |'qvalue' accepts a so-called qvalue.
145 qvalue ∷ Parser Double
146 {-# INLINEABLE qvalue #-}
147 qvalue = do x ← char '0'
157 ys ← atMost 3 (char '0')
161 -- |@'atMost' n v@ is like @'P.many' v@ but accumulates @v@ at most
163 atMost ∷ Alternative f ⇒ Int → f a → f [a]
164 {-# INLINE atMost #-}
166 atMost n v = ( (:) <$> v ⊛ atMost (n-1) v )
172 casChunks ∷ !(S.Seq BS.ByteString)
173 , casLastChunk ∷ !(S.Seq Char)
176 instance Monoid CharAccumState where
184 casChunks = (casChunks a ⊳ lastChunk a) ⋈ casChunks b
187 lastChunk ∷ CharAccumState → BS.ByteString
188 {-# INLINE lastChunk #-}
189 lastChunk = BS.pack ∘ toList ∘ casLastChunk
191 snoc ∷ CharAccumState → Char → CharAccumState
192 {-# INLINEABLE snoc #-}
194 | S.length (casLastChunk cas) ≥ LS.defaultChunkSize
196 casChunks = casChunks cas ⊳ lastChunk cas
197 , casLastChunk = S.singleton c
201 casLastChunk = casLastChunk cas ⊳ c
204 finish ∷ CharAccumState → LS.ByteString
205 {-# INLINEABLE finish #-}
207 = let chunks = toList $ casChunks cas ⊳ lastChunk cas
208 str = LS.fromChunks chunks
212 -- |@'manyCharsTill' p end@ takes as many characters untill @p@
214 manyCharsTill ∷ ∀m b. (Monad m, Alternative m)
218 {-# INLINEABLE manyCharsTill #-}
219 manyCharsTill p end = scan (∅)
221 scan ∷ CharAccumState → m LS.ByteString
224 = (end *> pure (finish s))
226 (scan =≪ (snoc s <$> p))