+{-# LANGUAGE
+ OverloadedStrings
+ , UnicodeSyntax
+ #-}
+-- |This is an auxiliary parser utilities for parsing things related
+-- on HTTP protocol.
+--
+-- In general you don't have to use this module directly.
module Network.HTTP.Lucu.Parser.Http
- ( isCtl -- Char -> Bool
- , isSeparator -- Char -> Bool
- , token -- Parser Char
+ ( isCtl
+ , isText
+ , isSeparator
+ , isChar
+ , isToken
+ , isSPHT
+
+ , listOf
+
+ , crlf
+ , sp
+ , lws
+
+ , token
+ , separators
+ , quotedStr
+ , qvalue
)
where
+import Control.Applicative
+import Control.Monad
+import Data.Ascii (Ascii)
+import qualified Data.Ascii as A
+import Data.Attoparsec.Char8
+import qualified Data.ByteString.Char8 as BS
+import Network.HTTP.Lucu.Parser
+import Prelude.Unicode
-import qualified Data.ByteString.Lazy.Char8 as B
-import Data.ByteString.Lazy.Char8 (ByteString)
-import Data.List
-import Network.HTTP.Lucu.Parser
-
-isCtl :: Char -> Bool
+-- |@'isCtl' c@ returns 'False' iff @0x20 <= c < 0x7F@.
+isCtl ∷ Char → Bool
+{-# INLINE isCtl #-}
isCtl c
- | c < '\x1f' = True
- | c == '\x7f' = True
+ | c ≤ '\x1f' = True
+ | c > '\x7f' = True
| otherwise = False
+-- |@'isText'@ is equivalent to @'not' '.' 'isCtl'@
+isText ∷ Char → Bool
+{-# INLINE isText #-}
+isText = (¬) ∘ isCtl
+
+-- |@'isSeparator' c@ returns 'True' iff c is one of the HTTP
+-- separators.
+isSeparator ∷ Char → Bool
+{-# INLINE isSeparator #-}
+isSeparator = inClass "()<>@,;:\\\"/[]?={}\x20\x09"
+
+-- |@'isChar' c@ returns 'True' iff @c <= 0x7f@.
+isChar ∷ Char → Bool
+{-# INLINE isChar #-}
+isChar = (≤ '\x7F')
+
+-- |@'isToken' c@ is equivalent to @not ('isCtl' c '||' 'isSeparator'
+-- c)@
+isToken ∷ Char → Bool
+{-# INLINE isToken #-}
+isToken c = (¬) (isCtl c ∨ isSeparator c)
+
+-- |@'listOf' p@ is similar to @'sepBy' p ('char' \',\')@ but it
+-- allows any occurrences of 'lws' before and after each tokens.
+listOf ∷ Parser a → Parser [a]
+{-# INLINEABLE listOf #-}
+listOf p
+ = do skipMany lws
+ p `sepBy` do skipMany lws
+ void $ char ','
+ skipMany lws
+ <?>
+ "listOf"
-isSeparator :: Char -> Bool
-isSeparator c = elem c "()<>@,;:\\\"/[]?={} \t"
+-- |'token' is almost the same as @'takeWhile1' 'isToken'@
+token ∷ Parser Ascii
+{-# INLINE token #-}
+token = (A.unsafeFromByteString <$> takeWhile1 isToken)
+ <?>
+ "token"
+
+-- |The CRLF: 0x0D 0x0A.
+crlf ∷ Parser ()
+{-# INLINE crlf #-}
+crlf = (string "\x0D\x0A" *> return ())
+ <?>
+ "crlf"
+
+-- |The SP: 0x20.
+sp ∷ Parser ()
+{-# INLINE sp #-}
+sp = char '\x20' *> return ()
+
+-- |HTTP LWS: crlf? (sp | ht)+
+lws ∷ Parser ()
+{-# INLINEABLE lws #-}
+lws = (option () crlf *> void (takeWhile1 isSPHT))
+ <?>
+ "lws"
+
+-- |Returns 'True' for SP and HT.
+isSPHT ∷ Char → Bool
+{-# INLINE isSPHT #-}
+isSPHT '\x20' = True
+isSPHT '\x09' = True
+isSPHT _ = False
+
+-- |@'separators'@ is almost the same as @'takeWhile1' 'isSeparator'@.
+separators ∷ Parser Ascii
+{-# INLINE separators #-}
+separators = (A.unsafeFromByteString <$> takeWhile1 isSeparator)
+ <?>
+ "separators"
+
+-- |'quotedStr' accepts a string surrounded by double quotation
+-- marks. Quotes can be escaped by backslashes.
+quotedStr ∷ Parser Ascii
+{-# INLINEABLE quotedStr #-}
+quotedStr = do void $ char '"'
+ xs ← many (qdtext <|> quotedPair)
+ void $ char '"'
+ return ∘ A.unsafeFromByteString $ BS.pack xs
+ <?>
+ "quotedStr"
+ where
+ qdtext ∷ Parser Char
+ {-# INLINE qdtext #-}
+ qdtext = satisfy (\c → c ≢ '"' ∧ (¬) (isCtl c))
+ <?>
+ "qdtext"
+ quotedPair ∷ Parser Char
+ {-# INLINE quotedPair #-}
+ quotedPair = (char '\\' *> satisfy isChar)
+ <?>
+ "quotedPair"
-token :: Parser Char
-token = satisfy (\ c -> not (isCtl c || isSeparator c))
+-- |'qvalue' accepts a so-called qvalue.
+qvalue ∷ Parser Double
+{-# INLINEABLE qvalue #-}
+qvalue = ( do x ← char '0'
+ xs ← option "" $
+ do y ← char '.'
+ ys ← atMost 3 digit
+ return (y:ys)
+ return $ read (x:xs)
+ <|>
+ do x ← char '1'
+ xs ← option "" $
+ do y ← char '.'
+ ys ← atMost 3 (char '0')
+ return (y:ys)
+ return $ read (x:xs)
+ )
+ <?>
+ "qvalue"