+{-# LANGUAGE
+ OverloadedStrings
+ , ScopedTypeVariables
+ , UnicodeSyntax
+ #-}
-- |This is an auxiliary parser utilities for parsing things related
-- on HTTP protocol.
--
-- In general you don't have to use this module directly.
module Network.HTTP.Lucu.Parser.Http
( isCtl
+ , isText
, isSeparator
, isChar
, isToken
+ , isSPHT
+
, listOf
- , token
+
+ , crlf
+ , sp
, lws
- , text
- , separator
+
+ , token
+ , separators
, quotedStr
, qvalue
+
+ , atMost
+ , manyCharsTill
)
where
+import Control.Applicative
+import Control.Applicative.Unicode hiding ((∅))
+import Control.Monad.Unicode
+import Data.Ascii (Ascii)
+import qualified Data.Ascii as A
+import Data.Attoparsec.Char8 as P hiding (scan)
+import qualified Data.Attoparsec.FastSet as FS
+import qualified Data.ByteString.Char8 as BS
+import qualified Data.ByteString.Lazy.Char8 as LS
+import qualified Data.ByteString.Lazy.Internal as LS
+import Data.Foldable
+import Data.Monoid
+import Data.Monoid.Unicode
+import qualified Data.Sequence as S
+import Data.Sequence.Unicode hiding ((∅))
+import Prelude.Unicode
-import Network.HTTP.Lucu.Parser
-
--- |@'isCtl' c@ is 'Prelude.False' iff @0x20 <= @c@ < 0x7F@.
-isCtl :: Char -> Bool
+-- |@'isCtl' c@ is 'Prelude.False' iff @0x20 <= c < 0x7F@.
+isCtl ∷ Char → Bool
+{-# INLINE isCtl #-}
isCtl c
- | c < '\x1f' = True
- | c >= '\x7f' = True
- | otherwise = False
+ | c ≤ '\x1f' = True
+ | c > '\x7f' = True
+ | otherwise = False
+
+-- |@'isText'@ is equivalent to @'not' '.' 'isCtl'@
+isText ∷ Char → Bool
+{-# INLINE isText #-}
+isText = (¬) ∘ isCtl
-- |@'isSeparator' c@ is 'Prelude.True' iff c is one of HTTP
-- separators.
-isSeparator :: Char -> Bool
-isSeparator '(' = True
-isSeparator ')' = True
-isSeparator '<' = True
-isSeparator '>' = True
-isSeparator '@' = True
-isSeparator ',' = True
-isSeparator ';' = True
-isSeparator ':' = True
-isSeparator '\\' = True
-isSeparator '"' = True
-isSeparator '/' = True
-isSeparator '[' = True
-isSeparator ']' = True
-isSeparator '?' = True
-isSeparator '=' = True
-isSeparator '{' = True
-isSeparator '}' = True
-isSeparator ' ' = True
-isSeparator '\t' = True
-isSeparator _ = False
+isSeparator ∷ Char → Bool
+{-# INLINE isSeparator #-}
+isSeparator = flip FS.memberChar set
+ where
+ {-# NOINLINE set #-}
+ set = FS.charClass "()<>@,;:\\\"/[]?={}\x20\x09"
-- |@'isChar' c@ is 'Prelude.True' iff @c <= 0x7f@.
-isChar :: Char -> Bool
-isChar c
- | c <= '\x7f' = True
- | otherwise = False
+isChar ∷ Char → Bool
+{-# INLINE isChar #-}
+isChar = (≤ '\x7F')
-- |@'isToken' c@ is equivalent to @not ('isCtl' c || 'isSeparator'
-- c)@
-isToken :: Char -> Bool
-isToken c = c `seq`
- not (isCtl c || isSeparator c)
-
--- |@'listOf' p@ is similar to @'Network.HTTP.Lucu.Parser.sepBy' p
--- ('Network.HTTP.Lucu.Parser.char' \',\')@ but it allows any
--- occurrences of LWS before and after each tokens.
-listOf :: Parser a -> Parser [a]
-listOf !p = do _ <- many lws
- sepBy p $! do _ <- many lws
- _ <- char ','
- many lws
-
--- |'token' is equivalent to @'Network.HTTP.Lucu.Parser.many1' $
--- 'Network.HTTP.Lucu.Parser.satisfy' 'isToken'@
-token :: Parser String
-token = many1 $! satisfy isToken
-
--- |'lws' is an HTTP LWS: @'Network.HTTP.Lucu.Parser.crlf'?
--- ('Network.HTTP.Lucu.Parser.sp' | 'Network.HTTP.Lucu.Parser.ht')+@
-lws :: Parser String
-lws = do s <- option "" crlf
- xs <- many1 (sp <|> ht)
- return (s ++ xs)
-
--- |'text' accepts one character which doesn't satisfy 'isCtl'.
-text :: Parser Char
-text = satisfy (not . isCtl)
-
--- |'separator' accepts one character which satisfies 'isSeparator'.
-separator :: Parser Char
-separator = satisfy isSeparator
+isToken ∷ Char → Bool
+{-# INLINE isToken #-}
+isToken c = (¬) (isCtl c ∨ isSeparator c)
+
+-- |@'listOf' p@ is similar to @'sepBy' p ('char' \',\')@ but it
+-- allows any occurrences of 'lws' before and after each tokens.
+listOf ∷ Parser a → Parser [a]
+{-# INLINEABLE listOf #-}
+listOf p = do skipMany lws
+ sepBy p $ do skipMany lws
+ _ ← char ','
+ skipMany lws
+
+-- |'token' is similar to @'takeWhile1' 'isToken'@
+token ∷ Parser Ascii
+{-# INLINE token #-}
+token = A.unsafeFromByteString <$> takeWhile1 isToken
+
+-- |The CRLF: 0x0D 0x0A.
+crlf ∷ Parser ()
+{-# INLINE crlf #-}
+crlf = string "\x0D\x0A" *> return ()
+
+-- |The SP: 0x20.
+sp ∷ Parser ()
+{-# INLINE sp #-}
+sp = char '\x20' *> return ()
+
+-- |HTTP LWS: crlf? (sp | ht)+
+lws ∷ Parser ()
+{-# INLINEABLE lws #-}
+lws = do option () crlf
+ _ ← takeWhile1 isSPHT
+ return ()
+
+-- |Returns 'True' for SP and HT.
+isSPHT ∷ Char → Bool
+{-# INLINE isSPHT #-}
+isSPHT '\x20' = True
+isSPHT '\x09' = True
+isSPHT _ = False
+
+-- |@'separators'@ is similar to @'takeWhile1' 'isSeparator'@.
+separators ∷ Parser Ascii
+{-# INLINE separators #-}
+separators = A.unsafeFromByteString <$> takeWhile1 isSeparator
-- |'quotedStr' accepts a string surrounded by double quotation
-- marks. Quotes can be escaped by backslashes.
-quotedStr :: Parser String
-quotedStr = do _ <- char '"'
- xs <- many (qdtext <|> quotedPair)
- _ <- char '"'
- return $ foldr (++) "" xs
+quotedStr ∷ Parser Ascii
+{-# INLINEABLE quotedStr #-}
+quotedStr = try $
+ do _ ← char '"'
+ xs ← P.many (qdtext <|> quotedPair)
+ _ ← char '"'
+ return $ A.unsafeFromByteString $ BS.pack xs
where
- qdtext = do c <- satisfy (/= '"')
- return [c]
+ qdtext ∷ Parser Char
+ {-# INLINE qdtext #-}
+ qdtext = satisfy (\c → c ≢ '"' ∧ (¬) (isCtl c))
- quotedPair = do _ <- char '\\'
- c <- satisfy isChar
- return [c]
+ quotedPair ∷ Parser Char
+ {-# INLINE quotedPair #-}
+ quotedPair = char '\\' *> satisfy isChar
-- |'qvalue' accepts a so-called qvalue.
-qvalue :: Parser Double
-qvalue = do x <- char '0'
- xs <- option ""
- $ do y <- char '.'
- ys <- many digit -- 本當は三文字までに制限
- return (y:ys)
+qvalue ∷ Parser Double
+{-# INLINEABLE qvalue #-}
+qvalue = do x ← char '0'
+ xs ← option "" $
+ do y ← char '.'
+ ys ← atMost 3 digit
+ return (y:ys)
return $ read (x:xs)
<|>
- do x <- char '1'
- xs <- option ""
- $ do y <- char '.'
- ys <- many (char '0') -- 本當は三文字までに制限
- return (y:ys)
+ do x ← char '1'
+ xs ← option "" $
+ do y ← char '.'
+ ys ← atMost 3 (char '0')
+ return (y:ys)
return $ read (x:xs)
+
+-- |@'atMost' n v@ is like @'P.many' v@ but applies the given action
+-- at most @n@ times.
+atMost ∷ Alternative f ⇒ Int → f a → f [a]
+{-# INLINE atMost #-}
+atMost 0 _ = pure []
+atMost n v = ( (:) <$> v ⊛ atMost (n-1) v )
+ <|>
+ pure []
+
+
+data CharAccumState
+ = CharAccumState {
+ casChunks ∷ !(S.Seq BS.ByteString)
+ , casLastChunk ∷ !(S.Seq Char)
+ }
+
+instance Monoid CharAccumState where
+ mempty
+ = CharAccumState {
+ casChunks = (∅)
+ , casLastChunk = (∅)
+ }
+ mappend a b
+ = b {
+ casChunks = (casChunks a ⊳ lastChunk a) ⋈ casChunks b
+ }
+
+lastChunk ∷ CharAccumState → BS.ByteString
+{-# INLINE lastChunk #-}
+lastChunk = BS.pack ∘ toList ∘ casLastChunk
+
+snoc ∷ CharAccumState → Char → CharAccumState
+{-# INLINEABLE snoc #-}
+snoc cas c
+ | S.length (casLastChunk cas) ≥ LS.defaultChunkSize
+ = cas {
+ casChunks = casChunks cas ⊳ lastChunk cas
+ , casLastChunk = S.singleton c
+ }
+ | otherwise
+ = cas {
+ casLastChunk = casLastChunk cas ⊳ c
+ }
+
+finish ∷ CharAccumState → LS.ByteString
+{-# INLINEABLE finish #-}
+finish cas
+ = let chunks = toList $ casChunks cas ⊳ lastChunk cas
+ str = LS.fromChunks chunks
+ in
+ str
+
+manyCharsTill ∷ ∀m b. (Monad m, Alternative m)
+ ⇒ m Char
+ → m b
+ → m LS.ByteString
+{-# INLINEABLE manyCharsTill #-}
+manyCharsTill p end = scan (∅)
+ where
+ scan ∷ CharAccumState → m LS.ByteString
+ {-# INLINE scan #-}
+ scan s
+ = (end *> pure (finish s))
+ <|>
+ (scan =≪ (snoc s <$> p))