X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=Network%2FHTTP%2FLucu%2FParser%2FHttp.hs;h=520034247726f3ec6398eb8b69b143eb08456ceb;hb=b923d45;hp=93fc14d9c80077cf3f2ba63270b215c8fc3f7442;hpb=c6b11025d1f81c668e9995e856b7bb34175230d3;p=Lucu.git diff --git a/Network/HTTP/Lucu/Parser/Http.hs b/Network/HTTP/Lucu/Parser/Http.hs index 93fc14d..5200342 100644 --- a/Network/HTTP/Lucu/Parser/Http.hs +++ b/Network/HTTP/Lucu/Parser/Http.hs @@ -1,77 +1,228 @@ +{-# LANGUAGE + BangPatterns + , OverloadedStrings + , ScopedTypeVariables + , UnicodeSyntax + #-} +-- |This is an auxiliary parser utilities for parsing things related +-- on HTTP protocol. +-- +-- In general you don't have to use this module directly. module Network.HTTP.Lucu.Parser.Http - ( isCtl -- Char -> Bool - , isSeparator -- Char -> Bool - , isChar -- Char -> Bool - , isToken -- Char -> Bool - , listOf -- Parser a -> Parser [a] - , token -- Parser String - , lws -- Parser String - , text -- Parser Char - , separator -- Parser Char - , quotedStr -- Parser String + ( isCtl + , isText + , isSeparator + , isChar + , isToken + , isSPHT + + , listOf + + , crlf + , sp + , lws + + , token + , separators + , quotedStr + , qvalue + + , atMost + , manyCharsTill ) where - -import qualified Data.ByteString.Lazy.Char8 as B -import Data.ByteString.Lazy.Char8 (ByteString) -import Data.List -import Network.HTTP.Lucu.Parser - -isCtl :: Char -> Bool +import Control.Applicative +import Control.Applicative.Unicode hiding ((∅)) +import Control.Monad.Unicode +import Data.Ascii (Ascii) +import qualified Data.Ascii as A +import Data.Attoparsec.Char8 as P hiding (scan) +import qualified Data.Attoparsec.FastSet as FS +import qualified Data.ByteString.Char8 as BS +import qualified Data.ByteString.Lazy.Char8 as LS +import qualified Data.ByteString.Lazy.Internal as LS +import Data.Foldable +import Data.Monoid +import Data.Monoid.Unicode +import qualified Data.Sequence as S +import Data.Sequence.Unicode hiding ((∅)) +import Prelude.Unicode + +-- |@'isCtl' c@ is 'Prelude.False' iff @0x20 <= c < 0x7F@. +isCtl ∷ Char → Bool +{-# INLINE isCtl #-} isCtl c - | c < '\x1f' = True - | c >= '\x7f' = True + | c ≤ '\x1f' = True + | c > '\x7f' = True | otherwise = False +-- |@'isText'@ is equivalent to @'not' '.' 'isCtl'@ +isText ∷ Char → Bool +{-# INLINE isText #-} +isText = (¬) ∘ isCtl -isSeparator :: Char -> Bool -isSeparator c = elem c "()<>@,;:\\\"/[]?={} \t" - - -isChar :: Char -> Bool -isChar c - | c <= '\x7f' = True - | otherwise = False - - -isToken :: Char -> Bool -isToken c = not (isCtl c || isSeparator c) - - -listOf :: Parser a -> Parser [a] -listOf p = do many lws - sepBy p (do many lws - char ',' - many lws) - - -token :: Parser String -token = many1 $ satisfy isToken - - -lws :: Parser String -lws = do s <- option "" crlf - xs <- many1 (sp <|> ht) - return (s ++ xs) - - -text :: Parser Char -text = satisfy (\ c -> not (isCtl c)) - - -separator :: Parser Char -separator = satisfy isSeparator - - -quotedStr :: Parser String -quotedStr = do char '"' - xs <- many (qdtext <|> quotedPair) - char '"' - return $ foldr (++) "" xs +-- |@'isSeparator' c@ is 'Prelude.True' iff c is one of HTTP +-- separators. +isSeparator ∷ Char → Bool +{-# INLINE isSeparator #-} +isSeparator = flip FS.memberChar set where - qdtext = do c <- satisfy (/= '"') - return [c] - - quotedPair = do q <- char '\\' - c <- satisfy isChar - return [c] + {-# NOINLINE set #-} + set = FS.charClass "()<>@,;:\\\"/[]?={}\x20\x09" + +-- |@'isChar' c@ is 'Prelude.True' iff @c <= 0x7f@. +isChar ∷ Char → Bool +{-# INLINE isChar #-} +isChar = (≤ '\x7F') + +-- |@'isToken' c@ is equivalent to @not ('isCtl' c || 'isSeparator' +-- c)@ +isToken ∷ Char → Bool +{-# INLINE isToken #-} +isToken !c + = (¬) (isCtl c ∨ isSeparator c) + +-- |@'listOf' p@ is similar to @'sepBy' p ('char' \',\')@ but it +-- allows any occurrences of 'lws' before and after each tokens. +listOf ∷ Parser a → Parser [a] +{-# INLINEABLE listOf #-} +listOf p + = do skipMany lws + sepBy p $ do skipMany lws + _ <- char ',' + skipMany lws + +-- |'token' is similar to @'takeWhile1' 'isToken'@ +token ∷ Parser Ascii +{-# INLINE token #-} +token = A.unsafeFromByteString <$> takeWhile1 isToken + +-- |The CRLF: 0x0D 0x0A. +crlf ∷ Parser () +{-# INLINE crlf #-} +crlf = string "\x0D\x0A" ≫ return () + +-- |The SP: 0x20. +sp ∷ Parser () +{-# INLINE sp #-} +sp = char '\x20' ≫ return () + +-- |HTTP LWS: crlf? (sp | ht)+ +lws ∷ Parser () +{-# INLINEABLE lws #-} +lws = do option () crlf + _ ← takeWhile1 isSPHT + return () + +-- |Returns 'True' for SP and HT. +isSPHT ∷ Char → Bool +{-# INLINE isSPHT #-} +isSPHT '\x20' = True +isSPHT '\x09' = True +isSPHT _ = False + +-- |@'separators'@ is similar to @'takeWhile1' 'isSeparator'@. +separators ∷ Parser Ascii +{-# INLINE separators #-} +separators = A.unsafeFromByteString <$> takeWhile1 isSeparator + +-- |'quotedStr' accepts a string surrounded by double quotation +-- marks. Quotes can be escaped by backslashes. +quotedStr ∷ Parser Ascii +{-# INLINEABLE quotedStr #-} +quotedStr = try $ + do _ ← char '"' + xs ← P.many (qdtext <|> quotedPair) + _ ← char '"' + return $ A.unsafeFromByteString $ BS.pack xs + where + qdtext ∷ Parser Char + {-# INLINE qdtext #-} + qdtext = satisfy (\c → c ≢ '"' ∧ (¬) (isCtl c)) + + quotedPair ∷ Parser Char + {-# INLINE quotedPair #-} + quotedPair = char '\\' ≫ satisfy isChar + +-- |'qvalue' accepts a so-called qvalue. +qvalue ∷ Parser Double +{-# INLINEABLE qvalue #-} +qvalue = do x ← char '0' + xs ← option "" $ + do y ← char '.' + ys ← atMost 3 digit + return (y:ys) + return $ read (x:xs) + <|> + do x ← char '1' + xs ← option "" $ + do y ← char '.' + ys ← atMost 3 (char '0') + return (y:ys) + return $ read (x:xs) + +-- |@'atMost' n v@ is like @'P.many' v@ but applies the given action +-- at most @n@ times. +atMost ∷ Alternative f ⇒ Int → f a → f [a] +{-# INLINE atMost #-} +atMost 0 _ = pure [] +atMost n v = ( (:) <$> v ⊛ atMost (n-1) v ) + <|> + pure [] + + +data CharAccumState + = CharAccumState { + casChunks ∷ !(S.Seq BS.ByteString) + , casLastChunk ∷ !(S.Seq Char) + } + +instance Monoid CharAccumState where + mempty + = CharAccumState { + casChunks = (∅) + , casLastChunk = (∅) + } + mappend a b + = b { + casChunks = (casChunks a ⊳ lastChunk a) ⋈ casChunks b + } + +lastChunk ∷ CharAccumState → BS.ByteString +{-# INLINE lastChunk #-} +lastChunk = BS.pack ∘ toList ∘ casLastChunk + +snoc ∷ CharAccumState → Char → CharAccumState +{-# INLINEABLE snoc #-} +snoc cas c + | S.length (casLastChunk cas) ≥ LS.defaultChunkSize + = cas { + casChunks = casChunks cas ⊳ lastChunk cas + , casLastChunk = S.singleton c + } + | otherwise + = cas { + casLastChunk = casLastChunk cas ⊳ c + } + +finish ∷ CharAccumState → LS.ByteString +{-# INLINEABLE finish #-} +finish cas + = let chunks = toList $ casChunks cas ⊳ lastChunk cas + str = LS.fromChunks chunks + in + str + +manyCharsTill ∷ ∀m b. (Monad m, Alternative m) + ⇒ m Char + → m b + → m LS.ByteString +{-# INLINEABLE manyCharsTill #-} +manyCharsTill p end = scan (∅) + where + scan ∷ CharAccumState → m LS.ByteString + {-# INLINE scan #-} + scan s + = (end *> pure (finish s)) + <|> + (scan =≪ (snoc s <$> p))