X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=Network%2FHTTP%2FLucu%2FRFC2231.hs;h=1046c5df516f47ebcb06bcaf1ea1228a381cba72;hb=a362be1c8664306b970c32e1df9b62081498feb1;hp=1d49646947dacb0d81a848b8a9d658ccda541370;hpb=d002a49e8d298f27367f529594c668d04984fdf6;p=Lucu.git diff --git a/Network/HTTP/Lucu/RFC2231.hs b/Network/HTTP/Lucu/RFC2231.hs index 1d49646..1046c5d 100644 --- a/Network/HTTP/Lucu/RFC2231.hs +++ b/Network/HTTP/Lucu/RFC2231.hs @@ -2,18 +2,20 @@ DoAndIfThenElse , OverloadedStrings , RecordWildCards - , ScopedTypeVariables , UnicodeSyntax #-} --- |Provide facilities to encode/decode MIME parameter values in +-- |Provide functionalities to encode/decode MIME parameter values in -- character sets other than US-ASCII. See: --- http://www.faqs.org/rfcs/rfc2231.html +-- +-- +-- You usually don't have to use this module directly. module Network.HTTP.Lucu.RFC2231 - ( printParams - , paramsP + ( printMIMEParams + , mimeParams ) where import Control.Applicative +import Control.Monad hiding (mapM) import Control.Monad.Unicode import Data.Ascii (Ascii, CIAscii, AsciiBuilder) import qualified Data.Ascii as A @@ -30,8 +32,8 @@ import qualified Data.Sequence as S import Data.Sequence.Unicode hiding ((∅)) import Data.Text (Text) import qualified Data.Text as T -import qualified Data.Text.ICU.Convert as TC import Data.Text.Encoding +import Data.Text.Encoding.Error import Data.Traversable import Data.Word import Network.HTTP.Lucu.Parser.Http @@ -39,26 +41,33 @@ import Network.HTTP.Lucu.Utils import Prelude hiding (concat, mapM, takeWhile) import Prelude.Unicode -printParams ∷ Map CIAscii Text → AsciiBuilder -printParams params - | M.null params = (∅) - | otherwise = A.toAsciiBuilder "; " ⊕ - joinWith "; " (map printPair $ M.toList params) +-- |Convert MIME parameter values to an 'AsciiBuilder'. +printMIMEParams ∷ Map CIAscii Text → AsciiBuilder +{-# INLINEABLE printMIMEParams #-} +printMIMEParams m = M.foldlWithKey f (∅) m + -- THINKME: Use foldlWithKey' for newer Data.Map + where + f ∷ AsciiBuilder → CIAscii → Text → AsciiBuilder + {-# INLINE f #-} + f ab k v = ab ⊕ A.toAsciiBuilder "; " ⊕ printPair k v -printPair ∷ (CIAscii, Text) → AsciiBuilder -printPair (name, value) +printPair ∷ CIAscii → Text → AsciiBuilder +{-# INLINEABLE printPair #-} +printPair name value | T.any (> '\xFF') value = printPairInUTF8 name value | otherwise = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value) printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder +{-# INLINEABLE printPairInUTF8 #-} printPairInUTF8 name value = A.toAsciiBuilder (A.fromCIAscii name) ⊕ A.toAsciiBuilder "*=utf-8''" ⊕ escapeUnsafeChars (encodeUtf8 value) (∅) printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder +{-# INLINEABLE printPairInAscii #-} printPairInAscii name value = A.toAsciiBuilder (A.fromCIAscii name) ⊕ A.toAsciiBuilder "=" ⊕ @@ -68,6 +77,7 @@ printPairInAscii name value A.toAsciiBuilder value escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder +{-# INLINEABLE escapeUnsafeChars #-} escapeUnsafeChars bs b = case BS.uncons bs of Nothing → b @@ -78,15 +88,18 @@ escapeUnsafeChars bs b b ⊕ toHex (fromIntegral $ fromEnum c) toHex ∷ Word8 → AsciiBuilder +{-# INLINEABLE toHex #-} toHex o = A.toAsciiBuilder "%" ⊕ A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8) , toHex' (o .&. 0x0F) ]) - -toHex' ∷ Word8 → Char -toHex' o - | o ≤ 0x09 = toEnum $ fromIntegral $ fromEnum '0' + fromIntegral o - | otherwise = toEnum $ fromIntegral $ fromEnum 'A' + fromIntegral (o - 0x0A) - + where + toHex' ∷ Word8 → Char + {-# INLINEABLE toHex' #-} + toHex' h + | h ≤ 0x09 = toEnum $ fromIntegral + $ fromEnum '0' + fromIntegral h + | otherwise = toEnum $ fromIntegral + $ fromEnum 'A' + fromIntegral (h - 0x0A) data ExtendedParam = InitialEncodedParam { @@ -106,18 +119,21 @@ data ExtendedParam } section ∷ ExtendedParam → Integer +{-# INLINE section #-} section (InitialEncodedParam {..}) = 0 section ep = epSection ep -paramsP ∷ Parser (Map CIAscii Text) -paramsP = decodeParams =≪ P.many (try paramP) +-- |'Parser' for MIME parameter values. +mimeParams ∷ Parser (Map CIAscii Text) +{-# INLINEABLE mimeParams #-} +mimeParams = decodeParams =≪ P.many (try paramP) paramP ∷ Parser ExtendedParam paramP = do skipMany lws - _ ← char ';' + void $ char ';' skipMany lws epm ← nameP - _ ← char '=' + void $ char '=' case epm of (name, 0, True) → do (charset, payload) ← initialEncodedValue @@ -132,40 +148,37 @@ paramP = do skipMany lws nameP ∷ Parser (CIAscii, Integer, Bool) nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$> takeWhile1 (\c → isToken c ∧ c ≢ '*') - sect ← option 0 $ - try $ - do _ ← char '*' - n ← decimal - return n - isEncoded ← option False $ - do _ ← char '*' - return True + sect ← option 0 $ try (char '*' *> decimal ) + isEncoded ← option False $ try (char '*' *> pure True) return (name, sect, isEncoded) initialEncodedValue ∷ Parser (CIAscii, BS.ByteString) initialEncodedValue = do charset ← metadata - _ ← char '\'' - _ ← metadata -- Ignore the language tag - _ ← char '\'' + void $ char '\'' + void $ metadata -- Ignore the language tag + void $ char '\'' payload ← encodedPayload if charset ≡ "" then -- NOTE: I'm not sure this is the right thing, but RFC - -- 2231 doesn't tell us what should we do when the + -- 2231 doesn't tell us what we should do when the -- charset is omitted. return ("US-ASCII", payload) + -- FIXME: Rethink about this behaviour. else return (charset, payload) where metadata ∷ Parser CIAscii metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$> - takeWhile (\c → isToken c ∧ c ≢ '\'') + takeWhile (\c → c ≢ '\'' ∧ isToken c) encodedPayload ∷ Parser BS.ByteString +{-# INLINE encodedPayload #-} encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars) hexChar ∷ Parser BS.ByteString -hexChar = do _ ← char '%' +{-# INLINEABLE hexChar #-} +hexChar = do void $ char '%' h ← satisfy isHexChar l ← satisfy isHexChar return $ BS.singleton $ hexToChar h l @@ -174,27 +187,32 @@ isHexChar ∷ Char → Bool isHexChar = inClass "0-9a-fA-F" hexToChar ∷ Char → Char → Char +{-# INLINE hexToChar #-} hexToChar h l = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l hexToInt ∷ Char → Int +{-# INLINEABLE hexToInt #-} hexToInt c | c ≤ '9' = ord c - ord '0' | c ≤ 'F' = ord c - ord 'A' + 10 | otherwise = ord c - ord 'a' + 10 rawChars ∷ Parser BS.ByteString +{-# INLINE rawChars #-} rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%') decodeParams ∷ Monad m ⇒ [ExtendedParam] → m (Map CIAscii Text) +{-# INLINE decodeParams #-} decodeParams = (mapM decodeSections =≪) ∘ sortBySection -sortBySection ∷ ∀m. Monad m +sortBySection ∷ Monad m ⇒ [ExtendedParam] → m (Map CIAscii (Map Integer ExtendedParam)) sortBySection = flip go (∅) where - go ∷ [ExtendedParam] + go ∷ Monad m + ⇒ [ExtendedParam] → Map CIAscii (Map Integer ExtendedParam) → m (Map CIAscii (Map Integer ExtendedParam)) go [] m = return m @@ -206,12 +224,13 @@ sortBySection = flip go (∅) in go xs m' Just s - → case M.insertLookupWithKey (\_ s' _ → s') (section x) x s of - (Nothing, s') - → let m' = M.insert (epName x) s' m + → case M.lookup (section x) s of + Nothing + → let s' = M.insert (section x) x s + m' = M.insert (epName x) s' m in go xs m' - (Just _, _) + Just _ → fail (concat [ "Duplicate section " , show $ section x , " for parameter '" @@ -219,10 +238,11 @@ sortBySection = flip go (∅) , "'" ]) -decodeSections ∷ ∀m. Monad m ⇒ Map Integer ExtendedParam → m Text +decodeSections ∷ Monad m ⇒ Map Integer ExtendedParam → m Text decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅) where - toSeq ∷ Map Integer ExtendedParam + toSeq ∷ Monad m + ⇒ Map Integer ExtendedParam → Integer → Seq ExtendedParam → m (Seq ExtendedParam) @@ -241,50 +261,61 @@ decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅) , "'" ]) - decodeSeq ∷ Seq ExtendedParam → m Text + decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text decodeSeq sects = case S.viewl sects of EmptyL → fail "decodeSeq: internal error: empty seq" InitialEncodedParam {..} :< xs - → do conv ← openConv epCharset - let t = TC.toUnicode conv epPayload - decodeSeq' (Just conv) xs $ S.singleton t + → do d ← getDecoder epCharset + t ← decodeStr d epPayload + decodeSeq' (Just d) xs $ S.singleton t ContinuedEncodedParam {..} :< _ - → fail "decodeSeq: internal error: ContinuedEncodedParam at section 0" + → fail "decodeSeq: internal error: CEP at section 0" AsciiParam {..} :< xs → let t = A.toText apPayload in decodeSeq' Nothing xs $ S.singleton t - decodeSeq' ∷ Maybe (TC.Converter) + decodeSeq' ∷ Monad m + ⇒ Maybe Decoder → Seq ExtendedParam → Seq Text → m Text - decodeSeq' convM sects chunks + decodeSeq' decoder sects chunks = case S.viewl sects of EmptyL → return $ T.concat $ toList chunks InitialEncodedParam {..} :< _ - → fail "decodeSeq': internal error: InitialEncodedParam at section > 0" + → fail "decodeSeq': internal error: IEP at section > 0" ContinuedEncodedParam {..} :< xs - → case convM of - Just conv - → let t = TC.toUnicode conv epPayload - in - decodeSeq' convM xs $ chunks ⊳ t + → case decoder of + Just d + → do t ← decodeStr d epPayload + decodeSeq' decoder xs $ chunks ⊳ t Nothing → fail (concat [ "Section " , show epSection , " for parameter '" , A.toString $ A.fromCIAscii epName - , "' is encoded but its section 0 is not" + , "' is encoded but its first section is not" ]) AsciiParam {..} :< xs → let t = A.toText apPayload in - decodeSeq' convM xs $ chunks ⊳ t + decodeSeq' decoder xs $ chunks ⊳ t + +type Decoder = BS.ByteString → Either UnicodeException Text + +decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text +decodeStr decoder str + = case decoder str of + Right t → return t + Left e → fail $ show e - openConv ∷ CIAscii → m TC.Converter - openConv charset - = fail "FIXME" +getDecoder ∷ Monad m ⇒ CIAscii → m Decoder +getDecoder charset + | charset ≡ "UTF-8" = return decodeUtf8' + | charset ≡ "US-ASCII" = return decodeUtf8' + | otherwise = fail $ "No decoders found for charset: " + ⧺ A.toString (A.fromCIAscii charset)