+ {-# INLINEABLE toHex' #-}
+ toHex' h
+ | h ≤ 0x09 = toEnum $ fromIntegral
+ $ fromEnum '0' + fromIntegral h
+ | otherwise = toEnum $ fromIntegral
+ $ fromEnum 'A' + fromIntegral (h - 0x0A)
+
+data ExtendedParam
+ = InitialEncodedParam {
+ epName ∷ !CIAscii
+ , epCharset ∷ !CIAscii
+ , epPayload ∷ !BS.ByteString
+ }
+ | ContinuedEncodedParam {
+ epName ∷ !CIAscii
+ , epSection ∷ !Integer
+ , epPayload ∷ !BS.ByteString
+ }
+ | AsciiParam {
+ epName ∷ !CIAscii
+ , epSection ∷ !Integer
+ , apPayload ∷ !Ascii
+ }
+
+section ∷ ExtendedParam → Integer
+{-# INLINE section #-}
+section (InitialEncodedParam {..}) = 0
+section ep = epSection ep
+
+-- |'Parser' for parameter values.
+paramsP ∷ Parser (Map CIAscii Text)
+{-# INLINEABLE paramsP #-}
+paramsP = decodeParams =≪ P.many (try paramP)
+
+paramP ∷ Parser ExtendedParam
+paramP = do skipMany lws
+ void $ char ';'
+ skipMany lws
+ epm ← nameP
+ void $ char '='
+ case epm of
+ (name, 0, True)
+ → do (charset, payload) ← initialEncodedValue
+ return $ InitialEncodedParam name charset payload
+ (name, sect, True)
+ → do payload ← encodedPayload
+ return $ ContinuedEncodedParam name sect payload
+ (name, sect, False)
+ → do payload ← token <|> quotedStr
+ return $ AsciiParam name sect payload
+
+nameP ∷ Parser (CIAscii, Integer, Bool)
+nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
+ takeWhile1 (\c → isToken c ∧ c ≢ '*')
+ sect ← option 0 $ try (char '*' *> decimal )
+ isEncoded ← option False $ try (char '*' *> pure True)
+ return (name, sect, isEncoded)
+
+initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
+initialEncodedValue
+ = do charset ← metadata
+ void $ char '\''
+ void $ metadata -- Ignore the language tag
+ void $ char '\''
+ payload ← encodedPayload
+ if charset ≡ "" then
+ -- NOTE: I'm not sure this is the right thing, but RFC
+ -- 2231 doesn't tell us what we should do when the
+ -- charset is omitted.
+ return ("US-ASCII", payload)
+ else
+ return (charset, payload)
+ where
+ metadata ∷ Parser CIAscii
+ metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
+ takeWhile (\c → c ≢ '\'' ∧ isToken c)
+
+encodedPayload ∷ Parser BS.ByteString
+{-# INLINE encodedPayload #-}
+encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
+
+hexChar ∷ Parser BS.ByteString
+{-# INLINEABLE hexChar #-}
+hexChar = do void $ char '%'
+ h ← satisfy isHexChar
+ l ← satisfy isHexChar
+ return $ BS.singleton $ hexToChar h l
+
+isHexChar ∷ Char → Bool
+isHexChar = inClass "0-9a-fA-F"
+
+hexToChar ∷ Char → Char → Char
+{-# INLINE hexToChar #-}
+hexToChar h l
+ = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
+
+hexToInt ∷ Char → Int
+{-# INLINEABLE hexToInt #-}
+hexToInt c
+ | c ≤ '9' = ord c - ord '0'
+ | c ≤ 'F' = ord c - ord 'A' + 10
+ | otherwise = ord c - ord 'a' + 10
+
+rawChars ∷ Parser BS.ByteString
+{-# INLINE rawChars #-}
+rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
+
+decodeParams ∷ Monad m ⇒ [ExtendedParam] → m (Map CIAscii Text)
+{-# INLINE decodeParams #-}
+decodeParams = (mapM decodeSections =≪) ∘ sortBySection
+
+sortBySection ∷ ∀m. Monad m
+ ⇒ [ExtendedParam]
+ → m (Map CIAscii (Map Integer ExtendedParam))
+sortBySection = flip go (∅)
+ where
+ go ∷ [ExtendedParam]
+ → Map CIAscii (Map Integer ExtendedParam)
+ → m (Map CIAscii (Map Integer ExtendedParam))
+ go [] m = return m
+ go (x:xs) m
+ = case M.lookup (epName x) m of
+ Nothing
+ → let s = M.singleton (section x) x
+ m' = M.insert (epName x) s m
+ in
+ go xs m'
+ Just s
+ → case M.lookup (section x) s of
+ Nothing
+ → let s' = M.insert (section x) x s
+ m' = M.insert (epName x) s' m
+ in
+ go xs m'
+ Just _
+ → fail (concat [ "Duplicate section "
+ , show $ section x
+ , " for parameter '"
+ , A.toString $ A.fromCIAscii $ epName x
+ , "'"
+ ])
+
+decodeSections ∷ ∀m. Monad m ⇒ Map Integer ExtendedParam → m Text
+decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
+ where
+ toSeq ∷ Map Integer ExtendedParam
+ → Integer
+ → Seq ExtendedParam
+ → m (Seq ExtendedParam)
+ toSeq m expectedSect sects
+ = case M.minViewWithKey m of
+ Nothing
+ → return sects
+ Just ((sect, p), m')
+ | sect ≡ expectedSect
+ → toSeq m' (expectedSect + 1) (sects ⊳ p)
+ | otherwise
+ → fail (concat [ "Missing section "
+ , show $ section p
+ , " for parameter '"
+ , A.toString $ A.fromCIAscii $ epName p
+ , "'"
+ ])
+
+ decodeSeq ∷ Seq ExtendedParam → m Text
+ decodeSeq sects
+ = case S.viewl sects of
+ EmptyL
+ → fail "decodeSeq: internal error: empty seq"
+ InitialEncodedParam {..} :< xs
+ → do conv ← openConv epCharset
+ let t = TC.toUnicode conv epPayload
+ decodeSeq' (Just conv) xs $ S.singleton t
+ ContinuedEncodedParam {..} :< _
+ → fail "decodeSeq: internal error: CEP at section 0"
+ AsciiParam {..} :< xs
+ → let t = A.toText apPayload
+ in
+ decodeSeq' Nothing xs $ S.singleton t
+
+ decodeSeq' ∷ Maybe (TC.Converter)
+ → Seq ExtendedParam
+ → Seq Text
+ → m Text
+ decodeSeq' convM sects chunks
+ = case S.viewl sects of
+ EmptyL
+ → return $ T.concat $ toList chunks
+ InitialEncodedParam {..} :< _
+ → fail "decodeSeq': internal error: IEP at section > 0"
+ ContinuedEncodedParam {..} :< xs
+ → case convM of
+ Just conv
+ → let t = TC.toUnicode conv epPayload
+ in
+ decodeSeq' convM xs $ chunks ⊳ t
+ Nothing
+ → fail (concat [ "Section "
+ , show epSection
+ , " for parameter '"
+ , A.toString $ A.fromCIAscii epName
+ , "' is encoded but its first section is not"
+ ])
+ AsciiParam {..} :< xs
+ → let t = A.toText apPayload
+ in
+ decodeSeq' convM xs $ chunks ⊳ t
+
+ openConv ∷ CIAscii → m TC.Converter
+ openConv charset
+ = let cs = A.toString $ A.fromCIAscii charset
+ open' = TC.open cs (Just True)
+ in
+ case unsafePerformIO $ E.try open' of
+ Right conv → return conv
+ Left err → fail $ show (err ∷ ICUError)