- paramP ∷ Parser ExtendedParam
- paramP = do skipMany lws
- _ ← char ';'
- skipMany lws
- epm ← nameP
- _ ← char '='
- case epm of
- (name, 0, True)
- → do (charset, payload) ← initialEncodedValue
- return $ InitialEncodedParam name charset payload
- (name, section, True)
- → do payload ← encodedPayload
- return $ ContinuedEncodedParam name section payload
- (name, section, False)
- → do payload ← token <|> quotedStr
- return $ AsciiParam name section payload
-
- nameP ∷ Parser (CIAscii, Integer, Bool)
- nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
- takeWhile1 (\c → isToken c ∧ c ≢ '*')
- section ← option 0 $
- try $
- do _ ← char '*'
- n ← decimal
- return n
- isEncoded ← option False $
- do _ ← char '*'
- return True
- return (name, section, isEncoded)
-
- initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
- initialEncodedValue = do charset ← metadata
- _ ← char '\''
- _ ← metadata -- Ignore the language tag
- _ ← char '\''
- payload ← encodedPayload
- return (charset, payload)
- where
- metadata ∷ Parser CIAscii
- metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
- takeWhile (\c → isToken c ∧ c ≢ '\'')
-
- encodedPayload ∷ Parser BS.ByteString
- encodedPayload = BS.concat <$> P.many (hexChar <|> literal)
- where
- hexChar ∷ Parser BS.ByteString
- hexChar = do _ ← char '%'
- h ← satisfy isHexChar
- l ← satisfy isHexChar
- return $ BS.singleton $ hexToChar h l
-
- isHexChar ∷ Char → Bool
- isHexChar = inClass "0-9a-fA-F"
-
- hexToChar ∷ Char → Char → Char
- hexToChar h l
- = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
-
- hexToInt ∷ Char → Int
- hexToInt c
- | c ≤ '9' = ord c - ord '0'
- | c ≤ 'F' = ord c - ord 'A' + 10
- | otherwise = ord c - ord 'a' + 10
-
- literal ∷ Parser BS.ByteString
- literal = takeWhile1 (\c → isToken c ∧ c ≢ '%')
-
- decodeParams ∷ [ExtendedParam] → Map CIAscii Text
- decodeParams = error "FIXME"
+ metadata ∷ Parser CIAscii
+ metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
+ takeWhile (\c → isToken c ∧ c ≢ '\'')
+
+encodedPayload ∷ Parser BS.ByteString
+encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
+
+hexChar ∷ Parser BS.ByteString
+hexChar = do _ ← char '%'
+ h ← satisfy isHexChar
+ l ← satisfy isHexChar
+ return $ BS.singleton $ hexToChar h l
+
+isHexChar ∷ Char → Bool
+isHexChar = inClass "0-9a-fA-F"
+
+hexToChar ∷ Char → Char → Char
+hexToChar h l
+ = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
+
+hexToInt ∷ Char → Int
+hexToInt c
+ | c ≤ '9' = ord c - ord '0'
+ | c ≤ 'F' = ord c - ord 'A' + 10
+ | otherwise = ord c - ord 'a' + 10
+
+rawChars ∷ Parser BS.ByteString
+rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
+
+decodeParams ∷ Monad m ⇒ [ExtendedParam] → m (Map CIAscii Text)
+decodeParams = (mapM decodeSections =≪) ∘ sortBySection
+
+sortBySection ∷ ∀m. Monad m
+ ⇒ [ExtendedParam]
+ → m (Map CIAscii (Map Integer ExtendedParam))
+sortBySection = flip go (∅)
+ where
+ go ∷ [ExtendedParam]
+ → Map CIAscii (Map Integer ExtendedParam)
+ → m (Map CIAscii (Map Integer ExtendedParam))
+ go [] m = return m
+ go (x:xs) m
+ = case M.lookup (epName x) m of
+ Nothing
+ → let s = M.singleton (section x) x
+ m' = M.insert (epName x) s m
+ in
+ go xs m'
+ Just s
+ → case M.insertLookupWithKey (\_ s' _ → s') (section x) x s of
+ (Nothing, s')
+ → let m' = M.insert (epName x) s' m
+ in
+ go xs m'
+ (Just _, _)
+ → fail (concat [ "Duplicate section "
+ , show $ section x
+ , " for parameter '"
+ , A.toString $ A.fromCIAscii $ epName x
+ , "'"
+ ])
+
+decodeSections ∷ ∀m. Monad m ⇒ Map Integer ExtendedParam → m Text
+decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
+ where
+ toSeq ∷ Map Integer ExtendedParam
+ → Integer
+ → Seq ExtendedParam
+ → m (Seq ExtendedParam)
+ toSeq m expectedSect sects
+ = case M.minViewWithKey m of
+ Nothing
+ → return sects
+ Just ((sect, p), m')
+ | sect ≡ expectedSect
+ → toSeq m' (expectedSect + 1) (sects ⊳ p)
+ | otherwise
+ → fail (concat [ "Missing section "
+ , show $ section p
+ , " for parameter '"
+ , A.toString $ A.fromCIAscii $ epName p
+ , "'"
+ ])
+
+ decodeSeq ∷ Seq ExtendedParam → m Text
+ decodeSeq sects
+ = case S.viewl sects of
+ EmptyL
+ → fail "decodeSeq: internal error: empty seq"
+ InitialEncodedParam {..} :< xs
+ → do conv ← openConv epCharset
+ let t = TC.toUnicode conv epPayload
+ decodeSeq' (Just conv) xs $ S.singleton t
+ ContinuedEncodedParam {..} :< _
+ → fail "decodeSeq: internal error: CEP at section 0"
+ AsciiParam {..} :< xs
+ → let t = A.toText apPayload
+ in
+ decodeSeq' Nothing xs $ S.singleton t
+
+ decodeSeq' ∷ Maybe (TC.Converter)
+ → Seq ExtendedParam
+ → Seq Text
+ → m Text
+ decodeSeq' convM sects chunks
+ = case S.viewl sects of
+ EmptyL
+ → return $ T.concat $ toList chunks
+ InitialEncodedParam {..} :< _
+ → fail "decodeSeq': internal error: IEP at section > 0"
+ ContinuedEncodedParam {..} :< xs
+ → case convM of
+ Just conv
+ → let t = TC.toUnicode conv epPayload
+ in
+ decodeSeq' convM xs $ chunks ⊳ t
+ Nothing
+ → fail (concat [ "Section "
+ , show epSection
+ , " for parameter '"
+ , A.toString $ A.fromCIAscii epName
+ , "' is encoded but its first section is not"
+ ])
+ AsciiParam {..} :< xs
+ → let t = A.toText apPayload
+ in
+ decodeSeq' convM xs $ chunks ⊳ t
+
+ openConv ∷ CIAscii → m TC.Converter
+ openConv charset
+ = let cs = A.toString $ A.fromCIAscii charset
+ open' = TC.open cs (Just True)
+ in
+ case unsafePerformIO $ E.try open' of
+ Right conv → return conv
+ Left err → fail $ show (err ∷ ICUError)