From: PHO Date: Thu, 25 Aug 2011 15:13:03 +0000 (+0900) Subject: Still working on RFC2231... X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=commitdiff_plain;h=d002a49;p=Lucu.git Still working on RFC2231... Ditz-issue: 8959dadc07db1bd363283dee401073f6e48dc7fa --- diff --git a/Lucu.cabal b/Lucu.cabal index 3ac356e..f5dddee 100644 --- a/Lucu.cabal +++ b/Lucu.cabal @@ -62,6 +62,7 @@ Library network == 2.3.*, stm == 2.2.*, text == 0.11.*, + text-icu == 0.6.*, time == 1.2.*, time-http == 0.1.*, unix == 2.4.*, diff --git a/Network/HTTP/Lucu/RFC2231.hs b/Network/HTTP/Lucu/RFC2231.hs index a8e29cb..1d49646 100644 --- a/Network/HTTP/Lucu/RFC2231.hs +++ b/Network/HTTP/Lucu/RFC2231.hs @@ -25,10 +25,12 @@ import Data.Foldable import Data.Map (Map) import qualified Data.Map as M import Data.Monoid.Unicode +import Data.Sequence (Seq, ViewL(..)) import qualified Data.Sequence as S import Data.Sequence.Unicode hiding ((∅)) import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.ICU.Convert as TC import Data.Text.Encoding import Data.Traversable import Data.Word @@ -141,12 +143,19 @@ nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$> return (name, sect, isEncoded) initialEncodedValue ∷ Parser (CIAscii, BS.ByteString) -initialEncodedValue = do charset ← metadata - _ ← char '\'' - _ ← metadata -- Ignore the language tag - _ ← char '\'' - payload ← encodedPayload - return (charset, payload) +initialEncodedValue + = do charset ← metadata + _ ← char '\'' + _ ← metadata -- Ignore the language tag + _ ← char '\'' + payload ← encodedPayload + if charset ≡ "" then + -- NOTE: I'm not sure this is the right thing, but RFC + -- 2231 doesn't tell us what should we do when the + -- charset is omitted. + return ("US-ASCII", payload) + else + return (charset, payload) where metadata ∷ Parser CIAscii metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$> @@ -211,16 +220,19 @@ sortBySection = flip go (∅) ]) decodeSections ∷ ∀m. Monad m ⇒ Map Integer ExtendedParam → m Text -decodeSections = flip (flip go 0) (∅) +decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅) where - go ∷ Map Integer ExtendedParam → Integer → S.Seq Text → m Text - go m expectedSect chunks + toSeq ∷ Map Integer ExtendedParam + → Integer + → Seq ExtendedParam + → m (Seq ExtendedParam) + toSeq m expectedSect sects = case M.minViewWithKey m of Nothing - → return $ T.concat $ toList chunks + → return sects Just ((sect, p), m') | sect ≡ expectedSect - → error "FIXME" + → toSeq m' (expectedSect + 1) (sects ⊳ p) | otherwise → fail (concat [ "Missing section " , show $ section p @@ -228,3 +240,51 @@ decodeSections = flip (flip go 0) (∅) , A.toString $ A.fromCIAscii $ epName p , "'" ]) + + decodeSeq ∷ Seq ExtendedParam → m Text + decodeSeq sects + = case S.viewl sects of + EmptyL + → fail "decodeSeq: internal error: empty seq" + InitialEncodedParam {..} :< xs + → do conv ← openConv epCharset + let t = TC.toUnicode conv epPayload + decodeSeq' (Just conv) xs $ S.singleton t + ContinuedEncodedParam {..} :< _ + → fail "decodeSeq: internal error: ContinuedEncodedParam at section 0" + AsciiParam {..} :< xs + → let t = A.toText apPayload + in + decodeSeq' Nothing xs $ S.singleton t + + decodeSeq' ∷ Maybe (TC.Converter) + → Seq ExtendedParam + → Seq Text + → m Text + decodeSeq' convM sects chunks + = case S.viewl sects of + EmptyL + → return $ T.concat $ toList chunks + InitialEncodedParam {..} :< _ + → fail "decodeSeq': internal error: InitialEncodedParam at section > 0" + ContinuedEncodedParam {..} :< xs + → case convM of + Just conv + → let t = TC.toUnicode conv epPayload + in + decodeSeq' convM xs $ chunks ⊳ t + Nothing + → fail (concat [ "Section " + , show epSection + , " for parameter '" + , A.toString $ A.fromCIAscii epName + , "' is encoded but its section 0 is not" + ]) + AsciiParam {..} :< xs + → let t = A.toText apPayload + in + decodeSeq' convM xs $ chunks ⊳ t + + openConv ∷ CIAscii → m TC.Converter + openConv charset + = fail "FIXME"