[youtube] Correct invalid JSON (Fixes #2353)

author Philipp Hagemeister <phihag@phihag.de>

Sun, 9 Feb 2014 16:56:10 +0000 (17:56 +0100)

committer Philipp Hagemeister <phihag@phihag.de>

Sun, 9 Feb 2014 16:56:10 +0000 (17:56 +0100)
author Philipp Hagemeister <phihag@phihag.de>
Sun, 9 Feb 2014 16:56:10 +0000 (17:56 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Sun, 9 Feb 2014 16:56:10 +0000 (17:56 +0100)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 2c0c75604b96cfd08a283c0d93ef36f85b525922..84fca8ba0b2577696877c117a13fcc0a5ce40735 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -271,8 +271,11 @@ class InfoExtractor(object):
  
      def _download_json(self, url_or_request, video_id,
                         note=u'Downloading JSON metadata',
-                       errnote=u'Unable to download JSON metadata'):
+                       errnote=u'Unable to download JSON metadata',
+                       transform_source=None):
          json_string = self._download_webpage(url_or_request, video_id, note, errnote)
+        if transform_source:
+            json_string = transform_source(json_string)
          try:
              return json.loads(json_string)
          except ValueError as ve:
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index e038c7752219e4dc9ee2fed84269595188b1b361..18a92e1019e81a38c6d081e0c18787f02de4baa9 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -34,6 +34,7 @@ from ..utils import (
      unified_strdate,
      orderedSet,
      write_json_file,
+    uppercase_escape,
  )
  
  class YoutubeBaseInfoExtractor(InfoExtractor):
@@ -1590,11 +1591,10 @@ class YoutubeChannelIE(InfoExtractor):
              # Download all channel pages using the json-based channel_ajax query
              for pagenum in itertools.count(1):
                  url = self._MORE_PAGES_URL % (pagenum, channel_id)
-                page = self._download_webpage(url, channel_id,
-                                              u'Downloading page #%s' % pagenum)
-    
-                page = json.loads(page)
-    
+                page = self._download_json(
+                    url, channel_id, note=u'Downloading page #%s' % pagenum,
+                    transform_source=uppercase_escape)
+
                  ids_in_page = self.extract_videos_from_page(page['content_html'])
                  video_ids.extend(ids_in_page)
      
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 01c8c017d53a90477871f0ad4b6ef15250cc5e5b..fa8f80e024c40f4c2676a86971b790ee47484973 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1214,3 +1214,9 @@ class PagedList(object):
              if end == nextfirstid:
                  break
          return res
+
+
+def uppercase_escape(s):
+    return re.sub(
+        r'\\U([0-9a-fA-F]{8})',
+        lambda m: compat_chr(int(m.group(1), base=16)), s)
author	Philipp Hagemeister <phihag@phihag.de>
	Sun, 9 Feb 2014 16:56:10 +0000 (17:56 +0100)
committer	Philipp Hagemeister <phihag@phihag.de>
	Sun, 9 Feb 2014 16:56:10 +0000 (17:56 +0100)
youtube_dl/extractor/common.py		patch | blob | history
youtube_dl/extractor/youtube.py		patch | blob | history
youtube_dl/utils.py		patch | blob | history