Refactor IDParser to search for elements by any attribute, not just ID

[youtube-dl.git] / youtube_dl / InfoExtractors.py
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

index 418251ec0382fcd589dc0f45eec531838d8f62be..9a41dde57bcc2eba44f16a43f1058cffdd1fb204 100755 (executable)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -23,7 +23,7 @@ class InfoExtractor(object):
      Information extractors are the classes that, given a URL, extract
      information about the video (or videos) the URL refers to. This
      information includes the real video URL, the video title, author and
      Information extractors are the classes that, given a URL, extract
      information about the video (or videos) the URL refers to. This
      information includes the real video URL, the video title, author and
-    others. The information is stored in a dictionary which is then 
+    others. The information is stored in a dictionary which is then
      passed to the FileDownloader. The FileDownloader processes this
      information possibly downloading the video to the file system, among
      other possible outcomes.
      passed to the FileDownloader. The FileDownloader processes this
      information possibly downloading the video to the file system, among
      other possible outcomes.
@@ -159,7 +159,7 @@ class YoutubeIE(InfoExtractor):
          '44': '480x854',
          '45': '720x1280',
          '46': '1080x1920',
          '44': '480x854',
          '45': '720x1280',
          '46': '1080x1920',
-    }   
+    }
      IE_NAME = u'youtube'
  
      def suitable(self, url):
      IE_NAME = u'youtube'
  
      def suitable(self, url):
@@ -272,7 +272,7 @@ class YoutubeIE(InfoExtractor):
          request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
          try:
              self.report_login()
          request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
          try:
              self.report_login()
-            login_results = compat_urllib_request.urlopen(request).read()
+            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
              if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
                  self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
                  return
              if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
                  self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
                  return
@@ -288,7 +288,7 @@ class YoutubeIE(InfoExtractor):
          request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
          try:
              self.report_age_confirmation()
          request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
          try:
              self.report_age_confirmation()
-            age_results = compat_urllib_request.urlopen(request).read()
+            age_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
              return
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
              return
@@ -399,7 +399,7 @@ class YoutubeIE(InfoExtractor):
                  self.report_video_subtitles_download(video_id)
                  request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
                  try:
                  self.report_video_subtitles_download(video_id)
                  request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
                  try:
-                    srt_list = compat_urllib_request.urlopen(request).read()
+                    srt_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
                  except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                      raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
                  srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
                  except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                      raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
                  srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
@@ -416,14 +416,14 @@ class YoutubeIE(InfoExtractor):
                      raise Trouble(u'WARNING: no closed captions found in the specified language')
                  request = compat_urllib_request.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id))
                  try:
                      raise Trouble(u'WARNING: no closed captions found in the specified language')
                  request = compat_urllib_request.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id))
                  try:
-                    srt_xml = compat_urllib_request.urlopen(request).read()
+                    srt_xml = compat_urllib_request.urlopen(request).read().decode('utf-8')
                  except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                      raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
                  if not srt_xml:
                      raise Trouble(u'WARNING: unable to download video subtitles')
                  except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                      raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
                  if not srt_xml:
                      raise Trouble(u'WARNING: unable to download video subtitles')
-                video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
+                video_subtitles = self._closed_captions_xml_to_srt(srt_xml)
              except Trouble as trouble:
              except Trouble as trouble:
-                self._downloader.trouble(trouble[0])
+                self._downloader.trouble(str(trouble))
  
          if 'length_seconds' not in video_info:
              self._downloader.trouble(u'WARNING: unable to extract video duration')
  
          if 'length_seconds' not in video_info:
              self._downloader.trouble(u'WARNING: unable to extract video duration')
@@ -731,99 +731,6 @@ class DailymotionIE(InfoExtractor):
          }]
  
  
          }]
  
  
-class GoogleIE(InfoExtractor):
-    """Information extractor for video.google.com."""
-
-    _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
-    IE_NAME = u'video.google'
-
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
-    def report_download_webpage(self, video_id):
-        """Report webpage download."""
-        self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
-
-    def report_extraction(self, video_id):
-        """Report information extraction."""
-        self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
-
-    def _real_extract(self, url):
-        # Extract id from URL
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
-            return
-
-        video_id = mobj.group(1)
-
-        video_extension = 'mp4'
-
-        # Retrieve video webpage to extract further information
-        request = compat_urllib_request.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
-        try:
-            self.report_download_webpage(video_id)
-            webpage = compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
-            return
-
-        # Extract URL, uploader, and title from webpage
-        self.report_extraction(video_id)
-        mobj = re.search(r"download_url:'([^']+)'", webpage)
-        if mobj is None:
-            video_extension = 'flv'
-            mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
-        if mobj is None:
-            self._downloader.trouble(u'ERROR: unable to extract media URL')
-            return
-        mediaURL = compat_urllib_parse.unquote(mobj.group(1))
-        mediaURL = mediaURL.replace('\\x3d', '\x3d')
-        mediaURL = mediaURL.replace('\\x26', '\x26')
-
-        video_url = mediaURL
-
-        mobj = re.search(r'<title>(.*)</title>', webpage)
-        if mobj is None:
-            self._downloader.trouble(u'ERROR: unable to extract title')
-            return
-        video_title = mobj.group(1).decode('utf-8')
-
-        # Extract video description
-        mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
-        if mobj is None:
-            self._downloader.trouble(u'ERROR: unable to extract video description')
-            return
-        video_description = mobj.group(1).decode('utf-8')
-        if not video_description:
-            video_description = 'No description available.'
-
-        # Extract video thumbnail
-        if self._downloader.params.get('forcethumbnail', False):
-            request = compat_urllib_request.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
-            try:
-                webpage = compat_urllib_request.urlopen(request).read()
-            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
-                return
-            mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
-            if mobj is None:
-                self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
-                return
-            video_thumbnail = mobj.group(1)
-        else:   # we need something to pass to process_info
-            video_thumbnail = ''
-
-        return [{
-            'id':       video_id.decode('utf-8'),
-            'url':      video_url.decode('utf-8'),
-            'uploader': None,
-            'upload_date':  None,
-            'title':    video_title,
-            'ext':      video_extension.decode('utf-8'),
-        }]
-
-
  class PhotobucketIE(InfoExtractor):
      """Information extractor for photobucket.com."""
  
  class PhotobucketIE(InfoExtractor):
      """Information extractor for photobucket.com."""
  
@@ -892,6 +799,7 @@ class PhotobucketIE(InfoExtractor):
  class YahooIE(InfoExtractor):
      """Information extractor for video.yahoo.com."""
  
  class YahooIE(InfoExtractor):
      """Information extractor for video.yahoo.com."""
  
+    _WORKING = False
      # _VALID_URL matches all Yahoo! Video URLs
      # _VPAGE_URL matches only the extractable '/watch/' URLs
      _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
      # _VALID_URL matches all Yahoo! Video URLs
      # _VPAGE_URL matches only the extractable '/watch/' URLs
      _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
@@ -1080,7 +988,7 @@ class VimeoIE(InfoExtractor):
          except:
              self._downloader.trouble(u'ERROR: unable to extract info section')
              return
          except:
              self._downloader.trouble(u'ERROR: unable to extract info section')
              return
-        
+
          # Extract title
          video_title = config["video"]["title"]
  
          # Extract title
          video_title = config["video"]["title"]
  
@@ -1263,7 +1171,7 @@ class ArteTvIE(InfoExtractor):
              'url':          compat_urllib_parse.unquote(info.get('url')),
              'uploader':     u'arte.tv',
              'upload_date':  info.get('date'),
              'url':          compat_urllib_parse.unquote(info.get('url')),
              'uploader':     u'arte.tv',
              'upload_date':  info.get('date'),
-            'title':        info.get('title'),
+            'title':        info.get('title').decode('utf-8'),
              'ext':          u'mp4',
              'format':       u'NA',
              'player_url':   None,
              'ext':          u'mp4',
              'format':       u'NA',
              'player_url':   None,
@@ -1303,7 +1211,7 @@ class GenericIE(InfoExtractor):
      def report_following_redirect(self, new_url):
          """Report information extraction."""
          self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
      def report_following_redirect(self, new_url):
          """Report information extraction."""
          self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
-        
+
      def _test_redirect(self, url):
          """Check if it is a redirect, like url shorteners, in case restart chain."""
          class HeadRequest(compat_urllib_request.Request):
      def _test_redirect(self, url):
          """Check if it is a redirect, like url shorteners, in case restart chain."""
          class HeadRequest(compat_urllib_request.Request):
@@ -1312,38 +1220,38 @@ class GenericIE(InfoExtractor):
  
          class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
              """
  
          class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
              """
-            Subclass the HTTPRedirectHandler to make it use our 
+            Subclass the HTTPRedirectHandler to make it use our
              HeadRequest also on the redirected URL
              """
              HeadRequest also on the redirected URL
              """
-            def redirect_request(self, req, fp, code, msg, headers, newurl): 
+            def redirect_request(self, req, fp, code, msg, headers, newurl):
                  if code in (301, 302, 303, 307):
                  if code in (301, 302, 303, 307):
-                    newurl = newurl.replace(' ', '%20') 
+                    newurl = newurl.replace(' ', '%20')
                      newheaders = dict((k,v) for k,v in req.headers.items()
                                        if k.lower() not in ("content-length", "content-type"))
                      newheaders = dict((k,v) for k,v in req.headers.items()
                                        if k.lower() not in ("content-length", "content-type"))
-                    return HeadRequest(newurl, 
+                    return HeadRequest(newurl,
                                         headers=newheaders,
                                         headers=newheaders,
-                                       origin_req_host=req.get_origin_req_host(), 
-                                       unverifiable=True) 
-                else: 
-                    raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp) 
+                                       origin_req_host=req.get_origin_req_host(),
+                                       unverifiable=True)
+                else:
+                    raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)
  
          class HTTPMethodFallback(compat_urllib_request.BaseHandler):
              """
              Fallback to GET if HEAD is not allowed (405 HTTP error)
              """
  
          class HTTPMethodFallback(compat_urllib_request.BaseHandler):
              """
              Fallback to GET if HEAD is not allowed (405 HTTP error)
              """
-            def http_error_405(self, req, fp, code, msg, headers): 
+            def http_error_405(self, req, fp, code, msg, headers):
                  fp.read()
                  fp.close()
  
                  newheaders = dict((k,v) for k,v in req.headers.items()
                                    if k.lower() not in ("content-length", "content-type"))
                  fp.read()
                  fp.close()
  
                  newheaders = dict((k,v) for k,v in req.headers.items()
                                    if k.lower() not in ("content-length", "content-type"))
-                return self.parent.open(compat_urllib_request.Request(req.get_full_url(), 
-                                                 headers=newheaders, 
-                                                 origin_req_host=req.get_origin_req_host(), 
+                return self.parent.open(compat_urllib_request.Request(req.get_full_url(),
+                                                 headers=newheaders,
+                                                 origin_req_host=req.get_origin_req_host(),
                                                   unverifiable=True))
  
          # Build our opener
                                                   unverifiable=True))
  
          # Build our opener
-        opener = compat_urllib_request.OpenerDirector() 
+        opener = compat_urllib_request.OpenerDirector()
          for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler,
                          HTTPMethodFallback, HEADRedirectHandler,
                          compat_urllib_error.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
          for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler,
                          HTTPMethodFallback, HEADRedirectHandler,
                          compat_urllib_error.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
@@ -1588,6 +1496,8 @@ class GoogleSearchIE(InfoExtractor):
  
  class YahooSearchIE(InfoExtractor):
      """Information Extractor for Yahoo! Video search queries."""
  
  class YahooSearchIE(InfoExtractor):
      """Information Extractor for Yahoo! Video search queries."""
+
+    _WORKING = False
      _VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+'
      _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
      _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
      _VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+'
      _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
      _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
@@ -1715,7 +1625,7 @@ class YoutubePlaylistIE(InfoExtractor):
              url = self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)
              request = compat_urllib_request.Request(url)
              try:
              url = self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)
              request = compat_urllib_request.Request(url)
              try:
-                page = compat_urllib_request.urlopen(request).read().decode('utf8')
+                page = compat_urllib_request.urlopen(request).read().decode('utf-8')
              except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                  self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
                  return
              except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                  self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
                  return
@@ -1844,7 +1754,7 @@ class YoutubeUserIE(InfoExtractor):
              request = compat_urllib_request.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
  
              try:
              request = compat_urllib_request.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
  
              try:
-                page = compat_urllib_request.urlopen(request).read()
+                page = compat_urllib_request.urlopen(request).read().decode('utf-8')
              except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                  self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
                  return
              except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                  self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
                  return
@@ -2346,7 +2256,7 @@ class MyVideoIE(InfoExtractor):
  
      def __init__(self, downloader=None):
          InfoExtractor.__init__(self, downloader)
  
      def __init__(self, downloader=None):
          InfoExtractor.__init__(self, downloader)
-    
+
      def report_download_webpage(self, video_id):
          """Report webpage download."""
          self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
      def report_download_webpage(self, video_id):
          """Report webpage download."""
          self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
@@ -2367,7 +2277,7 @@ class MyVideoIE(InfoExtractor):
          request = compat_urllib_request.Request('http://www.myvideo.de/watch/%s' % video_id)
          try:
              self.report_download_webpage(video_id)
          request = compat_urllib_request.Request('http://www.myvideo.de/watch/%s' % video_id)
          try:
              self.report_download_webpage(video_id)
-            webpage = compat_urllib_request.urlopen(request).read()
+            webpage = compat_urllib_request.urlopen(request).read().decode('utf-8')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
              return
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
              return
@@ -2400,10 +2310,10 @@ class ComedyCentralIE(InfoExtractor):
      """Information extractor for The Daily Show and Colbert Report """
  
      # urls can be abbreviations like :thedailyshow or :colbert
      """Information extractor for The Daily Show and Colbert Report """
  
      # urls can be abbreviations like :thedailyshow or :colbert
-    # urls for episodes like: 
+    # urls for episodes like:
      # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
      #                     or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
      # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
      #                     or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
-    #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524    
+    #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
      _VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
                        |(https?://)?(www\.)?
                            (?P<showname>thedailyshow|colbertnation)\.com/
      _VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
                        |(https?://)?(www\.)?
                            (?P<showname>thedailyshow|colbertnation)\.com/
@@ -2411,7 +2321,7 @@ class ComedyCentralIE(InfoExtractor):
                            (?P<clip>
                                (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
                                |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
                            (?P<clip>
                                (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
                                |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
-                     $"""                        
+                     $"""
      IE_NAME = u'comedycentral'
  
      _available_formats = ['3500', '2200', '1700', '1200', '750', '400']
      IE_NAME = u'comedycentral'
  
      _available_formats = ['3500', '2200', '1700', '1200', '750', '400']
@@ -2515,7 +2425,7 @@ class ComedyCentralIE(InfoExtractor):
                  return
              else:
                  mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
                  return
              else:
                  mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
-        
+
          playerUrl_raw = mMovieParams[0][0]
          self.report_player_url(epTitle)
          try:
          playerUrl_raw = mMovieParams[0][0]
          self.report_player_url(epTitle)
          try:
@@ -2564,7 +2474,7 @@ class ComedyCentralIE(InfoExtractor):
              if len(turls) == 0:
                  self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
                  continue
              if len(turls) == 0:
                  self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
                  continue
-            
+
              if self._downloader.params.get('listformats', None):
                  self._print_formats([i[0] for i in turls])
                  return
              if self._downloader.params.get('listformats', None):
                  self._print_formats([i[0] for i in turls])
                  return
@@ -2604,7 +2514,7 @@ class ComedyCentralIE(InfoExtractor):
              }
  
              results.append(info)
              }
  
              results.append(info)
-            
+
          return results
  
  
          return results
  
  
@@ -2649,7 +2559,9 @@ class EscapistIE(InfoExtractor):
  
          self.report_config_download(showName)
          try:
  
          self.report_config_download(showName)
          try:
-            configJSON = compat_urllib_request.urlopen(configUrl).read()
+            configJSON = compat_urllib_request.urlopen(configUrl)
+            m = re.match(r'text/html; charset="?([^"]+)"?', configJSON.headers['Content-Type'])
+            configJSON = configJSON.read().decode(m.group(1) if m else 'utf-8')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              self._downloader.trouble(u'ERROR: unable to download configuration: ' + compat_str(err))
              return
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              self._downloader.trouble(u'ERROR: unable to download configuration: ' + compat_str(err))
              return
@@ -2969,6 +2881,8 @@ class InfoQIE(InfoExtractor):
  
  class MixcloudIE(InfoExtractor):
      """Information extractor for www.mixcloud.com"""
  
  class MixcloudIE(InfoExtractor):
      """Information extractor for www.mixcloud.com"""
+
+    _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/
      _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
      IE_NAME = u'mixcloud'
  
      _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
      IE_NAME = u'mixcloud'
  
@@ -3164,7 +3078,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
                  assert entry['type'] == 'reference'
                  results += self.extract(entry['url'])
              return results
                  assert entry['type'] == 'reference'
                  results += self.extract(entry['url'])
              return results
-            
+
          else: # Root page
              info = {
                  'id': 'Stanford OpenClassroom',
          else: # Root page
              info = {
                  'id': 'Stanford OpenClassroom',
@@ -3238,7 +3152,7 @@ class MTVIE(InfoExtractor):
              self._downloader.trouble(u'ERROR: unable to extract performer')
              return
          performer = unescapeHTML(mobj.group(1).decode('iso-8859-1'))
              self._downloader.trouble(u'ERROR: unable to extract performer')
              return
          performer = unescapeHTML(mobj.group(1).decode('iso-8859-1'))
-        video_title = performer + ' - ' + song_name 
+        video_title = performer + ' - ' + song_name
  
          mobj = re.search(r'<meta name="mtvn_uri" content="([^"]+)"/>', webpage)
          if mobj is None:
  
          mobj = re.search(r'<meta name="mtvn_uri" content="([^"]+)"/>', webpage)
          if mobj is None:
@@ -3471,7 +3385,7 @@ class XNXXIE(InfoExtractor):
  class GooglePlusIE(InfoExtractor):
      """Information extractor for plus.google.com."""
  
  class GooglePlusIE(InfoExtractor):
      """Information extractor for plus.google.com."""
  
-    _VALID_URL = r'(?:https://)?plus\.google\.com/(?:\w+/)*?(\d+)/posts/(\w+)'
+    _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
      IE_NAME = u'plus.google'
  
      def __init__(self, downloader=None):
      IE_NAME = u'plus.google'
  
      def __init__(self, downloader=None):
@@ -3479,7 +3393,7 @@ class GooglePlusIE(InfoExtractor):
  
      def report_extract_entry(self, url):
          """Report downloading extry"""
  
      def report_extract_entry(self, url):
          """Report downloading extry"""
-        self._downloader.to_screen(u'[plus.google] Downloading entry: %s' % url.decode('utf-8'))
+        self._downloader.to_screen(u'[plus.google] Downloading entry: %s' % url)
  
      def report_date(self, upload_date):
          """Report downloading extry"""
  
      def report_date(self, upload_date):
          """Report downloading extry"""
@@ -3487,15 +3401,15 @@ class GooglePlusIE(InfoExtractor):
  
      def report_uploader(self, uploader):
          """Report downloading extry"""
  
      def report_uploader(self, uploader):
          """Report downloading extry"""
-        self._downloader.to_screen(u'[plus.google] Uploader: %s' % uploader.decode('utf-8'))
+        self._downloader.to_screen(u'[plus.google] Uploader: %s' % uploader)
  
      def report_title(self, video_title):
          """Report downloading extry"""
  
      def report_title(self, video_title):
          """Report downloading extry"""
-        self._downloader.to_screen(u'[plus.google] Title: %s' % video_title.decode('utf-8'))
+        self._downloader.to_screen(u'[plus.google] Title: %s' % video_title)
  
      def report_extract_vid_page(self, video_page):
          """Report information extraction."""
  
      def report_extract_vid_page(self, video_page):
          """Report information extraction."""
-        self._downloader.to_screen(u'[plus.google] Extracting video page: %s' % video_page.decode('utf-8'))
+        self._downloader.to_screen(u'[plus.google] Extracting video page: %s' % video_page)
  
      def _real_extract(self, url):
          # Extract id from URL
  
      def _real_extract(self, url):
          # Extract id from URL
@@ -3505,7 +3419,7 @@ class GooglePlusIE(InfoExtractor):
              return
  
          post_url = mobj.group(0)
-        video_id = mobj.group(2)
+        video_id = mobj.group(1)
  
          video_extension = 'flv'
  
@@ -3513,7 +3427,7 @@ class GooglePlusIE(InfoExtractor):
          self.report_extract_entry(post_url)
          request = compat_urllib_request.Request(post_url)
          try:
-            webpage = compat_urllib_request.urlopen(request).read()
+            webpage = compat_urllib_request.urlopen(request).read().decode('utf-8')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % compat_str(err))
              return
@@ -3555,7 +3469,7 @@ class GooglePlusIE(InfoExtractor):
          video_page = mobj.group(1)
          request = compat_urllib_request.Request(video_page)
          try:
-            webpage = compat_urllib_request.urlopen(request).read()
+            webpage = compat_urllib_request.urlopen(request).read().decode('utf-8')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
              return
@@ -3577,16 +3491,19 @@ class GooglePlusIE(InfoExtractor):
          # Only get the url. The resolution part in the tuple has no use anymore
          video_url = video_url[-1]
          # Treat escaped \u0026 style hex
-        video_url = unicode(video_url, "unicode_escape")
+        try:
+            video_url = video_url.decode("unicode_escape")
+        except AttributeError: # Python 3
+            video_url = bytes(video_url, 'ascii').decode('unicode-escape')
  
  
          return [{
-            'id':       video_id.decode('utf-8'),
+            'id':       video_id,
              'url':      video_url,
-            'uploader': uploader.decode('utf-8'),
-            'upload_date':  upload_date.decode('utf-8'),
-            'title':    video_title.decode('utf-8'),
-            'ext':      video_extension.decode('utf-8'),
+            'uploader': uploader,
+            'upload_date':  upload_date,
+            'title':    video_title,
+            'ext':      video_extension,
          }]
  
  class NBAIE(InfoExtractor):
@@ -3664,7 +3581,7 @@ class JustinTVIE(InfoExtractor):
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              self._downloader.trouble(u'ERROR: unable to download video info JSON: %s' % compat_str(err))
              return
-        
+
          response = json.loads(webpage)
          info = []
          for clip in response:
@@ -3687,7 +3604,7 @@ class JustinTVIE(InfoExtractor):
          if mobj is None:
              self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
              return
-        
+
          api = 'http://api.justin.tv'
          video_id = mobj.group(mobj.lastindex)
          paged = False
@@ -3697,9 +3614,9 @@ class JustinTVIE(InfoExtractor):
          else:
              api += '/clip/show/%s.json'
          api = api % (video_id,)
-        
+
          self.report_extraction(video_id)
-        
+
          info = []
          offset = 0
          limit = self._JUSTIN_PAGE_LIMIT