From: Jaime Marquínez Ferrándiz <jaimemf93@gmail.com>
Date: Thu, 28 Mar 2013 12:02:04 +0000 (+0100)
Subject: Merge branch 'master' into extract_info_rewrite
X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=commitdiff_plain;h=cfa90f4adce8b5e7faf92d0a08abe38630b150b8;p=youtube-dl.git

Merge branch 'master' into extract_info_rewrite
---

cfa90f4adce8b5e7faf92d0a08abe38630b150b8
diff --cc youtube_dl/InfoExtractors.py
index e714fa6b0,835428f32..dd4a776e4
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@@ -126,26 -126,14 +126,32 @@@ class InfoExtractor(object)
      def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
          """ Returns the data of the page as a string """
          urlh = self._request_webpage(url_or_request, video_id, note, errnote)
+         content_type = urlh.headers.get('Content-Type', '')
+         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
+         if m:
+             encoding = m.group(1)
+         else:
+             encoding = 'utf-8'
          webpage_bytes = urlh.read()
-         return webpage_bytes.decode('utf-8', 'replace')
+         return webpage_bytes.decode(encoding, 'replace')
 +        
 +    #Methods for following #608
 +    #They set the correct value of the '_type' key
 +    def video_result(self, video_info):
 +        """Returns a video"""
 +        video_info['_type'] = 'video'
 +        return video_info
 +    def url_result(self, url, ie=None):
 +        """Returns a url that points to a page that should be processed"""
 +        #TODO: ie should be the class used for getting the info
 +        video_info = {'_type': 'url',
 +                      'url': url}
 +        return video_info
 +    def playlist_result(self, entries):
 +        """Returns a playlist"""
 +        video_info = {'_type': 'playlist',
 +                      'entries': entries}
 +        return video_info
  
  
  class YoutubeIE(InfoExtractor):
@@@ -1362,20 -1382,15 +1400,15 @@@ class GenericIE(InfoExtractor)
              return False
  
          self.report_following_redirect(new_url)
 -        self._downloader.download([new_url])
 -        return True
 +        return new_url
  
      def _real_extract(self, url):
 -        if self._test_redirect(url): return
 +        new_url = self._test_redirect(url)
 +        if new_url: return [self.url_result(new_url)]
  
          video_id = url.split('/')[-1]
-         request = compat_urllib_request.Request(url)
          try:
-             self.report_download_webpage(video_id)
-             webpage = compat_urllib_request.urlopen(request).read()
-         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-             self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
-             return
+             webpage = self._download_webpage(url, video_id)
          except ValueError as err:
              # since this is the last-resort InfoExtractor, if
              # this error is thrown, it'll be thrown here