From: Jaime Marquínez Ferrándiz Date: Thu, 28 Mar 2013 12:02:04 +0000 (+0100) Subject: Merge branch 'master' into extract_info_rewrite X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=commitdiff_plain;h=cfa90f4adce8b5e7faf92d0a08abe38630b150b8;p=youtube-dl.git Merge branch 'master' into extract_info_rewrite --- cfa90f4adce8b5e7faf92d0a08abe38630b150b8 diff --cc youtube_dl/InfoExtractors.py index e714fa6b0,835428f32..dd4a776e4 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@@ -126,26 -126,14 +126,32 @@@ class InfoExtractor(object) def _download_webpage(self, url_or_request, video_id, note=None, errnote=None): """ Returns the data of the page as a string """ urlh = self._request_webpage(url_or_request, video_id, note, errnote) + content_type = urlh.headers.get('Content-Type', '') + m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) + if m: + encoding = m.group(1) + else: + encoding = 'utf-8' webpage_bytes = urlh.read() - return webpage_bytes.decode('utf-8', 'replace') + return webpage_bytes.decode(encoding, 'replace') + + #Methods for following #608 + #They set the correct value of the '_type' key + def video_result(self, video_info): + """Returns a video""" + video_info['_type'] = 'video' + return video_info + def url_result(self, url, ie=None): + """Returns a url that points to a page that should be processed""" + #TODO: ie should be the class used for getting the info + video_info = {'_type': 'url', + 'url': url} + return video_info + def playlist_result(self, entries): + """Returns a playlist""" + video_info = {'_type': 'playlist', + 'entries': entries} + return video_info class YoutubeIE(InfoExtractor): @@@ -1362,20 -1382,15 +1400,15 @@@ class GenericIE(InfoExtractor) return False self.report_following_redirect(new_url) - self._downloader.download([new_url]) - return True + return new_url def _real_extract(self, url): - if self._test_redirect(url): return + new_url = self._test_redirect(url) + if new_url: return [self.url_result(new_url)] video_id = url.split('/')[-1] - request = compat_urllib_request.Request(url) try: - self.report_download_webpage(video_id) - webpage = compat_urllib_request.urlopen(request).read() - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) - return + webpage = self._download_webpage(url, video_id) except ValueError as err: # since this is the last-resort InfoExtractor, if # this error is thrown, it'll be thrown here