X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=35b95fdc2332f78288f3fdfc26fd271a5db8e353;hb=1c251cd9482bd0168ca844ad281317b5c19cd607;hp=d09128555f87f962339be4eb13f51d95c76e615f;hpb=c5e8d7af0ed867d70502491e3a80ee09b78ed2ce;p=youtube-dl.git diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index d09128555..35b95fdc2 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1,12 +1,11 @@ # coding: utf-8 -from __future__ import absolute_import import json import netrc import re import socket -from .common import InfoExtractor +from .common import InfoExtractor, SearchInfoExtractor from ..utils import ( compat_http_client, compat_parse_qs, @@ -59,7 +58,7 @@ class YoutubeIE(InfoExtractor): '18': 'mp4', '22': 'mp4', '37': 'mp4', - '38': 'video', # You actually don't know if this will be MOV, AVI or whatever + '38': 'mp4', '43': 'webm', '44': 'webm', '45': 'webm', @@ -343,7 +342,7 @@ class YoutubeIE(InfoExtractor): request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form)) try: self.report_age_confirmation() - age_results = compat_urllib_request.urlopen(request).read().decode('utf-8') + compat_urllib_request.urlopen(request).read().decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err)) @@ -471,7 +470,7 @@ class YoutubeIE(InfoExtractor): self._downloader.report_warning(sub_error) if self._downloader.params.get('listsubtitles', False): - sub_lang_list = self._list_available_subtitles(video_id) + self._list_available_subtitles(video_id) return if 'length_seconds' not in video_info: @@ -480,9 +479,6 @@ class YoutubeIE(InfoExtractor): else: video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]) - # token - video_token = compat_urllib_parse.unquote_plus(video_info['token'][0]) - # Decide which formats to download req_format = self._downloader.params.get('format', None) @@ -490,7 +486,7 @@ class YoutubeIE(InfoExtractor): mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage) info = json.loads(mobj.group(1)) args = info['args'] - if args.get('ptk','') == 'vevo' or 'dashmpd': + if args.get('ptk','') == 'vevo' or 'dashmpd' in args: # Vevo videos with encrypted signatures self.to_screen(u'%s: Vevo video detected.' % video_id) video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']] @@ -755,3 +751,45 @@ class YoutubeUserIE(InfoExtractor): urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids] url_results = [self.url_result(url, 'Youtube') for url in urls] return [self.playlist_result(url_results, playlist_title = username)] + +class YoutubeSearchIE(SearchInfoExtractor): + """Information Extractor for YouTube search queries.""" + _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc' + _MAX_RESULTS = 1000 + IE_NAME = u'youtube:search' + _SEARCH_KEY = 'ytsearch' + + def report_download_page(self, query, pagenum): + """Report attempt to download search page with given number.""" + self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) + + def _get_n_results(self, query, n): + """Get a specified number of results for a query""" + + video_ids = [] + pagenum = 0 + limit = n + + while (50 * pagenum) < limit: + self.report_download_page(query, pagenum+1) + result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1) + request = compat_urllib_request.Request(result_url) + try: + data = compat_urllib_request.urlopen(request).read().decode('utf-8') + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + raise ExtractorError(u'Unable to download API page: %s' % compat_str(err)) + api_response = json.loads(data)['data'] + + if not 'items' in api_response: + raise ExtractorError(u'[youtube] No video results') + + new_ids = list(video['id'] for video in api_response['items']) + video_ids += new_ids + + limit = min(n, api_response['totalItems']) + pagenum += 1 + + if len(video_ids) > n: + video_ids = video_ids[:n] + videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids] + return self.playlist_result(videos, query)