9 from .common import InfoExtractor, SearchInfoExtractor
10 from .subtitles import SubtitlesInfoExtractor
16 compat_urllib_request,
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    # Machine name looked up in the user's .netrc file for credentials.
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

    def _set_language(self):
        # Request _LANG_URL purely for its side effect (pins en/US language
        # for the session); the response body is discarded.
        request = compat_urllib_request.Request(self._LANG_URL)
        compat_urllib_request.urlopen(request).read()
        # Failing to set the language is non-fatal: warn and carry on.
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))

        # --- login flow (method header not visible in this excerpt) ---
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if self._LOGIN_REQUIRED:
            raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)

        request = compat_urllib_request.Request(self._LOGIN_URL)
        login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))

        # Hidden form tokens scraped out of the login page and posted back.
        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)

        # Fields of the sign-in form posted to the Google login endpoint.
        u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
        u'PersistentCookie': u'yes',
        u'bgresponse': u'js_disabled',
        u'checkConnection': u'',
        u'checkedDomains': u'youtube',
        u'signIn': u'Sign in',
        u'service': u'youtube',
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
        # If the login form is still present in the response, the
        # credentials were rejected.
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username or password')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))

    def _confirm_age(self):
        # POST the age-verification form. Unlike language/login setup this
        # raises on failure, since age-gated extraction cannot proceed.
        'action_confirm': 'Confirm',
        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
        self.report_age_confirmation()
        compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))

    def _real_initialize(self):
        # One-time setup before extraction: language cookie, then login.
        if self._downloader is None:
        if not self._set_language():
        if not self._login():
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    IE_DESC = u'YouTube.com'
                         (?:https?://)?                                       # http(s):// (optional)
                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                            tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                         |youtu\.be/                                          # just youtu.be/xxxx
                         )?                                                   # all until now is optional -> you can pass the naked ID
                         ([0-9A-Za-z_-]{11})                                  # here is it! the YouTube video ID
                         (?(1).+)?                                            # if we found the ID, everything can follow
    # Matches the next_url parameter used by redirection pages (e.g. age check).
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          '85', '84', '102', '83', '101', '82', '100',
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          '141', '172', '140', '171', '139',
    # Same itags, but free (webm) containers ranked ahead of non-free ones.
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
                                     # Apple HTTP Live Streaming
                                     '96', '95', '94', '93', '92', '132', '151',
                                     '85', '102', '84', '101', '83', '100', '82',
                                     '138', '248', '137', '247', '136', '246', '245',
                                     '244', '135', '243', '134', '242', '133', '160',
                                     '172', '141', '171', '140', '139',
    # Container name -> itags of that container, best quality first.
    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    # itag -> file extension.
    _video_extensions = {
        # Apple HTTP Live Streaming
    # itag -> human-readable resolution string.
    _video_dimensions = {
        # --- test fixtures (enclosing list delimiters not visible in this excerpt) ---
        u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
        u"file": u"BaW_jenozKc.mp4",
        u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
        u"uploader": u"Philipp Hagemeister",
        u"uploader_id": u"phihag",
        u"upload_date": u"20121002",
        u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
        u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
        u"file": u"1ltcDfZMA3U.flv",
        u"note": u"Test VEVO video (#897)",
        u"upload_date": u"20070518",
        u"title": u"Maps - It Will Find You",
        u"description": u"Music video by Maps performing It Will Find You.",
        u"uploader": u"MuteUSA",
        u"uploader_id": u"MuteUSA"
        u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
        u"file": u"UxxajLWwzqY.mp4",
        u"note": u"Test generic use_cipher_signature video (#897)",
        u"upload_date": u"20120506",
        u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
        u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
        u"uploader": u"Icona Pop",
        u"uploader_id": u"IconaPop"
        u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
        u"file": u"07FYdnEawAQ.mp4",
        u"note": u"Test VEVO video with age protection (#956)",
        u"upload_date": u"20130703",
        u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
        u"description": u"md5:64249768eec3bc4276236606ea996373",
        u"uploader": u"justintimberlakeVEVO",
        u"uploader_id": u"justintimberlakeVEVO"
        u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
        u'file': u'TGi3HqYrWHE.mp4',
        u'note': u'm3u8 video',
        u'title': u'Triathlon - Men - London 2012 Olympic Games',
        u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
        u'uploader': u'olympic',
        u'upload_date': u'20120807',
        u'uploader_id': u'olympic',
        u'skip_download': True,

    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # Playlist URLs can also match the video pattern; defer to the playlist IE.
        if YoutubePlaylistIE.suitable(url): return False
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def report_video_webpage_download(self, video_id):
        """Report attempt to download video webpage."""
        self.to_screen(u'%s: Downloading video webpage' % video_id)

    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self.to_screen(u'%s: Downloading video info webpage' % video_id)

    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self.to_screen(u'%s: Extracting video information' % video_id)

    def report_unavailable_format(self, video_id, format):
        """Report extracted video URL."""
        self.to_screen(u'%s: Format %s not available' % (video_id, format))

    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self.to_screen(u'RTMP download detected')

    def _decrypt_signature(self, s):
        """Turn the encrypted s field into a working signature"""
        # NOTE(review): each return below is the fixed permutation for one
        # specific signature length; the guarding length checks are not
        # visible in this excerpt.
        return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
        return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
        return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
        return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
        return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
        return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
        return s[40] + s[82:43:-1] + s[22] + s[42:40:-1] + s[83] + s[39:22:-1] + s[0] + s[21:2:-1]
        return s[81:36:-1] + s[0] + s[35:2:-1]
        return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
        return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
        return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
        return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
        return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]

        raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))

    def _decrypt_signature_age_gate(self, s):
        # The videos with age protection use another player, so the algorithms
        return s[2:63] + s[82] + s[64:82] + s[63]
        # Fallback to the other algorithms
        return self._decrypt_signature(s)

    def _get_available_subtitles(self, video_id):
        # Returns a {language: timedtext-url} map built from the caption
        # track listing; warns (instead of failing) when nothing is found.
        request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
        sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
        lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
        params = compat_urllib_parse.urlencode({
            'fmt': self._downloader.params.get('subtitlesformat'),
        url = u'http://www.youtube.com/api/timedtext?' + params
        sub_lang_list[lang] = url
        if not sub_lang_list:
            self._downloader.report_warning(u'video doesn\'t have subtitles')

    def _request_automatic_caption(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
           argument to speed up the process."""
        sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
        sub_format = self._downloader.params.get('subtitlesformat')
        self.to_screen(u'%s: Looking for automatic captions' % video_id)
        # The caption URL lives inside the inline ytplayer.config JSON blob.
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
        self._downloader.report_warning(err_msg)
        player_config = json.loads(mobj.group(1))
        args = player_config[u'args']
        caption_url = args[u'ttsurl']
        timestamp = args[u'timestamp']
        params = compat_urllib_parse.urlencode({
        subtitles_url = caption_url + '&' + params
        sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
        return {sub_lang: sub}
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)

    def _print_formats(self, formats):
        # Print one line per itag: extension, resolution and special note.
        print('Available formats:')
        print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
                                    self._video_dimensions.get(x, '???'),
                                    ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))

    def _extract_id(self, url):
        # Group 2 of _VALID_URL is the 11-character video id.
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(2)

    def _get_video_url_list(self, url_map):
        """
        Transform a dictionary in the format {itag:url} to a list of (itag, url)
        with the requested formats.
        """
        req_format = self._downloader.params.get('format', None)
        format_limit = self._downloader.params.get('format_limit', None)
        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
        # A format limit truncates the quality-ordered list at that itag.
        if format_limit is not None and format_limit in available_formats:
            format_list = available_formats[available_formats.index(format_limit):]
            format_list = available_formats
        existing_formats = [x for x in format_list if x in url_map]
        if len(existing_formats) == 0:
            raise ExtractorError(u'no known formats available for video')
        if self._downloader.params.get('listformats', None):
            self._print_formats(existing_formats)
        if req_format is None or req_format == 'best':
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
        elif req_format == 'worst':
            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
        elif req_format in ('-1', 'all'):
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
            # Specific formats. We pick the first in a slash-delimited sequence.
            # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
            # available in the specified format. For example,
            # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
            # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
            # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
            req_formats = req_format.split('/')
            video_url_list = None
            for rf in req_formats:
                video_url_list = [(rf, url_map[rf])]
                if rf in self._video_formats_map:
                    for srf in self._video_formats_map[rf]:
                        video_url_list = [(srf, url_map[srf])]
        if video_url_list is None:
            raise ExtractorError(u'requested format not available')
        return video_url_list

    def _extract_from_m3u8(self, manifest_url, video_id):
        # Build an {itag: url} map from an HLS master manifest.
        def _get_urls(_manifest):
            # Non-comment, non-empty manifest lines are the variant URLs.
            lines = _manifest.split('\n')
            urls = filter(lambda l: l and not l.startswith('#'),
        manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
        formats_urls = _get_urls(manifest)
        for format_url in formats_urls:
            itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
            url_map[itag] = format_url

    def _real_extract(self, url):
        """Extract metadata and per-format download URLs for one video."""
        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')

        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
            url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
        video_id = self._extract_id(url)

        # Download the watch page (forced en/US, verification flag set).
        self.report_video_webpage_download(video_id)
        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
        request = compat_urllib_request.Request(url)
        video_webpage_bytes = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))

        video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')

        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
            # Un-escape the JS-escaped URL (\\/ -> /).
            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))

        # Download the get_video_info data.
        self.report_video_info_webpage_download(video_id)
        if re.search(r'player-age-gate-content">', video_webpage) is not None:
            self.report_age_confirmation()
            # We simulate the access to the video from www.youtube.com/v/{video_id}
            # this can be viewed without login into Youtube
            data = compat_urllib_parse.urlencode({'video_id': video_id,
                'eurl': 'https://youtube.googleapis.com/v/' + video_id,
            video_info_url = 'https://www.youtube.com/get_video_info?' + data
            video_info_webpage = self._download_webpage(video_info_url, video_id,
                errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
            # Try each 'el' variant until one response contains a token.
            for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
                video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                    % (video_id, el_type))
                video_info_webpage = self._download_webpage(video_info_url, video_id,
                    errnote='unable to download video info webpage')
                video_info = compat_parse_qs(video_info_webpage)
                if 'token' in video_info:
        if 'token' not in video_info:
            if 'reason' in video_info:
                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
                raise ExtractorError(u'"token" parameter not in video info for unknown reason')

        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError(u'"rental" videos not supported')

        # Start extracting information
        self.report_information_extraction(video_id)

        # uploader
        if 'author' not in video_info:
            raise ExtractorError(u'Unable to extract uploader name')
        video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])

        # uploader_id (from the itemprop link on the watch page; optional)
        video_uploader_id = None
        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
            video_uploader_id = mobj.group(1)
            self._downloader.report_warning(u'unable to extract uploader nickname')

        # title
        if 'title' not in video_info:
            raise ExtractorError(u'Unable to extract video title')
        video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])

        # thumbnail image
        # We try first to get a high quality image:
        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                            video_webpage, re.DOTALL)
        if m_thumb is not None:
            video_thumbnail = m_thumb.group(1)
        elif 'thumbnail_url' not in video_info:
            self._downloader.report_warning(u'unable to extract video thumbnail')
        else:   # don't panic if we can't find it
            video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])

        # upload date: normalise separators, then parse with unified_strdate.
        mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
            upload_date = unified_strdate(upload_date)

        # description: page element first, <meta> tag as fallback.
        video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
            video_description = clean_html(video_description)
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
            video_description = unescapeHTML(fd_mobj.group(1))
            video_description = u''

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, video_webpage)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id)

        if 'length_seconds' not in video_info:
            self._downloader.report_warning(u'unable to extract video duration')
        video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])

        # Decide which formats to download

        mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
            raise ValueError('Could not find vevo ID')
        info = json.loads(mobj.group(1))

        # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
        # this signatures are encrypted
        m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
            self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
            video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
        m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
            # Merge adaptive formats into the main stream map.
            if 'url_encoded_fmt_stream_map' in video_info:
                video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
                video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
        elif 'adaptive_fmts' in video_info:
            if 'url_encoded_fmt_stream_map' in video_info:
                video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
                video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']

        # Three sources for format URLs: rtmp 'conn', the stream map, or HLS.
        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            video_url_list = [(None, video_info['conn'][0])]
        elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
            if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
            for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
                url_data = compat_parse_qs(url_data_str)
                if 'itag' in url_data and 'url' in url_data:
                    url = url_data['url'][0]
                    if 'sig' in url_data:
                        # Plain signature: append as-is.
                        url += '&signature=' + url_data['sig'][0]
                    elif 's' in url_data:
                        # Encrypted signature: decrypt before appending.
                        if self._downloader.params.get('verbose'):
                            # Log which player/signature length we are decrypting.
                            player_version = self._search_regex(r'ad3-(.+?)\.swf',
                                video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
                                'flash player', fatal=False)
                            player = 'flash player %s' % player_version
                            player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
                                'html5 player', fatal=False)
                            parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
                            self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                                (len(s), parts_sizes, url_data['itag'][0], player))
                        encrypted_sig = url_data['s'][0]
                        signature = self._decrypt_signature_age_gate(encrypted_sig)
                        signature = self._decrypt_signature(encrypted_sig)
                        url += '&signature=' + signature
                    if 'ratebypass' not in url:
                        url += '&ratebypass=yes'
                    url_map[url_data['itag'][0]] = url
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
        elif video_info.get('hlsvp'):
            manifest_url = video_info['hlsvp'][0]
            url_map = self._extract_from_m3u8(manifest_url, video_id)
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
            raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')

        # Build one result dict per selected format.
        for format_param, video_real_url in video_url_list:
            # Extension
            video_extension = self._video_extensions.get(format_param, 'flv')

            video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
                                                 self._video_dimensions.get(format_param, '???'),
                                                 ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')

                'url': video_real_url,
                'uploader': video_uploader,
                'uploader_id': video_uploader_id,
                'upload_date': upload_date,
                'title': video_title,
                'ext': video_extension,
                'format': video_format,
                'thumbnail': video_thumbnail,
                'description': video_description,
                'player_url': player_url,
                'subtitles': video_subtitles,
                'duration': video_duration
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = u'YouTube.com playlists'
                        (?:course|view_play_list|my_playlists|artist|playlist|watch)
                        \? (?:.*?&)*? (?:p|a|list)=
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
    # GData API playlist feed; parameters: playlist id, page size, start index.
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    IE_NAME = u'youtube:playlist'

    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download playlist videos from API
        playlist_id = mobj.group(1) or mobj.group(2)

        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            # Stop paging once the start index reaches 1000.
            if start_index >= 1000:
                self._downloader.report_warning(u'Max number of results reached')
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)

            response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS

            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']

        # Sort collected (position, url) pairs by playlist position, keep urls.
        videos = [v[1] for v in sorted(videos)]

        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
class YoutubeChannelIE(InfoExtractor):
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    # First page of a channel's video list; parameters: channel id, page number.
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    # Marker present in the HTML while further pages exist.
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        # Collect watch-link video ids in first-seen order, without duplicates.
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download channel page
        channel_id = mobj.group(1)

        url = self._TEMPLATE_URL % (channel_id, pagenum)
        page = self._download_webpage(url, channel_id,
            u'Downloading page #%s' % pagenum)

        # Extract video identifiers
        ids_in_page = self.extract_videos_from_page(page)
        video_ids.extend(ids_in_page)

        # Download any subsequent channel pages using the json-based channel_ajax query
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                    u'Downloading page #%s' % pagenum)

                # The AJAX endpoint returns JSON wrapping the HTML fragment.
                page = json.loads(page)

                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)

                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    # Page size of the GData uploads feed.
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    def suitable(cls, url):
        # Don't return True if the url can be extracted with other youtube
        # extractors: the regex is too permissive and it would match.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies): return False
        else: return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got

        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            # Extract video identifiers
            for entry in response['feed']['entry']:
                # The feed entry id ends with '/<video_id>'.
                ids_in_page.append(entry['id']['$t'].split('/')[-1])
            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query

            if len(ids_in_page) < self._GDATA_PAGE_SIZE:

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title = username)]
class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
    # GData search endpoint; 50 results per page, parameters: query, start index.
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum+1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
            request = compat_urllib_request.Request(result_url)
            data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if not 'items' in api_response:
                raise ExtractorError(u'[youtube] No video results')

            new_ids = list(video['id'] for video in api_response['items'])
            video_ids += new_ids

            # Never ask for more results than the API says exist.
            limit = min(n, api_response['totalItems'])

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeShowIE(InfoExtractor):
    """Extractor for multi-season YouTube shows: resolves a show page into
    one playlist result per season."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        match = re.match(self._VALID_URL, url)
        show_name = match.group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Each season of the show is linked as its own playlist.
        season_links = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_links)))
        results = []
        for season in season_links:
            playlist_url = 'https://www.youtube.com' + season.group(1)
            results.append(self.url_result(playlist_url, 'YoutubePlaylist'))
        return results
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # Feeds are per-user, so a login is always required.
    _LOGIN_REQUIRED = True
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    def _FEED_TEMPLATE(self):
        # Build the feed_ajax URL; '%%s' leaves a paging placeholder for later.
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

        # IE_NAME is derived from the feed name (property header not visible
        # in this excerpt).
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):

    def _real_extract(self, url):
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i*self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                u'%s feed' % self._FEED_NAME,
                u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
            # Pull watch-link video ids out of the HTML fragment, de-duplicated.
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
            # A null 'paging' value means there are no further pages.
            if info['paging'] is None:
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's subscriptions feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's recommended-videos feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's "Watch Later" list."""
    # Watch-later is served via the personal-feed action (see base class).
    _PERSONAL_FEED = True
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the logged-in user's favourites page to its backing playlist."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The favourites page embeds the id of the playlist that backs it.
        favourites_playlist = self._search_regex(r'list=(.+?)["&]', page, u'favourites playlist id')
        return self.url_result(favourites_playlist, 'YoutubePlaylist')