9 from .common import InfoExtractor, SearchInfoExtractor
15 compat_urllib_request,
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Google account sign-in endpoint used by _login().
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    # Forces an English / US-region UI so scraped pages have a predictable layout.
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    # Age-verification form target used by _confirm_age().
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    # Machine name looked up in ~/.netrc for stored credentials.
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False
35 def report_lang(self):
36 """Report attempt to set language."""
37 self.to_screen(u'Setting language')
39 def _set_language(self):
40 request = compat_urllib_request.Request(self._LANG_URL)
43 compat_urllib_request.urlopen(request).read()
44 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
45 self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
        # Log in to a Google account with the configured credentials so that
        # restricted videos become reachable.
        # NOTE(review): this paste is missing several lines of this method
        # (the `def _login(self):` header, the username-is-None guard, the
        # `try:` headers, the GALX/dsh match guards, and roughly half of the
        # login form fields).  All visible tokens are preserved unchanged.
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if self._LOGIN_REQUIRED:
            # Abort: this extractor cannot operate anonymously.
            raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)

        request = compat_urllib_request.Request(self._LOGIN_URL)
        login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))

        # Scrape the anti-CSRF hidden inputs (GALX / dsh) out of the form.
        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)

        # Static fields posted alongside the credentials (partial in this view).
        u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
        u'PersistentCookie': u'yes',
        u'bgresponse': u'js_disabled',
        u'checkConnection': u'',
        u'checkedDomains': u'youtube',
        u'signIn': u'Sign in',
        u'service': u'youtube',

        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # cannot handle unicode values.
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
        # A re-rendered login form in the response means authentication failed.
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username or password')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
111 def _confirm_age(self):
114 'action_confirm': 'Confirm',
116 request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
118 self.report_age_confirmation()
119 compat_urllib_request.urlopen(request).read().decode('utf-8')
120 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
121 raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
124 def _real_initialize(self):
125 if self._downloader is None:
127 if not self._set_language():
129 if not self._login():
class YoutubeIE(YoutubeBaseInfoExtractor):
    IE_DESC = u'YouTube.com'
    # NOTE(review): the `_VALID_URL = r"""` opener and closer are missing from
    # this paste; the verbose-regex body below is otherwise preserved.  The
    # pattern is compiled with re.VERBOSE (see suitable()/_extract_id()).
        (?:https?://)?                                       # http(s):// (optional)
        (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
           tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
         (?:.*?\#/)?                                         # handle anchor (#/) redirect urls
         (?:                                                 # the various things that can precede the ID:
             (?:(?:v|embed|e)/)                              # v/ or embed/ or e/
             |(?:                                            # or the v= param in all its forms
                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?   # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                 (?:\?|\#!?)                                 # the params delimiter ? or # or #!
                 (?:.*?&)?                                   # any other preceding param (like /?s=tuff&v=xxxx)
         |youtu\.be/                                         # just youtu.be/xxxx
         )?                                                  # all until now is optional -> you can pass the naked ID
        ([0-9A-Za-z_-]+)                                     # here is it! the YouTube video ID
        (?(1).+)?                                            # if we found the ID, everything can follow
    # Regex pulling the original target out of redirect-style URLs.
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    # NOTE(review): entries of these itag tables are partially elided here.
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          '85', '84', '102', '83', '101', '82', '100',
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          '141', '172', '140', '171', '139',
    # Same itags, but free (WebM) containers sorted ahead of equal-quality MP4.
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
                                      # Apple HTTP Live Streaming
                                      '96', '95', '94', '93', '92', '132', '151',
                                      '85', '102', '84', '101', '83', '100', '82',
                                      '138', '248', '137', '247', '136', '246', '245',
                                      '244', '135', '243', '134', '242', '160',
                                      '172', '141', '171', '140', '139',
    # Container name -> itags in that container, best quality first.
    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    # itag -> file extension (entries elided in this paste).
    _video_extensions = {
        # Apple HTTP Live Streaming
    # itag -> display resolution (entries elided in this paste).
    _video_dimensions = {
    # NOTE(review): the `_TESTS = [` opener and the per-entry braces were lost
    # in this paste; the test fixtures below are otherwise preserved.
            u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file": u"BaW_jenozKc.mp4",
            u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
            u"uploader": u"Philipp Hagemeister",
            u"uploader_id": u"phihag",
            u"upload_date": u"20121002",
            u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
            u"file": u"1ltcDfZMA3U.flv",
            u"note": u"Test VEVO video (#897)",
            u"upload_date": u"20070518",
            u"title": u"Maps - It Will Find You",
            u"description": u"Music video by Maps performing It Will Find You.",
            u"uploader": u"MuteUSA",
            u"uploader_id": u"MuteUSA"
            u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file": u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
            u"upload_date": u"20120506",
            u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
            u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
            u"uploader": u"Icona Pop",
            u"uploader_id": u"IconaPop"
            u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file": u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
            u"upload_date": u"20130703",
            u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
            u"description": u"md5:64249768eec3bc4276236606ea996373",
            u"uploader": u"justintimberlakeVEVO",
            u"uploader_id": u"justintimberlakeVEVO"
            u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
            u'file': u'TGi3HqYrWHE.mp4',
            u'note': u'm3u8 video',
            u'title': u'Triathlon - Men - London 2012 Olympic Games',
            u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
            u'uploader': u'olympic',
            u'upload_date': u'20120807',
            u'uploader_id': u'olympic',
            u'skip_download': True,
387 def suitable(cls, url):
388 """Receives a URL and returns True if suitable for this IE."""
389 if YoutubePlaylistIE.suitable(url): return False
390 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
392 def report_video_webpage_download(self, video_id):
393 """Report attempt to download video webpage."""
394 self.to_screen(u'%s: Downloading video webpage' % video_id)
396 def report_video_info_webpage_download(self, video_id):
397 """Report attempt to download video info webpage."""
398 self.to_screen(u'%s: Downloading video info webpage' % video_id)
400 def report_video_subtitles_download(self, video_id):
401 """Report attempt to download video info webpage."""
402 self.to_screen(u'%s: Checking available subtitles' % video_id)
404 def report_video_subtitles_request(self, video_id, sub_lang, format):
405 """Report attempt to download video info webpage."""
406 self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
408 def report_video_subtitles_available(self, video_id, sub_lang_list):
409 """Report available subtitles."""
410 sub_lang = ",".join(list(sub_lang_list.keys()))
411 self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang))
413 def report_information_extraction(self, video_id):
414 """Report attempt to extract video information."""
415 self.to_screen(u'%s: Extracting video information' % video_id)
417 def report_unavailable_format(self, video_id, format):
418 """Report extracted video URL."""
419 self.to_screen(u'%s: Format %s not available' % (video_id, format))
421 def report_rtmp_download(self):
422 """Indicate the download will use the RTMP protocol."""
423 self.to_screen(u'RTMP download detected')
425 def _decrypt_signature(self, s):
426 """Turn the encrypted s field into a working signature"""
429 return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
431 return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
433 return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
435 return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
437 return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
439 return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
441 return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
443 return s[81:36:-1] + s[0] + s[35:2:-1]
445 return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
447 return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
449 return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
451 return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
453 return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
456 raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
458 def _decrypt_signature_age_gate(self, s):
459 # The videos with age protection use another player, so the algorithms
462 return s[2:63] + s[82] + s[64:82] + s[63]
464 # Fallback to the other algortihms
465 return self._decrypt_signature(s)
468 def _get_available_subtitles(self, video_id):
469 self.report_video_subtitles_download(video_id)
470 request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
472 sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
473 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
474 self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
476 sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
477 sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
478 if not sub_lang_list:
479 self._downloader.report_warning(u'video doesn\'t have subtitles')
483 def _list_available_subtitles(self, video_id):
484 sub_lang_list = self._get_available_subtitles(video_id)
485 self.report_video_subtitles_available(video_id, sub_lang_list)
487 def _request_subtitle(self, sub_lang, sub_name, video_id, format):
489 Return the subtitle as a string or None if they are not found
491 self.report_video_subtitles_request(video_id, sub_lang, format)
492 params = compat_urllib_parse.urlencode({
498 url = 'http://www.youtube.com/api/timedtext?' + params
500 sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
501 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
502 self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
505 self._downloader.report_warning(u'Did not fetch video subtitles')
509 def _request_automatic_caption(self, video_id, webpage):
510 """We need the webpage for getting the captions url, pass it as an
511 argument to speed up the process."""
512 sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
513 sub_format = self._downloader.params.get('subtitlesformat')
514 self.to_screen(u'%s: Looking for automatic captions' % video_id)
515 mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
516 err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
518 self._downloader.report_warning(err_msg)
520 player_config = json.loads(mobj.group(1))
522 args = player_config[u'args']
523 caption_url = args[u'ttsurl']
524 timestamp = args[u'timestamp']
525 params = compat_urllib_parse.urlencode({
532 subtitles_url = caption_url + '&' + params
533 sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
534 return {sub_lang: sub}
535 # An extractor error can be raise by the download process if there are
536 # no automatic captions but there are subtitles
537 except (KeyError, ExtractorError):
538 self._downloader.report_warning(err_msg)
541 def _extract_subtitles(self, video_id):
543 Return a dictionary: {language: subtitles} or {} if the subtitles
546 available_subs_list = self._get_available_subtitles(video_id)
547 sub_format = self._downloader.params.get('subtitlesformat')
548 if not available_subs_list: #There was some error, it didn't get the available subtitles
550 if self._downloader.params.get('allsubtitles', False):
551 sub_lang_list = available_subs_list
553 if self._downloader.params.get('subtitleslangs', False):
554 reqested_langs = self._downloader.params.get('subtitleslangs')
555 elif 'en' in available_subs_list:
556 reqested_langs = ['en']
558 reqested_langs = [list(available_subs_list.keys())[0]]
561 for sub_lang in reqested_langs:
562 if not sub_lang in available_subs_list:
563 self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
565 sub_lang_list[sub_lang] = available_subs_list[sub_lang]
567 for sub_lang in sub_lang_list:
568 subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
570 subtitles[sub_lang] = subtitle
573 def _print_formats(self, formats):
574 print('Available formats:')
576 print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
577 self._video_dimensions.get(x, '???'),
578 ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
580 def _extract_id(self, url):
581 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
583 raise ExtractorError(u'Invalid URL: %s' % url)
584 video_id = mobj.group(2)
587 def _get_video_url_list(self, url_map):
589 Transform a dictionary in the format {itag:url} to a list of (itag, url)
590 with the requested formats.
592 req_format = self._downloader.params.get('format', None)
593 format_limit = self._downloader.params.get('format_limit', None)
594 available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
595 if format_limit is not None and format_limit in available_formats:
596 format_list = available_formats[available_formats.index(format_limit):]
598 format_list = available_formats
599 existing_formats = [x for x in format_list if x in url_map]
600 if len(existing_formats) == 0:
601 raise ExtractorError(u'no known formats available for video')
602 if self._downloader.params.get('listformats', None):
603 self._print_formats(existing_formats)
605 if req_format is None or req_format == 'best':
606 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
607 elif req_format == 'worst':
608 video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
609 elif req_format in ('-1', 'all'):
610 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
612 # Specific formats. We pick the first in a slash-delimeted sequence.
613 # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
614 # available in the specified format. For example,
615 # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
616 # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
617 # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
618 req_formats = req_format.split('/')
619 video_url_list = None
620 for rf in req_formats:
622 video_url_list = [(rf, url_map[rf])]
624 if rf in self._video_formats_map:
625 for srf in self._video_formats_map[rf]:
627 video_url_list = [(srf, url_map[srf])]
632 if video_url_list is None:
633 raise ExtractorError(u'requested format not available')
634 return video_url_list
636 def _extract_from_m3u8(self, manifest_url, video_id):
638 def _get_urls(_manifest):
639 lines = _manifest.split('\n')
640 urls = filter(lambda l: l and not l.startswith('#'),
643 manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
644 formats_urls = _get_urls(manifest)
645 for format_url in formats_urls:
646 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
647 url_map[itag] = format_url
    def _real_extract(self, url):
        # Main extraction pipeline: watch page -> get_video_info -> metadata
        # -> format/signature resolution -> per-format result dicts.
        # NOTE(review): many lines of this method are missing from this paste
        # (guards, try: headers, else: branches, the age_gate flag, parts of
        # the get_video_info query, and the result-dict opener).  All visible
        # tokens below are preserved unchanged; only comments were added.
        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')

        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
            url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
        video_id = self._extract_id(url)

        # Get video webpage (English UI, verification pre-acknowledged).
        self.report_video_webpage_download(video_id)
        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
        request = compat_urllib_request.Request(url)
            video_webpage_bytes = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))

        video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')

        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))

        # Get video info; age-gated videos need the embedded-player route.
        self.report_video_info_webpage_download(video_id)
        if re.search(r'player-age-gate-content">', video_webpage) is not None:
            self.report_age_confirmation()
            # We simulate the access to the video from www.youtube.com/v/{video_id}
            # this can be viewed without login into Youtube
            data = compat_urllib_parse.urlencode({'video_id': video_id,
                                                  'eurl': 'https://youtube.googleapis.com/v/' + video_id,
            video_info_url = 'https://www.youtube.com/get_video_info?' + data
            video_info_webpage = self._download_webpage(video_info_url, video_id,
                                    errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
            # Try successive 'el' variants until one returns a token.
            for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
                video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                        % (video_id, el_type))
                video_info_webpage = self._download_webpage(video_info_url, video_id,
                                        errnote='unable to download video info webpage')
                video_info = compat_parse_qs(video_info_webpage)
                if 'token' in video_info:
        if 'token' not in video_info:
            if 'reason' in video_info:
                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
                raise ExtractorError(u'"token" parameter not in video info for unknown reason')

        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError(u'"rental" videos not supported')

        # Start extracting information
        self.report_information_extraction(video_id)

        # uploader
        if 'author' not in video_info:
            raise ExtractorError(u'Unable to extract uploader name')
        video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])

        # uploader_id (nickname from the channel link; optional)
        video_uploader_id = None
        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
            video_uploader_id = mobj.group(1)
            self._downloader.report_warning(u'unable to extract uploader nickname')

        # title
        if 'title' not in video_info:
            raise ExtractorError(u'Unable to extract video title')
        video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])

        # thumbnail image
        # We try first to get a high quality image:
        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                            video_webpage, re.DOTALL)
        if m_thumb is not None:
            video_thumbnail = m_thumb.group(1)
        elif 'thumbnail_url' not in video_info:
            self._downloader.report_warning(u'unable to extract video thumbnail')
        else:   # don't panic if we can't find it
            video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])

        # upload date (scraped from the watch page, then normalised)
        mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
            upload_date = unified_strdate(upload_date)

        # description
        video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
            video_description = clean_html(video_description)
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
                video_description = unescapeHTML(fd_mobj.group(1))
                video_description = u''

        # subtitles
        video_subtitles = None

        if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
            video_subtitles = self._extract_subtitles(video_id)
        elif self._downloader.params.get('writeautomaticsub', False):
            video_subtitles = self._request_automatic_caption(video_id, video_webpage)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id)

        if 'length_seconds' not in video_info:
            self._downloader.report_warning(u'unable to extract video duration')
            video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])

        # Decide which formats to download

            mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
                raise ValueError('Could not find vevo ID')
            info = json.loads(mobj.group(1))
            # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
            # this signatures are encrypted
            m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
                self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
                video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
            m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
                if 'url_encoded_fmt_stream_map' in video_info:
                    video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
                    video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
            elif 'adaptive_fmts' in video_info:
                if 'url_encoded_fmt_stream_map' in video_info:
                    video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
                    video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']

        # Resolve the concrete stream URLs (RTMP, fmt_stream_map, or HLS).
        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            video_url_list = [(None, video_info['conn'][0])]
        elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
            if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
            for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
                url_data = compat_parse_qs(url_data_str)
                if 'itag' in url_data and 'url' in url_data:
                    url = url_data['url'][0]
                    if 'sig' in url_data:
                        # Plain (already decrypted) signature.
                        url += '&signature=' + url_data['sig'][0]
                    elif 's' in url_data:
                        # Encrypted signature: optionally log diagnostics, then decrypt.
                        if self._downloader.params.get('verbose'):
                                player_version = self._search_regex(r'ad3-(.+?)\.swf',
                                    video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
                                    'flash player', fatal=False)
                                player = 'flash player %s' % player_version
                                player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
                                    'html5 player', fatal=False)
                            parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
                            self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                                (len(s), parts_sizes, url_data['itag'][0], player))
                        encrypted_sig = url_data['s'][0]
                            signature = self._decrypt_signature_age_gate(encrypted_sig)
                            signature = self._decrypt_signature(encrypted_sig)
                        url += '&signature=' + signature
                    if 'ratebypass' not in url:
                        url += '&ratebypass=yes'
                    url_map[url_data['itag'][0]] = url
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
        elif video_info.get('hlsvp'):
            manifest_url = video_info['hlsvp'][0]
            url_map = self._extract_from_m3u8(manifest_url, video_id)
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
            raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')

        # Build one result dict per selected format.
        for format_param, video_real_url in video_url_list:
            # Extension
            video_extension = self._video_extensions.get(format_param, 'flv')

            video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
                                              self._video_dimensions.get(format_param, '???'),
                                              ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')

                'url':      video_real_url,
                'uploader': video_uploader,
                'uploader_id': video_uploader_id,
                'upload_date':  upload_date,
                'title':    video_title,
                'ext':      video_extension,
                'format':   video_format,
                'thumbnail':    video_thumbnail,
                'description':  video_description,
                'player_url':   player_url,
                'subtitles':    video_subtitles,
                'duration':     video_duration
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = u'YouTube.com playlists'
    # NOTE(review): the `_VALID_URL = r"""` opener/closer and surrounding
    # alternation lines are missing from this paste; the visible fragments
    # below match playlist pages and naked playlist IDs (group 1 or 2).
                     (?:course|view_play_list|my_playlists|artist|playlist|watch)
                     \? (?:.*?&)*? (?:p|a|list)=
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                     ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
    # GData v2 JSON feed for a playlist, paged via max-results/start-index.
    # NOTE(review): the `_MAX_RESULTS = 50` line referenced by _real_extract
    # appears to be elided from this paste.
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    IE_NAME = u'youtube:playlist'
912 def suitable(cls, url):
913 """Receives a URL and returns True if suitable for this IE."""
914 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
916 def _real_extract(self, url):
917 # Extract playlist id
918 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
920 raise ExtractorError(u'Invalid URL: %s' % url)
922 # Download playlist videos from API
923 playlist_id = mobj.group(1) or mobj.group(2)
926 for page_num in itertools.count(1):
927 start_index = self._MAX_RESULTS * (page_num - 1) + 1
928 if start_index >= 1000:
929 self._downloader.report_warning(u'Max number of results reached')
931 url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
932 page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
935 response = json.loads(page)
936 except ValueError as err:
937 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
939 if 'feed' not in response:
940 raise ExtractorError(u'Got a malformed response from YouTube API')
941 playlist_title = response['feed']['title']['$t']
942 if 'entry' not in response['feed']:
943 # Number of videos is a multiple of self._MAX_RESULTS
946 for entry in response['feed']['entry']:
947 index = entry['yt$position']['$t']
948 if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
951 'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
954 videos = [v[1] for v in sorted(videos)]
956 url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
957 return [self.playlist_result(url_results, playlist_id, playlist_title)]
class YoutubeChannelIE(InfoExtractor):
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    # First (HTML) page of the channel's video list.
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    # Marker string whose presence means more pages are available.
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    # JSON AJAX endpoint used for the subsequent pages.
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'
968 def extract_videos_from_page(self, page):
970 for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
971 if mobj.group(1) not in ids_in_page:
972 ids_in_page.append(mobj.group(1))
975 def _real_extract(self, url):
977 mobj = re.match(self._VALID_URL, url)
979 raise ExtractorError(u'Invalid URL: %s' % url)
981 # Download channel page
982 channel_id = mobj.group(1)
986 url = self._TEMPLATE_URL % (channel_id, pagenum)
987 page = self._download_webpage(url, channel_id,
988 u'Downloading page #%s' % pagenum)
990 # Extract video identifiers
991 ids_in_page = self.extract_videos_from_page(page)
992 video_ids.extend(ids_in_page)
994 # Download any subsequent channel pages using the json-based channel_ajax query
995 if self._MORE_PAGES_INDICATOR in page:
996 for pagenum in itertools.count(1):
997 url = self._MORE_PAGES_URL % (pagenum, channel_id)
998 page = self._download_webpage(url, channel_id,
999 u'Downloading page #%s' % pagenum)
1001 page = json.loads(page)
1003 ids_in_page = self.extract_videos_from_page(page['content_html'])
1004 video_ids.extend(ids_in_page)
1006 if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
1009 self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
1011 urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
1012 url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
1013 return [self.playlist_result(url_entries, channel_id)]
class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    # Maximum entries per GData query (API-imposed).
    _GDATA_PAGE_SIZE = 50
    # Paged JSON feed of the user's uploads.
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'
1025 def suitable(cls, url):
1026 # Don't return True if the url can be extracted with other youtube
1027 # extractor, the regex would is too permissive and it would match.
1028 other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
1029 if any(ie.suitable(url) for ie in other_ies): return False
1030 else: return super(YoutubeUserIE, cls).suitable(url)
1032 def _real_extract(self, url):
1034 mobj = re.match(self._VALID_URL, url)
1036 raise ExtractorError(u'Invalid URL: %s' % url)
1038 username = mobj.group(1)
1040 # Download video ids using YouTube Data API. Result size per
1041 # query is limited (currently to 50 videos) so we need to query
1042 # page by page until there are no video ids - it means we got
1047 for pagenum in itertools.count(0):
1048 start_index = pagenum * self._GDATA_PAGE_SIZE + 1
1050 gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
1051 page = self._download_webpage(gdata_url, username,
1052 u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
1055 response = json.loads(page)
1056 except ValueError as err:
1057 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
1059 # Extract video identifiers
1061 for entry in response['feed']['entry']:
1062 ids_in_page.append(entry['id']['$t'].split('/')[-1])
1063 video_ids.extend(ids_in_page)
1065 # A little optimization - if current page is not
1066 # "full", ie. does not contain PAGE_SIZE video ids then
1067 # we can assume that this page is the last one - there
1068 # are no more ids on further pages - no need to query
1071 if len(ids_in_page) < self._GDATA_PAGE_SIZE:
1074 urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
1075 url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
1076 return [self.playlist_result(url_results, playlist_title = username)]
class YoutubeSearchIE(SearchInfoExtractor):
    """Search extractor backed by the GData videos API ("ytsearch" keyword)."""
    IE_DESC = u'YouTube.com searches'
    # %s: quoted query, %i: 1-based start index; page size is fixed at 50.
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Pages through the API 50 results at a time, stopping at either n
        results or the API-reported total. Raises ExtractorError on network
        failure or an empty result set.
        """
        video_ids = []
        pagenum = 0
        limit = n

        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum + 1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50 * pagenum) + 1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            new_ids = [video['id'] for video in api_response['items']]
            video_ids += new_ids

            # Never request past the total number of hits the API reports.
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube')
                  for video_id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeShowIE(InfoExtractor):
    """Extractor for multi-season YouTube shows: yields one playlist per season."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        season_matches = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_matches)))
        results = []
        for season in season_matches:
            results.append(self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist'))
        return results
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # Number of feed items fetched per feed_ajax request.
    # NOTE(review): value restored from context — confirm against callers.
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # Template URL; '%%s' survives the formatting so the paging offset
        # can be substituted later in _real_extract.
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        # Derived name, e.g. u'youtube:subscriptions'.
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # Feeds require authentication (_LOGIN_REQUIRED above).
        self._login()

    def _real_extract(self, url):
        """Collect every video of the feed and return it as a playlist."""
        feed_entries = []
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i * self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            # Preserve first-seen order while dropping duplicate ids;
            # 'video_id' avoids shadowing the builtin 'id'.
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(self.url_result(video_id, 'Youtube') for video_id in ids)
            # The server signals the last page with a null 'paging' field.
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's subscriptions feed."""
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    # Feed name substituted into the feed_ajax URL by the base class.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's recommended-videos feed."""
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    # Feed name substituted into the feed_ajax URL by the base class.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's "Watch Later" list."""
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    # Feed name substituted into the feed_ajax URL by the base class.
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'

    # Watch Later is per-user, so the personal feed endpoint must be used.
    _PERSONAL_FEED = True
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the authenticated user's favourite videos ("ytfav" keyword)."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The incoming url is only used for matching; the favourites page
        # itself is always fetched from the fixed my_favorites location.
        favourites_page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # Favourites are exposed as a regular playlist: pull its id out of
        # the page and delegate the real work to YoutubePlaylistIE.
        favourites_list_id = self._search_regex(r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')