9 from .common import InfoExtractor, SearchInfoExtractor
15 compat_urllib_request,
# NOTE(review): this listing is a sampled rendering of the original file —
# many lines are elided, so several statements below appear incomplete.
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Google account sign-in endpoint used by the login routine.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    # URL that forces the UI language to English (see _set_language()).
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    # Age-verification endpoint used by _confirm_age().
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    # Machine name used to look up credentials in the user's .netrc file.
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False
35 def report_lang(self):
36 """Report attempt to set language."""
37 self.to_screen(u'Setting language')
    def _set_language(self):
        """Request the language-preference URL so subsequent pages are in English."""
        request = compat_urllib_request.Request(self._LANG_URL)
        # NOTE(review): listing gap — the line(s) opening a `try:` block are
        # elided here; the `except` below has no visible matching `try`.
        compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            # Best-effort: failure to switch language only produces a warning.
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
        # NOTE(review): the enclosing `def _login(self):` header and many body
        # lines are elided from this listing; the fragment below is the visible
        # interior of that method and is not syntactically complete (e.g.
        # `except` clauses without their `try:`, a dict literal without braces).
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if self._LOGIN_REQUIRED:
            raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
        request = compat_urllib_request.Request(self._LOGIN_URL)
        login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
        # Anti-forgery tokens ("GALX", "dsh") embedded in the login form markup.
        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
        # Visible subset of the login form fields (dict delimiters and the
        # username/password entries are elided from this listing).
            u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            u'PersistentCookie': u'yes',
            u'bgresponse': u'js_disabled',
            u'checkConnection': u'',
            u'checkedDomains': u'youtube',
            u'signIn': u'Sign in',
            u'service': u'youtube',
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
        # If the response still contains the login form, authentication failed.
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username or password')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
    def _confirm_age(self):
        """Submit the age-verification form so age-gated pages become reachable."""
        # NOTE(review): listing gap — the `age_form = {` opening (and its other
        # entries) are elided before this line.
            'action_confirm': 'Confirm',
        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
        # NOTE(review): listing gap — the `try:` line is elided here.
        self.report_age_confirmation()
        compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            # Unlike language/login, failure here is fatal for the extractor.
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
    def _real_initialize(self):
        """Shared initialization hook: set the UI language, then log in."""
        # NOTE(review): the bodies of the three `if` statements below
        # (presumably early returns in the original) are elided from this
        # listing.
        if self._downloader is None:
        if not self._set_language():
        if not self._login():
class YoutubeIE(YoutubeBaseInfoExtractor):
    IE_DESC = u'YouTube.com'
    # NOTE(review): the `_VALID_URL = r"""` assignment delimiters and at least
    # the `v=`/`vi=` alternative of this verbose regex are elided from this
    # listing; below is the visible body of that pattern.
                     (?:https?://)?                                       # http(s):// (optional)
                     (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                        tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
                     (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                     (?:                                                  # the various things that can precede the ID:
                         (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                         |(?:                                             # or the v= param in all its forms
                             (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                             (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                             (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                         )?                                               # optional -> youtube.com/xxxx is OK
                     )?                                                   # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]+)                                     # here is it! the YouTube video ID
                     (?(1).+)?                                            # if we found the ID, everything can follow
    # Regex for the `next_url` query parameter used by redirect URLs
    # (e.g. age-verification); see _real_extract().
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    # NOTE(review): in this listing the two itag-preference lists and the
    # itag-keyed tables below are shown with many entries and their closing
    # brackets elided.
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          '85', '84', '102', '83', '101', '82', '100',
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          '141', '172', '140', '171', '139',
    # Same itags, but free (WebM) containers are preferred at equal quality.
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
                                     # Apple HTTP Live Streaming
                                     '96', '95', '94', '93', '92', '132', '151',
                                     '85', '102', '84', '101', '83', '100', '82',
                                     '138', '248', '137', '247', '136', '246', '245',
                                     '244', '135', '243', '134', '242', '133', '160',
                                     '172', '141', '171', '140', '139',
    # Container name -> itags in that container, best quality first.
    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    # itag -> file extension (entries elided from this listing).
    _video_extensions = {
        # Apple HTTP Live Streaming
    # itag -> display dimensions string (entries elided from this listing).
    _video_dimensions = {
    # NOTE(review): the `_TESTS = [` opening, the per-entry braces and the
    # nested `info_dict` delimiters are elided from this listing; below are the
    # visible fields of the five integration-test cases.
    # Test 1: plain video with non-ASCII metadata.
    u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
    u"file":  u"BaW_jenozKc.mp4",
    u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
    u"uploader": u"Philipp Hagemeister",
    u"uploader_id": u"phihag",
    u"upload_date": u"20121002",
    u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
    # Test 2: VEVO video (issue #897).
    u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
    u"file":  u"1ltcDfZMA3U.flv",
    u"note": u"Test VEVO video (#897)",
    u"upload_date": u"20070518",
    u"title": u"Maps - It Will Find You",
    u"description": u"Music video by Maps performing It Will Find You.",
    u"uploader": u"MuteUSA",
    u"uploader_id": u"MuteUSA"
    # Test 3: generic encrypted-signature video (issue #897).
    u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
    u"file":  u"UxxajLWwzqY.mp4",
    u"note": u"Test generic use_cipher_signature video (#897)",
    u"upload_date": u"20120506",
    u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
    u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
    u"uploader": u"Icona Pop",
    u"uploader_id": u"IconaPop"
    # Test 4: age-protected VEVO video (issue #956).
    u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
    u"file":  u"07FYdnEawAQ.mp4",
    u"note": u"Test VEVO video with age protection (#956)",
    u"upload_date": u"20130703",
    u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
    u"description": u"md5:64249768eec3bc4276236606ea996373",
    u"uploader": u"justintimberlakeVEVO",
    u"uploader_id": u"justintimberlakeVEVO"
    # Test 5: HLS (m3u8) video; download is skipped in the test params.
    u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
    u'file': u'TGi3HqYrWHE.mp4',
    u'note': u'm3u8 video',
    u'title': u'Triathlon - Men - London 2012 Olympic Games',
    u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
    u'uploader': u'olympic',
    u'upload_date': u'20120807',
    u'uploader_id': u'olympic',
    u'skip_download': True,
    # NOTE(review): presumably decorated with @classmethod in the original —
    # the decorator line is elided from this listing (the first parameter is
    # `cls`).
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # Playlist/subscription URLs also match _VALID_URL; defer to those IEs.
        if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
390 def report_video_webpage_download(self, video_id):
391 """Report attempt to download video webpage."""
392 self.to_screen(u'%s: Downloading video webpage' % video_id)
394 def report_video_info_webpage_download(self, video_id):
395 """Report attempt to download video info webpage."""
396 self.to_screen(u'%s: Downloading video info webpage' % video_id)
    def report_video_subtitles_download(self, video_id):
        """Report that we are checking which subtitles are available."""
        # (The original docstring was copy-pasted from the info-webpage
        # reporter and was incorrect.)
        self.to_screen(u'%s: Checking available subtitles' % video_id)
    def report_video_subtitles_request(self, video_id, sub_lang, format):
        """Report the download of one subtitle track (language and format)."""
        # (The original docstring was copy-pasted from the info-webpage
        # reporter and was incorrect.)
        self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
406 def report_video_subtitles_available(self, video_id, sub_lang_list):
407 """Report available subtitles."""
408 sub_lang = ",".join(list(sub_lang_list.keys()))
409 self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang))
411 def report_information_extraction(self, video_id):
412 """Report attempt to extract video information."""
413 self.to_screen(u'%s: Extracting video information' % video_id)
    def report_unavailable_format(self, video_id, format):
        """Report that the requested format is not available."""
        # (The original docstring, "Report extracted video URL.", was a
        # copy-paste error.)
        self.to_screen(u'%s: Format %s not available' % (video_id, format))
419 def report_rtmp_download(self):
420 """Indicate the download will use the RTMP protocol."""
421 self.to_screen(u'RTMP download detected')
    def _decrypt_signature(self, s):
        """Turn the encrypted s field into a working signature"""
        # NOTE(review): the `if len(s) == NN:` guards that select between the
        # per-length scramble recipes are elided from this listing; each
        # `return` below handles one specific encrypted-signature length
        # (as suggested by the "key length" error message at the end).
        return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
        return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
        return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
        return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
        return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
        return s[81:73:-1] + s[84] + s[72:58:-1] + s[0] + s[57:35:-1] + s[85] + s[34:0:-1]
        return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
        return s[81:36:-1] + s[0] + s[35:2:-1]
        return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
        return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
        return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
        return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
        return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
        # Unknown length: the cipher rotates server-side, so a retry may work.
        raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
    def _decrypt_signature_age_gate(self, s):
        # The videos with age protection use another player, so the algorithms
        # differ.
        # NOTE(review): listing gap — the `if len(s) == NN:` guard selecting
        # the recipe below is elided here.
        return s[2:63] + s[82] + s[64:82] + s[63]
        # Fallback to the other algorithms
        return self._decrypt_signature(s)
    def _get_available_subtitles(self, video_id):
        """Query the timedtext track list; builds {lang_code: track_name}."""
        self.report_video_subtitles_download(video_id)
        request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
        # NOTE(review): listing gap — the `try:` line, the early return after
        # the warning, and the final `return sub_lang_list` are elided.
        sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
        # Each <track> tag carries name="..." and lang_code="..." attributes;
        # the dict maps lang_code (group 2) -> track name (group 1).
        sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
        sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
        if not sub_lang_list:
            self._downloader.report_warning(u'video doesn\'t have subtitles')
481 def _list_available_subtitles(self, video_id):
482 sub_lang_list = self._get_available_subtitles(video_id)
483 self.report_video_subtitles_available(video_id, sub_lang_list)
    def _request_subtitle(self, sub_lang, sub_name, video_id, format):
        # NOTE(review): the docstring delimiters and several lines (the other
        # urlencode fields, the `try:`, and the return statements) are elided
        # from this listing.
        Return the subtitle as a string or None if they are not found
        self.report_video_subtitles_request(video_id, sub_lang, format)
        # Build the timedtext query for this language/format.
        params = compat_urllib_parse.urlencode({
        url = 'http://www.youtube.com/api/timedtext?' + params
        sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
        # Presumably reached when the response body is empty — TODO confirm.
        self._downloader.report_warning(u'Did not fetch video subtitles')
    def _request_automatic_caption(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process."""
        # First language from --sub-lang (default 'en') is the one requested.
        sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
        sub_format = self._downloader.params.get('subtitlesformat')
        self.to_screen(u'%s: Looking for automatic captions' % video_id)
        # The caption URL lives inside the inline ytplayer.config JSON blob.
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
        # NOTE(review): listing gap — the `if mobj is None:` guard, its return,
        # the `try:` line and the urlencode field entries are elided here.
        self._downloader.report_warning(err_msg)
        player_config = json.loads(mobj.group(1))
        args = player_config[u'args']
        caption_url = args[u'ttsurl']
        timestamp = args[u'timestamp']
        params = compat_urllib_parse.urlencode({
        subtitles_url = caption_url + '&' + params
        sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
        return {sub_lang: sub}
        # An extractor error can be raised by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
    def _extract_subtitles(self, video_id):
        # NOTE(review): the docstring delimiters and several lines (early
        # returns, `else:` branches, result-dict initialisations) are elided
        # from this listing.
        Return a dictionary: {language: subtitles} or {} if the subtitles
        available_subs_list = self._get_available_subtitles(video_id)
        sub_format = self._downloader.params.get('subtitlesformat')
        if not available_subs_list:  # There was some error, it didn't get the available subtitles
        if self._downloader.params.get('allsubtitles', False):
            sub_lang_list = available_subs_list
        # Choose requested languages: explicit --sub-lang, else 'en' if
        # available, else the first available language.
        # ("reqested" is misspelled in the original; kept, as this is a
        # documentation-only edit.)
        if self._downloader.params.get('subtitleslangs', False):
            reqested_langs = self._downloader.params.get('subtitleslangs')
        elif 'en' in available_subs_list:
            reqested_langs = ['en']
            reqested_langs = [list(available_subs_list.keys())[0]]
        for sub_lang in reqested_langs:
            if not sub_lang in available_subs_list:
                self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
            sub_lang_list[sub_lang] = available_subs_list[sub_lang]
        # Download each selected track.
        for sub_lang in sub_lang_list:
            subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
            subtitles[sub_lang] = subtitle
    def _print_formats(self, formats):
        """Print one line per itag with its extension, dimensions and notes."""
        print('Available formats:')
        # NOTE(review): listing gap — the loop header (presumably
        # `for x in formats:`) is elided before this print.
        print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
                                    self._video_dimensions.get(x, '???'),
                                    ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
    def _extract_id(self, url):
        """Extract the bare video id (group 2 of _VALID_URL) from a URL."""
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        # NOTE(review): listing gap — the `if mobj is None:` guard before the
        # raise and the final `return video_id` are elided here.
        raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(2)
    def _get_video_url_list(self, url_map):
        # NOTE(review): the docstring delimiters, `else:` branch headers and
        # the `if rf in url_map:` / `if srf in url_map:` guards with their
        # `break` statements are elided from this listing.
        Transform a dictionary in the format {itag:url} to a list of (itag, url)
        with the requested formats.
        req_format = self._downloader.params.get('format', None)
        format_limit = self._downloader.params.get('format_limit', None)
        # Preference order depends on --prefer-free-formats.
        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
        if format_limit is not None and format_limit in available_formats:
            format_list = available_formats[available_formats.index(format_limit):]
            format_list = available_formats
        # Keep only itags we actually have URLs for, in preference order.
        existing_formats = [x for x in format_list if x in url_map]
        if len(existing_formats) == 0:
            raise ExtractorError(u'no known formats available for video')
        if self._downloader.params.get('listformats', None):
            self._print_formats(existing_formats)
        if req_format is None or req_format == 'best':
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
        elif req_format == 'worst':
            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
        elif req_format in ('-1', 'all'):
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
        # Specific formats. We pick the first in a slash-delimited sequence.
        # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
        # available in the specified format. For example,
        # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
        # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
        # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
        req_formats = req_format.split('/')
        video_url_list = None
        for rf in req_formats:
            video_url_list = [(rf, url_map[rf])]
            if rf in self._video_formats_map:
                for srf in self._video_formats_map[rf]:
                    video_url_list = [(srf, url_map[srf])]
        if video_url_list is None:
            raise ExtractorError(u'requested format not available')
        return video_url_list
    def _extract_from_m3u8(self, manifest_url, video_id):
        """Build an {itag: url} map from an HLS master manifest."""
        # NOTE(review): listing gap — the `url_map = {}` initialisation, the
        # rest of the filter expression, and the final `return url_map` are
        # elided here.
        def _get_urls(_manifest):
            # Non-comment, non-empty lines of an m3u8 playlist are media URLs.
            lines = _manifest.split('\n')
            urls = filter(lambda l: l and not l.startswith('#'),
        manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
        formats_urls = _get_urls(manifest)
        for format_url in formats_urls:
            # Each variant URL embeds its itag as ".../itag/<N>/...".
            itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
            url_map[itag] = format_url
    def _real_extract(self, url):
        # NOTE(review): this method is rendered with many lines elided
        # (`try:` headers, `if ... is None:`/`else:` branches, `break`s,
        # initialisations, the result-dict literal); the fragments below are
        # the visible lines with gaps flagged where syntax clearly breaks.
        #
        # Warn when the URL was probably truncated by the shell at '&'.
        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')
        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
        url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
        video_id = self._extract_id(url)

        # Get video webpage (normalized URL, US English, verified).
        self.report_video_webpage_download(video_id)
        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
        request = compat_urllib_request.Request(url)
        video_webpage_bytes = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
        video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')

        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
        # Un-escape the JSON-escaped player URL.
        player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))

        # Get video info; age-gated videos need the embedded-player endpoint.
        self.report_video_info_webpage_download(video_id)
        if re.search(r'player-age-gate-content">', video_webpage) is not None:
            self.report_age_confirmation()
            # We simulate the access to the video from www.youtube.com/v/{video_id}
            # this can be viewed without login into Youtube
            data = compat_urllib_parse.urlencode({'video_id': video_id,
                'eurl': 'https://youtube.googleapis.com/v/' + video_id,
            video_info_url = 'https://www.youtube.com/get_video_info?' + data
            video_info_webpage = self._download_webpage(video_info_url, video_id,
                                                        errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
        # NOTE(review): the `else:` branch header is elided; the loop below
        # tries several `el=` variants until the response carries a token.
        for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
            video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                              % (video_id, el_type))
            video_info_webpage = self._download_webpage(video_info_url, video_id,
                                                        errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
            if 'token' in video_info:
        if 'token' not in video_info:
            if 'reason' in video_info:
                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
                raise ExtractorError(u'"token" parameter not in video info for unknown reason')

        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError(u'"rental" videos not supported')

        # Start extracting information
        self.report_information_extraction(video_id)

        # uploader
        if 'author' not in video_info:
            raise ExtractorError(u'Unable to extract uploader name')
        video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])

        # uploader_id (channel/user nickname from the page markup)
        video_uploader_id = None
        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
        video_uploader_id = mobj.group(1)
        self._downloader.report_warning(u'unable to extract uploader nickname')

        # title
        if 'title' not in video_info:
            raise ExtractorError(u'Unable to extract video title')
        video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])

        # thumbnail image
        # We try first to get a high quality image:
        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                            video_webpage, re.DOTALL)
        if m_thumb is not None:
            video_thumbnail = m_thumb.group(1)
        elif 'thumbnail_url' not in video_info:
            self._downloader.report_warning(u'unable to extract video thumbnail')
        else:   # don't panic if we can't find it
            video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])

        # upload date, parsed out of the page's eow-date span.
        mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
        upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
        upload_date = unified_strdate(upload_date)

        # description: page element first, <meta> description as fallback.
        video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
            video_description = clean_html(video_description)
        fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
        video_description = unescapeHTML(fd_mobj.group(1))
        video_description = u''

        # subtitles
        video_subtitles = None
        if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
            video_subtitles = self._extract_subtitles(video_id)
        elif self._downloader.params.get('writeautomaticsub', False):
            video_subtitles = self._request_automatic_caption(video_id, video_webpage)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id)

        # duration
        if 'length_seconds' not in video_info:
            self._downloader.report_warning(u'unable to extract video duration')
        video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])

        # Decide which formats to download
        mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
        raise ValueError('Could not find vevo ID')
        info = json.loads(mobj.group(1))
        # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
        # these signatures are encrypted
        m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
        self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
        video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
        m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
        if 'url_encoded_fmt_stream_map' in video_info:
            video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
            video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
        elif 'adaptive_fmts' in video_info:
            if 'url_encoded_fmt_stream_map' in video_info:
                video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
                video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']

        # Pick the URL source: rtmp 'conn', the fmt stream map, or HLS.
        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            video_url_list = [(None, video_info['conn'][0])]
        elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
            if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
            for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
                url_data = compat_parse_qs(url_data_str)
                if 'itag' in url_data and 'url' in url_data:
                    url = url_data['url'][0]
                    if 'sig' in url_data:
                        # Plain signature: append directly.
                        url += '&signature=' + url_data['sig'][0]
                    elif 's' in url_data:
                        # Encrypted signature: decrypt (verbose mode reports
                        # the detected player and signature shape first).
                        if self._downloader.params.get('verbose'):
                            player_version = self._search_regex(r'ad3-(.+?)\.swf',
                                video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
                                'flash player', fatal=False)
                            player = 'flash player %s' % player_version
                            player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
                                'html5 player', fatal=False)
                            parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
                            self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                                (len(s), parts_sizes, url_data['itag'][0], player))
                        encrypted_sig = url_data['s'][0]
                        signature = self._decrypt_signature_age_gate(encrypted_sig)
                        signature = self._decrypt_signature(encrypted_sig)
                        url += '&signature=' + signature
                    if 'ratebypass' not in url:
                        url += '&ratebypass=yes'
                    url_map[url_data['itag'][0]] = url
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
        elif video_info.get('hlsvp'):
            manifest_url = video_info['hlsvp'][0]
            url_map = self._extract_from_m3u8(manifest_url, video_id)
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
        raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')

        # Build one result dict per selected format.
        for format_param, video_real_url in video_url_list:
            # Extension
            video_extension = self._video_extensions.get(format_param, 'flv')
            video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
                                                 self._video_dimensions.get(format_param, '???'),
                                                 ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
            # NOTE(review): the result-dict opening (`results.append({...` or
            # similar) is elided; below are the visible entries.
            'url':      video_real_url,
            'uploader': video_uploader,
            'uploader_id': video_uploader_id,
            'upload_date':  upload_date,
            'title':    video_title,
            'ext':      video_extension,
            'format':   video_format,
            'thumbnail':    video_thumbnail,
            'description':  video_description,
            'player_url':   player_url,
            'subtitles':    video_subtitles,
            'duration':     video_duration
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = u'YouTube.com playlists'
    # NOTE(review): the `_VALID_URL = r"""` delimiters and several alternative
    # branches of this verbose regex are elided from this listing.
                        (?:course|view_play_list|my_playlists|artist|playlist|watch)
                        \? (?:.*?&)*? (?:p|a|list)=
                     ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                     ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
    # GData v2 playlist feed; filled with (playlist_id, max_results, start_index).
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    IE_NAME = u'youtube:playlist'
    # NOTE(review): presumably decorated with @classmethod in the original —
    # the decorator line is elided from this listing (first parameter is `cls`).
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
    def _real_extract(self, url):
        # NOTE(review): rendered with lines elided (`if mobj is None:` guard,
        # `try:` headers, `videos = []` initialisation, loop `break`s).
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        raise ExtractorError(u'Invalid URL: %s' % url)

        # Download playlist videos from API
        playlist_id = mobj.group(1) or mobj.group(2)
        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            # The GData API refuses start indices of 1000 and beyond.
            if start_index >= 1000:
                self._downloader.report_warning(u'Max number of results reached')
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
            response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
            # Collect (position, watch-url) so entries can be sorted later.
            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'media$player' in entry['media$group']:
                    videos.append((index, entry['media$group']['media$player']['url']))
        # Sort by playlist position, then drop the position.
        videos = [v[1] for v in sorted(videos)]
        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
class YoutubeChannelIE(InfoExtractor):
    IE_DESC = u'YouTube.com channels'
    # group 1: the channel id.
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    # First page: plain HTML channel video list (channel_id, page_number).
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    # Marker whose presence in a page means more pages exist.
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    # Subsequent pages: JSON AJAX endpoint (page_number, channel_id).
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'
963 def extract_videos_from_page(self, page):
965 for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
966 if mobj.group(1) not in ids_in_page:
967 ids_in_page.append(mobj.group(1))
    def _real_extract(self, url):
        # NOTE(review): rendered with lines elided (`if mobj is None:` guard,
        # `video_ids = []` / `pagenum` initialisations, loop `break`).
        # Extract channel id
        mobj = re.match(self._VALID_URL, url)
        raise ExtractorError(u'Invalid URL: %s' % url)

        # Download channel page
        channel_id = mobj.group(1)
        url = self._TEMPLATE_URL % (channel_id, pagenum)
        page = self._download_webpage(url, channel_id,
                                      u'Downloading page #%s' % pagenum)

        # Extract video identifiers
        ids_in_page = self.extract_videos_from_page(page)
        video_ids.extend(ids_in_page)

        # Download any subsequent channel pages using the json-based channel_ajax query
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)
                page = json.loads(page)
                # The AJAX response nests the video list HTML under 'content_html'.
                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)
                # Stop once the "load more" widget disappears.
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    # group 1: the user name (from a URL or a ytuser: pseudo-scheme).
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    # GData caps results per request; pages are fetched in chunks of this size.
    _GDATA_PAGE_SIZE = 50
    # Filled with (username, page_size, start_index).
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
    # Matches watch URLs inside the feed; group 1 is the video id.
    _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
    IE_NAME = u'youtube:user'
    def _real_extract(self, url):
        # NOTE(review): rendered with lines elided (`if mobj is None:` guard,
        # `video_ids = []` / `ids_in_page = []` initialisations, loop `break`).
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            # Extract video identifiers
            for mobj in re.finditer(self._VIDEO_INDICATOR, page):
                if mobj.group(1) not in ids_in_page:
                    ids_in_page.append(mobj.group(1))

            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title = username)]
class YoutubeSearchIE(SearchInfoExtractor):
    """Run a YouTube search through the GData JSON-C API.

    Triggered by the "ytsearch" key; returns up to _MAX_RESULTS videos
    as a playlist.
    """
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    # Must match the max-results parameter hard-coded in _API_URL.
    _API_PAGE_SIZE = 50
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        video_ids = []
        pagenum = 0
        # `limit` shrinks to the API's totalItems once the first
        # response arrives, so we stop paging when results run out.
        limit = n

        while (self._API_PAGE_SIZE * pagenum) < limit:
            self.report_download_page(query, pagenum + 1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query),
                                          (self._API_PAGE_SIZE * pagenum) + 1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            # Idiomatic membership test (was `not 'items' in ...`).
            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            new_ids = list(video['id'] for video in api_response['items'])
            video_ids += new_ids

            limit = min(n, api_response['totalItems'])
            pagenum += 1

        # The last page may overshoot the requested count; trim it.
        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeShowIE(InfoExtractor):
    """Extract every season playlist of a YouTube show page."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Each season of the show is linked as its own playlist.
        season_paths = re.findall(r'href="(/playlist\?list=.*?)"', webpage)
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_paths)))
        return [self.url_result('https://www.youtube.com' + path, 'YoutubePlaylist')
                for path in season_paths]
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # Feeds are private to the logged-in account.
    _LOGIN_REQUIRED = True
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        else:
            action = 'action_load_system_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        feed_entries = []
        # itertools.count only grew a step argument in Python 2.7, so
        # the paging offset is computed by hand from the page index.
        for page_idx in itertools.count(0):
            info = json.loads(self._download_webpage(
                self._FEED_TEMPLATE % (page_idx * self._PAGING_STEP),
                u'%s feed' % self._FEED_NAME,
                u'Downloading page %s' % page_idx))
            id_matches = re.finditer(r'"/watch\?v=(.*?)["&]', info['feed_html'])
            for video_id in orderedSet(m.group(1) for m in id_matches):
                feed_entries.append(self.url_result(video_id, 'Youtube'))
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Extract the authenticated user's subscriptions feed."""
    # Fixed missing space before "(requires authentication)" so the
    # help text matches the sibling feed extractors' IE_DESC wording.
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Extract the authenticated user's recommended-videos feed."""
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    # Feed name and playlist title consumed by YoutubeFeedsInfoExtractor.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Extract the authenticated user's "Watch Later" feed."""
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    # Watch Later is per-account, so use the personal-feed ajax action.
    _PERSONAL_FEED = True
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the user's favourites page to its backing playlist."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The favourites are just an ordinary playlist; hand its id over
        # to the playlist extractor.
        list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')