9 from .common import InfoExtractor, SearchInfoExtractor
15 compat_urllib_request,
26 class YoutubeBaseInfoExtractor(InfoExtractor):
27 """Provide base functions for Youtube extractors"""
# Endpoint the Google account login form is fetched from and posted to
# (see _login below).
28 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
# URL fetched once to force the site into English/US before scraping
# (see _set_language below).
29 _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
# Age-verification confirmation endpoint (see _confirm_age below).
30 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
# NOTE(review): presumably the machine key for .netrc credential lookup
# by _get_login_info (defined upstream) — confirm against InfoExtractor.
31 _NETRC_MACHINE = 'youtube'
32 # If True it will raise an error if no login info is provided
33 _LOGIN_REQUIRED = False
35 def report_lang(self):
36 """Report attempt to set language."""
37 self.to_screen(u'Setting language')
39 def _set_language(self):
40 request = compat_urllib_request.Request(self._LANG_URL)
43 compat_urllib_request.urlopen(request).read()
44 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
45 self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
50 (username, password) = self._get_login_info()
51 # No authentication to be performed
53 if self._LOGIN_REQUIRED:
54 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
57 request = compat_urllib_request.Request(self._LOGIN_URL)
59 login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
60 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
61 self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
66 match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
69 match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
75 u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
79 u'PersistentCookie': u'yes',
81 u'bgresponse': u'js_disabled',
82 u'checkConnection': u'',
83 u'checkedDomains': u'youtube',
89 u'signIn': u'Sign in',
91 u'service': u'youtube',
95 # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
97 login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
98 login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
99 request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
102 login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
103 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
104 self._downloader.report_warning(u'unable to log in: bad username or password')
106 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
107 self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
111 def _confirm_age(self):
114 'action_confirm': 'Confirm',
116 request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
118 self.report_age_confirmation()
119 compat_urllib_request.urlopen(request).read().decode('utf-8')
120 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
121 raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
124 def _real_initialize(self):
125 if self._downloader is None:
127 if not self._set_language():
129 if not self._login():
133 class YoutubeIE(YoutubeBaseInfoExtractor):
134 IE_DESC = u'YouTube.com'
137 (?:https?://)? # http(s):// (optional)
138 (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
139 tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains
140 (?:.*?\#/)? # handle anchor (#/) redirect urls
141 (?: # the various things that can precede the ID:
142 (?:(?:v|embed|e)/) # v/ or embed/ or e/
143 |(?: # or the v= param in all its forms
144 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
145 (?:\?|\#!?) # the params delimiter ? or # or #!
146 (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
149 )? # optional -> youtube.com/xxxx is OK
150 )? # all until now is optional -> you can pass the naked ID
151 ([0-9A-Za-z_-]+) # here is it! the YouTube video ID
152 (?(1).+)? # if we found the ID, everything can follow
154 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
155 # Listed in order of quality
156 _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
157 '95', '94', '93', '92', '132', '151',
159 '85', '84', '102', '83', '101', '82', '100',
161 '138', '137', '248', '136', '247', '135', '246',
162 '245', '244', '134', '243', '133', '242', '160',
164 '141', '172', '140', '171', '139',
166 _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
167 '95', '94', '93', '92', '132', '151',
168 '85', '102', '84', '101', '83', '100', '82',
170 '138', '248', '137', '247', '136', '246', '245',
171 '244', '135', '243', '134', '242', '133', '160',
173 '172', '141', '171', '140', '139',
175 _video_extensions = {
196 # videos that use m3u8
228 _video_dimensions = {
309 u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
310 u"file": u"BaW_jenozKc.mp4",
312 u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
313 u"uploader": u"Philipp Hagemeister",
314 u"uploader_id": u"phihag",
315 u"upload_date": u"20121002",
316 u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
320 u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
321 u"file": u"1ltcDfZMA3U.flv",
322 u"note": u"Test VEVO video (#897)",
324 u"upload_date": u"20070518",
325 u"title": u"Maps - It Will Find You",
326 u"description": u"Music video by Maps performing It Will Find You.",
327 u"uploader": u"MuteUSA",
328 u"uploader_id": u"MuteUSA"
332 u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
333 u"file": u"UxxajLWwzqY.mp4",
334 u"note": u"Test generic use_cipher_signature video (#897)",
336 u"upload_date": u"20120506",
337 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
338 u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
339 u"uploader": u"Icona Pop",
340 u"uploader_id": u"IconaPop"
344 u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
345 u"file": u"07FYdnEawAQ.mp4",
346 u"note": u"Test VEVO video with age protection (#956)",
348 u"upload_date": u"20130703",
349 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
350 u"description": u"md5:64249768eec3bc4276236606ea996373",
351 u"uploader": u"justintimberlakeVEVO",
352 u"uploader_id": u"justintimberlakeVEVO"
356 u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
357 u'file': u'TGi3HqYrWHE.mp4',
358 u'note': u'm3u8 video',
360 u'title': u'Triathlon - Men - London 2012 Olympic Games',
361 u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
362 u'uploader': u'olympic',
363 u'upload_date': u'20120807',
364 u'uploader_id': u'olympic',
367 u'skip_download': True,
374 def suitable(cls, url):
375 """Receives a URL and returns True if suitable for this IE."""
376 if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
377 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
379 def report_video_webpage_download(self, video_id):
380 """Report attempt to download video webpage."""
381 self.to_screen(u'%s: Downloading video webpage' % video_id)
383 def report_video_info_webpage_download(self, video_id):
384 """Report attempt to download video info webpage."""
385 self.to_screen(u'%s: Downloading video info webpage' % video_id)
387 def report_video_subtitles_download(self, video_id):
388 """Report attempt to download video info webpage."""
389 self.to_screen(u'%s: Checking available subtitles' % video_id)
391 def report_video_subtitles_request(self, video_id, sub_lang, format):
392 """Report attempt to download video info webpage."""
393 self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
395 def report_video_subtitles_available(self, video_id, sub_lang_list):
396 """Report available subtitles."""
397 sub_lang = ",".join(list(sub_lang_list.keys()))
398 self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang))
400 def report_information_extraction(self, video_id):
401 """Report attempt to extract video information."""
402 self.to_screen(u'%s: Extracting video information' % video_id)
404 def report_unavailable_format(self, video_id, format):
405 """Report extracted video URL."""
406 self.to_screen(u'%s: Format %s not available' % (video_id, format))
408 def report_rtmp_download(self):
409 """Indicate the download will use the RTMP protocol."""
410 self.to_screen(u'RTMP download detected')
412 def _decrypt_signature(self, s):
413 """Turn the encrypted s field into a working signature"""
# Each return below undoes one known permutation of the encrypted
# signature string; the permutation is keyed on len(s).
# NOTE(review): the "if len(s) == NN:" guard lines are missing from
# this excerpt, so the exact length handled by each branch cannot be
# confirmed here.
416 return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
418 return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
420 return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
422 return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
424 return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
426 return s[5:20] + s[2] + s[21:]
428 return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
430 return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
432 return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
434 return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
436 return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
438 return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
440 return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
# Unknown signature length: fail loudly so the user can retry or update.
443 raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
445 def _decrypt_signature_age_gate(self, s):
446 # The videos with age protection use another player, so the algorithms
# NOTE(review): the continuation of the comment above and the length
# guard around the return below are missing from this excerpt.
449 return s[2:63] + s[82] + s[64:82] + s[63]
451 # Fallback to the other algorithms
452 return self._decrypt_signature(s)
455 def _get_available_subtitles(self, video_id):
456 self.report_video_subtitles_download(video_id)
457 request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
459 sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
460 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
461 self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
463 sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
464 sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
465 if not sub_lang_list:
466 self._downloader.report_warning(u'video doesn\'t have subtitles')
470 def _list_available_subtitles(self, video_id):
471 sub_lang_list = self._get_available_subtitles(video_id)
472 self.report_video_subtitles_available(video_id, sub_lang_list)
474 def _request_subtitle(self, sub_lang, sub_name, video_id, format):
476 Return the subtitle as a string or None if they are not found
478 self.report_video_subtitles_request(video_id, sub_lang, format)
479 params = compat_urllib_parse.urlencode({
485 url = 'http://www.youtube.com/api/timedtext?' + params
487 sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
488 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
489 self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
492 self._downloader.report_warning(u'Did not fetch video subtitles')
496 def _request_automatic_caption(self, video_id, webpage):
497 """We need the webpage for getting the captions url, pass it as an
498 argument to speed up the process."""
499 sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
500 sub_format = self._downloader.params.get('subtitlesformat')
501 self.to_screen(u'%s: Looking for automatic captions' % video_id)
502 mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
503 err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
505 self._downloader.report_warning(err_msg)
507 player_config = json.loads(mobj.group(1))
509 args = player_config[u'args']
510 caption_url = args[u'ttsurl']
511 timestamp = args[u'timestamp']
512 params = compat_urllib_parse.urlencode({
519 subtitles_url = caption_url + '&' + params
520 sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
521 return {sub_lang: sub}
522 # An extractor error can be raise by the download process if there are
523 # no automatic captions but there are subtitles
524 except (KeyError, ExtractorError):
525 self._downloader.report_warning(err_msg)
528 def _extract_subtitles(self, video_id):
530 Return a dictionary: {language: subtitles} or {} if the subtitles
533 available_subs_list = self._get_available_subtitles(video_id)
534 sub_format = self._downloader.params.get('subtitlesformat')
535 if not available_subs_list: #There was some error, it didn't get the available subtitles
537 if self._downloader.params.get('allsubtitles', False):
538 sub_lang_list = available_subs_list
540 if self._downloader.params.get('subtitleslangs', False):
541 reqested_langs = self._downloader.params.get('subtitleslangs')
542 elif 'en' in available_subs_list:
543 reqested_langs = ['en']
545 reqested_langs = [list(available_subs_list.keys())[0]]
548 for sub_lang in reqested_langs:
549 if not sub_lang in available_subs_list:
550 self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
552 sub_lang_list[sub_lang] = available_subs_list[sub_lang]
554 for sub_lang in sub_lang_list:
555 subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
557 subtitles[sub_lang] = subtitle
560 def _print_formats(self, formats):
561 print('Available formats:')
563 print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
564 self._video_dimensions.get(x, '???'),
565 ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
567 def _extract_id(self, url):
# Extract the bare YouTube video ID from *url* (capture group 2 of
# _VALID_URL).
568 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
# NOTE(review): the guard between the match and the raise (presumably
# "if mobj is None:") is missing from this excerpt.
570 raise ExtractorError(u'Invalid URL: %s' % url)
571 video_id = mobj.group(2)
574 def _get_video_url_list(self, url_map):
576 Transform a dictionary in the format {itag:url} to a list of (itag, url)
577 with the requested formats.
579 req_format = self._downloader.params.get('format', None)
580 format_limit = self._downloader.params.get('format_limit', None)
581 available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
582 if format_limit is not None and format_limit in available_formats:
583 format_list = available_formats[available_formats.index(format_limit):]
585 format_list = available_formats
586 existing_formats = [x for x in format_list if x in url_map]
587 if len(existing_formats) == 0:
588 raise ExtractorError(u'no known formats available for video')
589 if self._downloader.params.get('listformats', None):
590 self._print_formats(existing_formats)
592 if req_format is None or req_format == 'best':
593 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
594 elif req_format == 'worst':
595 video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
596 elif req_format in ('-1', 'all'):
597 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
599 # Specific formats. We pick the first in a slash-delimited sequence.
600 # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
601 req_formats = req_format.split('/')
602 video_url_list = None
603 for rf in req_formats:
605 video_url_list = [(rf, url_map[rf])]
607 if video_url_list is None:
608 raise ExtractorError(u'requested format not available')
609 return video_url_list
611 def _extract_from_m3u8(self, manifest_url, video_id):
613 def _get_urls(_manifest):
614 lines = _manifest.split('\n')
615 urls = filter(lambda l: l and not l.startswith('#'),
618 manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
619 formats_urls = _get_urls(manifest)
620 for format_url in formats_urls:
621 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
622 url_map[itag] = format_url
625 def _real_extract(self, url):
626 if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
627 self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')
629 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
630 mobj = re.search(self._NEXT_URL_RE, url)
632 url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
633 video_id = self._extract_id(url)
636 self.report_video_webpage_download(video_id)
637 url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
638 request = compat_urllib_request.Request(url)
640 video_webpage_bytes = compat_urllib_request.urlopen(request).read()
641 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
642 raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
644 video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
646 # Attempt to extract SWF player URL
647 mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
649 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
654 self.report_video_info_webpage_download(video_id)
655 if re.search(r'player-age-gate-content">', video_webpage) is not None:
656 self.report_age_confirmation()
658 # We simulate the access to the video from www.youtube.com/v/{video_id}
659 # this can be viewed without login into Youtube
660 data = compat_urllib_parse.urlencode({'video_id': video_id,
664 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
668 video_info_url = 'https://www.youtube.com/get_video_info?' + data
669 video_info_webpage = self._download_webpage(video_info_url, video_id,
671 errnote='unable to download video info webpage')
672 video_info = compat_parse_qs(video_info_webpage)
675 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
676 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
677 % (video_id, el_type))
678 video_info_webpage = self._download_webpage(video_info_url, video_id,
680 errnote='unable to download video info webpage')
681 video_info = compat_parse_qs(video_info_webpage)
682 if 'token' in video_info:
684 if 'token' not in video_info:
685 if 'reason' in video_info:
686 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
688 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
690 # Check for "rental" videos
691 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
692 raise ExtractorError(u'"rental" videos not supported')
694 # Start extracting information
695 self.report_information_extraction(video_id)
698 if 'author' not in video_info:
699 raise ExtractorError(u'Unable to extract uploader name')
700 video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
703 video_uploader_id = None
704 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
706 video_uploader_id = mobj.group(1)
708 self._downloader.report_warning(u'unable to extract uploader nickname')
711 if 'title' not in video_info:
712 raise ExtractorError(u'Unable to extract video title')
713 video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
716 # We try first to get a high quality image:
717 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
718 video_webpage, re.DOTALL)
719 if m_thumb is not None:
720 video_thumbnail = m_thumb.group(1)
721 elif 'thumbnail_url' not in video_info:
722 self._downloader.report_warning(u'unable to extract video thumbnail')
724 else: # don't panic if we can't find it
725 video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
729 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
731 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
732 upload_date = unified_strdate(upload_date)
735 video_description = get_element_by_id("eow-description", video_webpage)
736 if video_description:
737 video_description = clean_html(video_description)
739 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
741 video_description = unescapeHTML(fd_mobj.group(1))
743 video_description = u''
746 video_subtitles = None
748 if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
749 video_subtitles = self._extract_subtitles(video_id)
750 elif self._downloader.params.get('writeautomaticsub', False):
751 video_subtitles = self._request_automatic_caption(video_id, video_webpage)
753 if self._downloader.params.get('listsubtitles', False):
754 self._list_available_subtitles(video_id)
757 if 'length_seconds' not in video_info:
758 self._downloader.report_warning(u'unable to extract video duration')
761 video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
763 # Decide which formats to download
766 mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
768 raise ValueError('Could not find vevo ID')
769 info = json.loads(mobj.group(1))
771 # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
772 # this signatures are encrypted
773 m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
775 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
776 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
777 m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
779 if 'url_encoded_fmt_stream_map' in video_info:
780 video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
782 video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
783 elif 'adaptive_fmts' in video_info:
784 if 'url_encoded_fmt_stream_map' in video_info:
785 video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
787 video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
791 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
792 self.report_rtmp_download()
793 video_url_list = [(None, video_info['conn'][0])]
794 elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
795 if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
796 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
798 for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
799 url_data = compat_parse_qs(url_data_str)
800 if 'itag' in url_data and 'url' in url_data:
801 url = url_data['url'][0]
802 if 'sig' in url_data:
803 url += '&signature=' + url_data['sig'][0]
804 elif 's' in url_data:
805 if self._downloader.params.get('verbose'):
808 player_version = self._search_regex(r'ad3-(.+?)\.swf',
809 video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
810 'flash player', fatal=False)
811 player = 'flash player %s' % player_version
813 player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
814 'html5 player', fatal=False)
815 parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
816 self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
817 (len(s), parts_sizes, url_data['itag'][0], player))
818 encrypted_sig = url_data['s'][0]
820 signature = self._decrypt_signature_age_gate(encrypted_sig)
822 signature = self._decrypt_signature(encrypted_sig)
823 url += '&signature=' + signature
824 if 'ratebypass' not in url:
825 url += '&ratebypass=yes'
826 url_map[url_data['itag'][0]] = url
827 video_url_list = self._get_video_url_list(url_map)
828 if not video_url_list:
830 elif video_info.get('hlsvp'):
831 manifest_url = video_info['hlsvp'][0]
832 url_map = self._extract_from_m3u8(manifest_url, video_id)
833 video_url_list = self._get_video_url_list(url_map)
834 if not video_url_list:
838 raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
841 for format_param, video_real_url in video_url_list:
843 video_extension = self._video_extensions.get(format_param, 'flv')
845 video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
846 self._video_dimensions.get(format_param, '???'),
847 ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
851 'url': video_real_url,
852 'uploader': video_uploader,
853 'uploader_id': video_uploader_id,
854 'upload_date': upload_date,
855 'title': video_title,
856 'ext': video_extension,
857 'format': video_format,
858 'thumbnail': video_thumbnail,
859 'description': video_description,
860 'player_url': player_url,
861 'subtitles': video_subtitles,
862 'duration': video_duration
866 class YoutubePlaylistIE(InfoExtractor):
867 IE_DESC = u'YouTube.com playlists'
873 (?:course|view_play_list|my_playlists|artist|playlist|watch)
874 \? (?:.*?&)*? (?:p|a|list)=
877 ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
880 ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
882 _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
884 IE_NAME = u'youtube:playlist'
887 def suitable(cls, url):
888 """Receives a URL and returns True if suitable for this IE."""
889 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
891 def _real_extract(self, url):
892 # Extract playlist id
893 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
895 raise ExtractorError(u'Invalid URL: %s' % url)
897 # Download playlist videos from API
898 playlist_id = mobj.group(1) or mobj.group(2)
901 for page_num in itertools.count(1):
902 start_index = self._MAX_RESULTS * (page_num - 1) + 1
903 if start_index >= 1000:
904 self._downloader.report_warning(u'Max number of results reached')
906 url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
907 page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
910 response = json.loads(page)
911 except ValueError as err:
912 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
914 if 'feed' not in response:
915 raise ExtractorError(u'Got a malformed response from YouTube API')
916 playlist_title = response['feed']['title']['$t']
917 if 'entry' not in response['feed']:
918 # Number of videos is a multiple of self._MAX_RESULTS
921 for entry in response['feed']['entry']:
922 index = entry['yt$position']['$t']
923 if 'media$group' in entry and 'media$player' in entry['media$group']:
924 videos.append((index, entry['media$group']['media$player']['url']))
926 videos = [v[1] for v in sorted(videos)]
928 url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
929 return [self.playlist_result(url_results, playlist_id, playlist_title)]
932 class YoutubeChannelIE(InfoExtractor):
933 IE_DESC = u'YouTube.com channels'
934 _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
935 _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
936 _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
937 _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
938 IE_NAME = u'youtube:channel'
940 def extract_videos_from_page(self, page):
942 for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
943 if mobj.group(1) not in ids_in_page:
944 ids_in_page.append(mobj.group(1))
947 def _real_extract(self, url):
949 mobj = re.match(self._VALID_URL, url)
951 raise ExtractorError(u'Invalid URL: %s' % url)
953 # Download channel page
954 channel_id = mobj.group(1)
958 url = self._TEMPLATE_URL % (channel_id, pagenum)
959 page = self._download_webpage(url, channel_id,
960 u'Downloading page #%s' % pagenum)
962 # Extract video identifiers
963 ids_in_page = self.extract_videos_from_page(page)
964 video_ids.extend(ids_in_page)
966 # Download any subsequent channel pages using the json-based channel_ajax query
967 if self._MORE_PAGES_INDICATOR in page:
968 for pagenum in itertools.count(1):
969 url = self._MORE_PAGES_URL % (pagenum, channel_id)
970 page = self._download_webpage(url, channel_id,
971 u'Downloading page #%s' % pagenum)
973 page = json.loads(page)
975 ids_in_page = self.extract_videos_from_page(page['content_html'])
976 video_ids.extend(ids_in_page)
978 if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
981 self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
983 urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
984 url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
985 return [self.playlist_result(url_entries, channel_id)]
988 class YoutubeUserIE(InfoExtractor):
989 IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
990 _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
991 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
992 _GDATA_PAGE_SIZE = 50
993 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
994 _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
995 IE_NAME = u'youtube:user'
997 def _real_extract(self, url):
999 mobj = re.match(self._VALID_URL, url)
1001 raise ExtractorError(u'Invalid URL: %s' % url)
1003 username = mobj.group(1)
1005 # Download video ids using YouTube Data API. Result size per
1006 # query is limited (currently to 50 videos) so we need to query
1007 # page by page until there are no video ids - it means we got
1012 for pagenum in itertools.count(0):
1013 start_index = pagenum * self._GDATA_PAGE_SIZE + 1
1015 gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
1016 page = self._download_webpage(gdata_url, username,
1017 u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
1019 # Extract video identifiers
1022 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1023 if mobj.group(1) not in ids_in_page:
1024 ids_in_page.append(mobj.group(1))
1026 video_ids.extend(ids_in_page)
1028 # A little optimization - if current page is not
1029 # "full", ie. does not contain PAGE_SIZE video ids then
1030 # we can assume that this page is the last one - there
1031 # are no more ids on further pages - no need to query
1034 if len(ids_in_page) < self._GDATA_PAGE_SIZE:
1037 urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
1038 url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
1039 return [self.playlist_result(url_results, playlist_title = username)]
class YoutubeSearchIE(SearchInfoExtractor):
    """Extractor for "ytsearchN:query" pseudo-URLs, backed by the GData API."""
    IE_DESC = u'YouTube.com searches'
    # Filled in with (url-quoted query, 1-based start index); 50 results per page.
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        video_ids = []
        pagenum = 0
        # The real number of available results is unknown until the
        # first response arrives; start with the caller's request.
        limit = n

        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum + 1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50 * pagenum) + 1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            new_ids = list(video['id'] for video in api_response['items'])
            video_ids += new_ids

            # Never ask for more than the service reports as available.
            limit = min(n, api_response['totalItems'])
            # Advance to the next page; without this the loop never ends.
            pagenum += 1

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeShowIE(InfoExtractor):
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        """Resolve a show page into one playlist result per season."""
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Each season of the show is published as a separate playlist.
        season_matches = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_matches)))
        results = []
        for season in season_matches:
            results.append(self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist'))
        return results
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # Entries advance by this amount between ajax pages — TODO confirm
    # against the current feed_ajax endpoint.
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # Personal feeds (e.g. watch later) require a different ajax action.
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # Feeds are per-account, so authentication is mandatory
        # (_LOGIN_REQUIRED makes _login raise if no credentials exist).
        self._login()

    def _real_extract(self, url):
        """Collect every video in the feed, following ajax paging."""
        feed_entries = []
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i * self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
            # A null 'paging' token marks the final page; without this
            # break the itertools.count loop never terminates.
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's subscriptions."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's recommended videos."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's watch-later list."""
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    # Watch-later is per-account, so use the personal-feed ajax action.
    _PERSONAL_FEED = True
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the logged-in user's favourites list."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds the id of its backing playlist;
        # delegate the actual video extraction to YoutubePlaylist.
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(r'list=(.+?)["&]', page, u'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')