9 from .common import InfoExtractor, SearchInfoExtractor
15 compat_urllib_request,
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Google account login endpoint used by _login.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    # Forces English / US region so scraped pages have a predictable layout.
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    # Age-gate confirmation endpoint used by _confirm_age.
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    # Machine name looked up in the user's .netrc for credentials.
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False
35 def report_lang(self):
36 """Report attempt to set language."""
37 self.to_screen(u'Setting language')
39 def _set_language(self):
40 request = compat_urllib_request.Request(self._LANG_URL)
43 compat_urllib_request.urlopen(request).read()
44 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
45 self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
        # NOTE(review): the 'def _login(self):' header and several lines are
        # missing from this chunk (sampled source); code kept as-is below,
        # commentary only.
        # Credentials come from --username/--password or the .netrc machine.
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if self._LOGIN_REQUIRED:
            raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
        # Fetch the login page first: it carries hidden anti-forgery tokens
        # (GALX / dsh) that must be echoed back in the login form.
        request = compat_urllib_request.Request(self._LOGIN_URL)
        # NOTE(review): the 'try:' pairing with the handler below is not visible.
        login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
        # Scrape the hidden form tokens out of the login page.
        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
        # Form fields POSTed back to ServiceLogin. NOTE(review): the dict
        # opener and several entries are missing from this chunk.
        u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
        u'PersistentCookie': u'yes',
        u'bgresponse': u'js_disabled',
        u'checkConnection': u'',
        u'checkedDomains': u'youtube',
        u'signIn': u'Sign in',
        u'service': u'youtube',
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
        # If the response still renders the login form, authentication failed.
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username or password')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
111 def _confirm_age(self):
114 'action_confirm': 'Confirm',
116 request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
118 self.report_age_confirmation()
119 compat_urllib_request.urlopen(request).read().decode('utf-8')
120 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
121 raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
124 def _real_initialize(self):
125 if self._downloader is None:
127 if not self._set_language():
129 if not self._login():
class YoutubeIE(YoutubeBaseInfoExtractor):
    IE_DESC = u'YouTube.com'
    # NOTE(review): this chunk is a sampled extract — the '_VALID_URL = r"""'
    # opener is not visible; the verbose-regex body below is kept as-is.
    (?:https?://)? # http(s):// (optional)
    (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
    tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains
    (?:.*?\#/)? # handle anchor (#/) redirect urls
    (?: # the various things that can precede the ID:
    (?:(?:v|embed|e)/) # v/ or embed/ or e/
    |(?: # or the v= param in all its forms
    (?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
    (?:\?|\#!?) # the params delimiter ? or # or #!
    (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
    )? # optional -> youtube.com/xxxx is OK
    )? # all until now is optional -> you can pass the naked ID
    ([0-9A-Za-z_-]+) # here is it! the YouTube video ID
    (?(1).+)? # if we found the ID, everything can follow
    # Strips a next_url=... redirect parameter (used by the age gate).
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    # NOTE(review): list openers/closers are truncated in this chunk.
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
    '95', '94', '93', '92', '132', '151',
    '85', '84', '102', '83', '101', '82', '100',
    '138', '137', '248', '136', '247', '135', '246',
    '245', '244', '134', '243', '133', '242', '160',
    '141', '172', '140', '171', '139',
    # Same itags, but free (WebM) codecs preferred at equal quality.
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
    '95', '94', '93', '92', '132', '151',
    '85', '102', '84', '101', '83', '100', '82',
    '138', '248', '137', '247', '136', '246', '245',
    '244', '135', '243', '134', '242', '133', '160',
    '172', '141', '171', '140', '139',
    # itag -> container extension; dict body missing from this chunk.
    _video_extensions = {
    # videos that use m3u8
    # itag -> display dimensions string; dict body missing from this chunk.
    _video_dimensions = {
    # NOTE(review): the '_TESTS = [' opener and per-entry braces are not
    # visible; the surviving test-fixture entries are kept as-is below.
    u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
    u"file": u"BaW_jenozKc.mp4",
    u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
    u"uploader": u"Philipp Hagemeister",
    u"uploader_id": u"phihag",
    u"upload_date": u"20121002",
    u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
    u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
    u"file": u"1ltcDfZMA3U.flv",
    u"note": u"Test VEVO video (#897)",
    u"upload_date": u"20070518",
    u"title": u"Maps - It Will Find You",
    u"description": u"Music video by Maps performing It Will Find You.",
    u"uploader": u"MuteUSA",
    u"uploader_id": u"MuteUSA"
    u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
    u"file": u"UxxajLWwzqY.mp4",
    u"note": u"Test generic use_cipher_signature video (#897)",
    u"upload_date": u"20120506",
    u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
    u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
    u"uploader": u"Icona Pop",
    u"uploader_id": u"IconaPop"
    u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
    u"file": u"07FYdnEawAQ.mp4",
    u"note": u"Test VEVO video with age protection (#956)",
    u"upload_date": u"20130703",
    u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
    u"description": u"md5:64249768eec3bc4276236606ea996373",
    u"uploader": u"justintimberlakeVEVO",
    u"uploader_id": u"justintimberlakeVEVO"
    u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
    u'file': u'TGi3HqYrWHE.mp4',
    u'note': u'm3u8 video',
    u'title': u'Triathlon - Men - London 2012 Olympic Games',
    u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
    u'uploader': u'olympic',
    u'upload_date': u'20120807',
    u'uploader_id': u'olympic',
    u'skip_download': True,
374 def suitable(cls, url):
375 """Receives a URL and returns True if suitable for this IE."""
376 if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
377 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
    def report_video_webpage_download(self, video_id):
        """Report attempt to download video webpage."""
        self.to_screen(u'%s: Downloading video webpage' % video_id)

    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self.to_screen(u'%s: Downloading video info webpage' % video_id)

    def report_video_subtitles_download(self, video_id):
        """Report that available subtitles are being checked."""
        # (docstring fixed: previously a copy-paste of the info-webpage one)
        self.to_screen(u'%s: Checking available subtitles' % video_id)

    def report_video_subtitles_request(self, video_id, sub_lang, format):
        """Report a subtitle download attempt for one language/format."""
        # (docstring fixed: previously a copy-paste of the info-webpage one)
        self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))

    def report_video_subtitles_available(self, video_id, sub_lang_list):
        """Report available subtitles."""
        sub_lang = ",".join(list(sub_lang_list.keys()))
        self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang))

    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self.to_screen(u'%s: Extracting video information' % video_id)

    def report_unavailable_format(self, video_id, format):
        """Report that the requested format is not available."""
        # (docstring fixed: previously said "Report extracted video URL.")
        self.to_screen(u'%s: Format %s not available' % (video_id, format))

    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self.to_screen(u'RTMP download detected')
    def _decrypt_signature(self, s):
        """Turn the encrypted s field into a working signature"""
        # Each branch applies a fixed permutation chosen by len(s).
        # NOTE(review): the 'if len(s) == NN:' guard lines are missing from
        # this chunk — only the return expressions survive below.
        return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
        return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
        return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
        return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
        return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
        return s[5:20] + s[2] + s[21:]
        return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
        return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
        return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
        return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
        return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
        return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
        # Unknown length: we cannot unscramble the signature.
        raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
    def _decrypt_signature_age_gate(self, s):
        # The videos with age protection use another player, so the algorithms
        # can differ. NOTE(review): the 'if len(s) == NN:' guard pairing with
        # the first return is missing from this chunk.
        return s[2:63] + s[82] + s[64:82] + s[63]
        # Fallback to the other algorithms
        return self._decrypt_signature(s)
    def _get_available_subtitles(self, video_id):
        """Fetch the subtitle track list for *video_id*.

        Returns a {lang_code: track_name} dict, or an
        (error_message, None) tuple on failure.
        """
        self.report_video_subtitles_download(video_id)
        request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
        # NOTE(review): the 'try:' opener pairing with the handler below is
        # missing from this chunk.
        sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            return (u'unable to download video subtitles: %s' % compat_str(err), None)
        # Map lang_code -> human-readable track name.
        sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
        sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
        if not sub_lang_list:
            return (u'video doesn\'t have subtitles', None)
        # NOTE(review): the final 'return sub_lang_list' is not visible here.
466 def _list_available_subtitles(self, video_id):
467 sub_lang_list = self._get_available_subtitles(video_id)
468 self.report_video_subtitles_available(video_id, sub_lang_list)
    def _request_subtitle(self, sub_lang, sub_name, video_id, format):
        # NOTE(review): the original docstring opener/closer are missing from
        # this chunk; the surviving docstring line documented the return
        # shape and is kept as-is below.
        (error_message, sub_lang, sub)
        self.report_video_subtitles_request(video_id, sub_lang, format)
        # Build the timedtext query. NOTE(review): the dict entries
        # (lang/name/v/fmt) are missing from this chunk.
        params = compat_urllib_parse.urlencode({
        url = 'http://www.youtube.com/api/timedtext?' + params
        # NOTE(review): the 'try:' opener for the handler below is missing.
        sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            return (u'unable to download video subtitles: %s' % compat_str(err), None, None)
            return (u'Did not fetch video subtitles', None, None)
        return (None, sub_lang, sub)
    def _request_automatic_caption(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process."""
        sub_lang = self._downloader.params.get('subtitleslang') or 'en'
        sub_format = self._downloader.params.get('subtitlesformat')
        self.to_screen(u'%s: Looking for automatic captions' % video_id)
        # The caption URL lives inside the embedded player configuration JSON.
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
        # NOTE(review): the 'if mobj is None:' guard line is missing here.
            return [(err_msg, None, None)]
        player_config = json.loads(mobj.group(1))
        # NOTE(review): the 'try:' opener around the lookups below is missing.
        args = player_config[u'args']
        caption_url = args[u'ttsurl']
        timestamp = args[u'timestamp']
        # NOTE(review): the urlencode dict entries are missing from this chunk.
        params = compat_urllib_parse.urlencode({
        subtitles_url = caption_url + '&' + params
        sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
        return [(None, sub_lang, sub)]
        # NOTE(review): the 'except KeyError:' line pairing with this return
        # is missing from this chunk.
            return [(err_msg, None, None)]
    def _extract_subtitle(self, video_id):
        # NOTE(review): the original docstring opener/closer are missing from
        # this chunk; its surviving lines are kept as-is below.
        Return a list with a tuple:
        [(error_message, sub_lang, sub)]
        sub_lang_list = self._get_available_subtitles(video_id)
        sub_format = self._downloader.params.get('subtitlesformat')
        if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
            return [(sub_lang_list[0], None, None)]
        # Choose the language: explicit option first, then English, then the
        # first available one.
        if self._downloader.params.get('subtitleslang', False):
            sub_lang = self._downloader.params.get('subtitleslang')
        elif 'en' in sub_lang_list:
        # NOTE(review): the "sub_lang = 'en'" body and the 'else:' line are
        # missing — the assignment below belongs to the else branch.
            sub_lang = list(sub_lang_list.keys())[0]
        if not sub_lang in sub_lang_list:
            return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)]
        subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
        # NOTE(review): the final 'return [subtitle]' is not visible here.
    def _extract_all_subtitles(self, video_id):
        """Download every available subtitle track; returns a list of
        (error_message, sub_lang, sub) tuples."""
        sub_lang_list = self._get_available_subtitles(video_id)
        sub_format = self._downloader.params.get('subtitlesformat')
        if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
            return [(sub_lang_list[0], None, None)]
        # NOTE(review): the 'subtitles = []' initializer is missing here.
        for sub_lang in sub_lang_list:
            subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
            subtitles.append(subtitle)
        # NOTE(review): the final 'return subtitles' is not visible here.
    def _print_formats(self, formats):
        """Print itag, extension, dimensions and any special tag for each
        available format."""
        print('Available formats:')
        # NOTE(review): the 'for x in formats:' loop header is missing from
        # this chunk; the print below is its body.
        print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
            self._video_dimensions.get(x, '???'),
            ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
558 def _extract_id(self, url):
559 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
561 raise ExtractorError(u'Invalid URL: %s' % url)
562 video_id = mobj.group(2)
    def _get_video_url_list(self, url_map):
        # NOTE(review): the original docstring opener/closer are missing from
        # this chunk; its surviving lines are kept as-is below.
        Transform a dictionary in the format {itag:url} to a list of (itag, url)
        with the requested formats.
        req_format = self._downloader.params.get('format', None)
        format_limit = self._downloader.params.get('format_limit', None)
        # --prefer-free-formats reorders the quality ladder to favor WebM.
        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
        if format_limit is not None and format_limit in available_formats:
            format_list = available_formats[available_formats.index(format_limit):]
        # NOTE(review): the 'else:' line pairing with this assignment is missing.
            format_list = available_formats
        existing_formats = [x for x in format_list if x in url_map]
        if len(existing_formats) == 0:
            raise ExtractorError(u'no known formats available for video')
        if self._downloader.params.get('listformats', None):
            self._print_formats(existing_formats)
        if req_format is None or req_format == 'best':
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
        elif req_format == 'worst':
            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
        elif req_format in ('-1', 'all'):
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
        # NOTE(review): the final 'else:' line is missing from this chunk.
            # Specific formats. We pick the first in a slash-delimited sequence.
            # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
            req_formats = req_format.split('/')
            video_url_list = None
            for rf in req_formats:
                # NOTE(review): the 'if rf in url_map:' guard and 'break' are
                # missing around this assignment.
                video_url_list = [(rf, url_map[rf])]
            if video_url_list is None:
                raise ExtractorError(u'requested format not available')
        return video_url_list
    def _extract_from_m3u8(self, manifest_url, video_id):
        """Build an {itag: url} map from an HLS (m3u8) manifest."""
        # NOTE(review): the 'url_map = {}' initializer is missing from this chunk.
        def _get_urls(_manifest):
            # Non-comment, non-empty lines of an m3u8 playlist are media URLs.
            lines = _manifest.split('\n')
            urls = filter(lambda l: l and not l.startswith('#'),
            # NOTE(review): the second filter argument and 'return urls' are
            # missing from this chunk.
        manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
        formats_urls = _get_urls(manifest)
        for format_url in formats_urls:
            # The itag is embedded in the per-format URL path.
            itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
            url_map[itag] = format_url
        # NOTE(review): the final 'return url_map' is not visible here.
    def _real_extract(self, url):
        # NOTE(review): this chunk is a sampled extract — many guard lines
        # ('try:', 'if mobj is not None:', 'else:', dict openers) are missing
        # throughout; the visible code is kept as-is, commentary only.
        # Heuristic: a watch URL with only feature= usually means the shell
        # ate everything after '&'.
        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')
        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
            url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
        video_id = self._extract_id(url)
        # ---- Download the watch page ----
        self.report_video_webpage_download(video_id)
        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
        request = compat_urllib_request.Request(url)
            video_webpage_bytes = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
        video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
        # ---- Fetch video info (age-gated videos use the /v/ embed trick) ----
        self.report_video_info_webpage_download(video_id)
        if re.search(r'player-age-gate-content">', video_webpage) is not None:
            self.report_age_confirmation()
            # We simulate the access to the video from www.youtube.com/v/{video_id}
            # this can be viewed without login into Youtube
            data = compat_urllib_parse.urlencode({'video_id': video_id,
                'eurl': 'https://youtube.googleapis.com/v/' + video_id,
            video_info_url = 'https://www.youtube.com/get_video_info?' + data
            video_info_webpage = self._download_webpage(video_info_url, video_id,
                errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
            # Try several 'el' variants until one response carries a token.
            for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
                video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                    % (video_id, el_type))
                video_info_webpage = self._download_webpage(video_info_url, video_id,
                    errnote='unable to download video info webpage')
                video_info = compat_parse_qs(video_info_webpage)
                if 'token' in video_info:
        if 'token' not in video_info:
            if 'reason' in video_info:
                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
                raise ExtractorError(u'"token" parameter not in video info for unknown reason')
        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError(u'"rental" videos not supported')
        # Start extracting information
        self.report_information_extraction(video_id)
        # uploader
        if 'author' not in video_info:
            raise ExtractorError(u'Unable to extract uploader name')
        video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
        # uploader_id (best effort, scraped from the page)
        video_uploader_id = None
        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
            video_uploader_id = mobj.group(1)
            self._downloader.report_warning(u'unable to extract uploader nickname')
        # title
        if 'title' not in video_info:
            raise ExtractorError(u'Unable to extract video title')
        video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
        # thumbnail
        # We try first to get a high quality image:
        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
            video_webpage, re.DOTALL)
        if m_thumb is not None:
            video_thumbnail = m_thumb.group(1)
        elif 'thumbnail_url' not in video_info:
            self._downloader.report_warning(u'unable to extract video thumbnail')
        else: # don't panic if we can't find it
            video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
        # upload date (scraped from the page, normalized to YYYYMMDD)
        mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
            upload_date = unified_strdate(upload_date)
        # description
        video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
            video_description = clean_html(video_description)
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
                video_description = unescapeHTML(fd_mobj.group(1))
                video_description = u''
        # ---- subtitles ----
        video_subtitles = None
        if self._downloader.params.get('writesubtitles', False):
            video_subtitles = self._extract_subtitle(video_id)
                (sub_error, sub_lang, sub) = video_subtitles[0]
                self._downloader.report_warning(sub_error)
        if self._downloader.params.get('writeautomaticsub', False):
            video_subtitles = self._request_automatic_caption(video_id, video_webpage)
            (sub_error, sub_lang, sub) = video_subtitles[0]
                self._downloader.report_warning(sub_error)
        if self._downloader.params.get('allsubtitles', False):
            video_subtitles = self._extract_all_subtitles(video_id)
            for video_subtitle in video_subtitles:
                (sub_error, sub_lang, sub) = video_subtitle
                    self._downloader.report_warning(sub_error)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id)
        # duration
        if 'length_seconds' not in video_info:
            self._downloader.report_warning(u'unable to extract video duration')
            video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
        # Decide which formats to download
        mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
            raise ValueError('Could not find vevo ID')
        info = json.loads(mobj.group(1))
        # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
        # this signatures are encrypted
        m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
            self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
            video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
        m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
            if 'url_encoded_fmt_stream_map' in video_info:
                video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
                video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
        elif 'adaptive_fmts' in video_info:
            if 'url_encoded_fmt_stream_map' in video_info:
                video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
                video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
        # ---- build the (itag, url) candidate list ----
        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            video_url_list = [(None, video_info['conn'][0])]
        elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
            if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
            for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
                url_data = compat_parse_qs(url_data_str)
                if 'itag' in url_data and 'url' in url_data:
                    url = url_data['url'][0]
                    if 'sig' in url_data:
                        url += '&signature=' + url_data['sig'][0]
                    elif 's' in url_data:
                        if self._downloader.params.get('verbose'):
                            player_version = self._search_regex(r'ad3-(.+?)\.swf',
                                video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
                                'flash player', fatal=False)
                            player = 'flash player %s' % player_version
                            player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
                                'html5 player', fatal=False)
                            parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
                            self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                                (len(s), parts_sizes, url_data['itag'][0], player))
                        encrypted_sig = url_data['s'][0]
                        # Age-gated videos ship a different scrambling scheme.
                            signature = self._decrypt_signature_age_gate(encrypted_sig)
                            signature = self._decrypt_signature(encrypted_sig)
                        url += '&signature=' + signature
                    if 'ratebypass' not in url:
                        url += '&ratebypass=yes'
                    url_map[url_data['itag'][0]] = url
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
        elif video_info.get('hlsvp'):
            # m3u8 (HLS) fallback, used e.g. by live/Olympic videos.
            manifest_url = video_info['hlsvp'][0]
            url_map = self._extract_from_m3u8(manifest_url, video_id)
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
            raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
        # ---- assemble one result dict per selected format ----
        for format_param, video_real_url in video_url_list:
            # Extension
            video_extension = self._video_extensions.get(format_param, 'flv')
            video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
                self._video_dimensions.get(format_param, '???'),
                ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
            # NOTE(review): the 'results.append({' opener and 'id' key are
            # missing from this chunk.
                'url': video_real_url,
                'uploader': video_uploader,
                'uploader_id': video_uploader_id,
                'upload_date': upload_date,
                'title': video_title,
                'ext': video_extension,
                'format': video_format,
                'thumbnail': video_thumbnail,
                'description': video_description,
                'player_url': player_url,
                'subtitles': video_subtitles,
                'duration': video_duration
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = u'YouTube.com playlists'
    # NOTE(review): the '_VALID_URL = r"""' opener and several alternation
    # lines are missing from this chunk; surviving fragments kept as-is.
    (?:course|view_play_list|my_playlists|artist|playlist|watch)
    \? (?:.*?&)*? (?:p|a|list)=
    ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
    ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
    # GData v2 JSON feed; %s/%i slots are (playlist_id, max_results, start_index).
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    IE_NAME = u'youtube:playlist'
893 def suitable(cls, url):
894 """Receives a URL and returns True if suitable for this IE."""
895 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
    def _real_extract(self, url):
        # NOTE(review): sampled chunk — several guard lines ('if mobj is
        # None:', 'try:', 'break', 'videos = []') are missing below.
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            raise ExtractorError(u'Invalid URL: %s' % url)
        # Download playlist videos from API
        # Either capture group may have matched, depending on the URL form.
        playlist_id = mobj.group(1) or mobj.group(2)
        for page_num in itertools.count(1):
            # GData start-index is 1-based.
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                # The API refuses start-index >= 1000.
                self._downloader.report_warning(u'Max number of results reached')
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
            response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'media$player' in entry['media$group']:
                    videos.append((index, entry['media$group']['media$player']['url']))
        # Sort by playlist position, then drop the index.
        videos = [v[1] for v in sorted(videos)]
        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
class YoutubeChannelIE(InfoExtractor):
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    # First (HTML) page of the channel's video list; %s slots: (channel_id, page).
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    # Marker string whose presence means more pages exist.
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    # JSON AJAX endpoint for subsequent pages; %s slots: (paging, channel_id).
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'
946 def extract_videos_from_page(self, page):
948 for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
949 if mobj.group(1) not in ids_in_page:
950 ids_in_page.append(mobj.group(1))
    def _real_extract(self, url):
        # NOTE(review): sampled chunk — several lines ('if mobj is None:',
        # 'video_ids = []', 'pagenum = 0', 'break') are missing below.
        mobj = re.match(self._VALID_URL, url)
            raise ExtractorError(u'Invalid URL: %s' % url)
        # Download channel page
        channel_id = mobj.group(1)
        url = self._TEMPLATE_URL % (channel_id, pagenum)
        page = self._download_webpage(url, channel_id,
            u'Downloading page #%s' % pagenum)
        # Extract video identifiers
        ids_in_page = self.extract_videos_from_page(page)
        video_ids.extend(ids_in_page)
        # Download any subsequent channel pages using the json-based channel_ajax query
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                    u'Downloading page #%s' % pagenum)
                page = json.loads(page)
                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)
                # Stop once the widget no longer offers a "load more" link.
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
    # GData user feed root (profile lookup).
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    # GData caps results per request at 50.
    _GDATA_PAGE_SIZE = 50
    # Uploads feed; %s/%d slots: (username, page_size, start_index).
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
    # Extracts the video id out of a watch link in the feed markup.
    _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
    IE_NAME = u'youtube:user'
    def _real_extract(self, url):
        # NOTE(review): sampled chunk — a few lines ('if mobj is None:',
        # 'video_ids = []', 'ids_in_page = []', 'break') are missing below.
        mobj = re.match(self._VALID_URL, url)
            raise ExtractorError(u'Invalid URL: %s' % url)
        username = mobj.group(1)
        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        for pagenum in itertools.count(0):
            # GData start-index is 1-based.
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1
            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
            # Extract video identifiers
            for mobj in re.finditer(self._VIDEO_INDICATOR, page):
                if mobj.group(1) not in ids_in_page:
                    ids_in_page.append(mobj.group(1))
            video_ids.extend(ids_in_page)
            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title = username)]
class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
    # GData search endpoint; %s/%i slots: (quoted_query, start_index).
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    IE_NAME = u'youtube:search'
    # Prefix used on the command line: ytsearchN:QUERY.
    _SEARCH_KEY = 'ytsearch'
1054 def report_download_page(self, query, pagenum):
1055 """Report attempt to download search page with given number."""
1056 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1058 def _get_n_results(self, query, n):
1059 """Get a specified number of results for a query"""
1065 while (50 * pagenum) < limit:
1066 self.report_download_page(query, pagenum+1)
1067 result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
1068 request = compat_urllib_request.Request(result_url)
1070 data = compat_urllib_request.urlopen(request).read().decode('utf-8')
1071 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1072 raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
1073 api_response = json.loads(data)['data']
1075 if not 'items' in api_response:
1076 raise ExtractorError(u'[youtube] No video results')
1078 new_ids = list(video['id'] for video in api_response['items'])
1079 video_ids += new_ids
1081 limit = min(n, api_response['totalItems'])
1084 if len(video_ids) > n:
1085 video_ids = video_ids[:n]
1086 videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
1087 return self.playlist_result(videos, query)
class YoutubeShowIE(InfoExtractor):
    """Turn a youtube.com/show/* page into one playlist per season."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        season_links = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_links)))
        return [
            self.url_result('https://www.youtube.com' + season_link.group(1), 'YoutubePlaylist')
            for season_link in season_links
        ]
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # Item offset between two consecutive feed_ajax pages.
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        """URL template for the feed; '%s' placeholder takes the paging offset."""
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # Feeds are per-account, so authenticate before extracting.
        self._login()

    def _real_extract(self, url):
        feed_entries = []
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i * self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            # orderedSet drops duplicates while keeping first-seen order.
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(self.url_result(video_id, 'Youtube') for video_id in ids)
            # The server marks the last page with a null 'paging' field.
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Personal subscriptions feed; login is required (the base class sets
    # _LOGIN_REQUIRED = True) and the URL shorthand is ":ytsubs".
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Recommended-videos feed; requires login via the feeds base class.
    # Reachable by URL or with the ":ytrec" shorthand.
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    # Watch-later list; reachable by URL or the ":ytwatchlater" shorthand.
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Watch-later is a per-account list, so use the personal feed action.
    _PERSONAL_FEED = True
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the logged-in user's favourites list to its backing playlist."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    # Accept :ytfav, :ytfavorites and :ytfavourites.  The previous form
    # (?:o?rites)? matched the typo "ytfavrites" but not the British
    # spelling "ytfavourites" used by the class name and description.
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds the id of a regular playlist; hand it
        # off to YoutubePlaylist rather than scraping video links here.
        webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')