from .common import InfoExtractor, SearchInfoExtractor
from .subtitles import SubtitlesIE, SubtitlesInfoExtractor
16 compat_urllib_request,
27 class YoutubeBaseInfoExtractor(InfoExtractor):
28 """Provide base functions for Youtube extractors"""
29 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
30 _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
31 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
32 _NETRC_MACHINE = 'youtube'
33 # If True it will raise an error if no login info is provided
34 _LOGIN_REQUIRED = False
36 def report_lang(self):
37 """Report attempt to set language."""
38 self.to_screen(u'Setting language')
40 def _set_language(self):
41 request = compat_urllib_request.Request(self._LANG_URL)
44 compat_urllib_request.urlopen(request).read()
45 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
46 self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
51 (username, password) = self._get_login_info()
52 # No authentication to be performed
54 if self._LOGIN_REQUIRED:
55 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
58 request = compat_urllib_request.Request(self._LOGIN_URL)
60 login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
61 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
62 self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
67 match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
70 match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
76 u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
80 u'PersistentCookie': u'yes',
82 u'bgresponse': u'js_disabled',
83 u'checkConnection': u'',
84 u'checkedDomains': u'youtube',
90 u'signIn': u'Sign in',
92 u'service': u'youtube',
96 # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
98 login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
99 login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
100 request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
103 login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
104 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
105 self._downloader.report_warning(u'unable to log in: bad username or password')
107 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
108 self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
112 def _confirm_age(self):
115 'action_confirm': 'Confirm',
117 request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
119 self.report_age_confirmation()
120 compat_urllib_request.urlopen(request).read().decode('utf-8')
121 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
122 raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
125 def _real_initialize(self):
126 if self._downloader is None:
128 if not self._set_language():
130 if not self._login():
135 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
136 IE_DESC = u'YouTube.com'
139 (?:https?://)? # http(s):// (optional)
140 (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
141 tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains
142 (?:.*?\#/)? # handle anchor (#/) redirect urls
143 (?: # the various things that can precede the ID:
144 (?:(?:v|embed|e)/) # v/ or embed/ or e/
145 |(?: # or the v= param in all its forms
146 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
147 (?:\?|\#!?) # the params delimiter ? or # or #!
148 (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
152 |youtu\.be/ # just youtu.be/xxxx
154 )? # all until now is optional -> you can pass the naked ID
155 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
156 (?(1).+)? # if we found the ID, everything can follow
158 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
159 # Listed in order of quality
160 _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
161 # Apple HTTP Live Streaming
162 '96', '95', '94', '93', '92', '132', '151',
164 '85', '84', '102', '83', '101', '82', '100',
166 '138', '137', '248', '136', '247', '135', '246',
167 '245', '244', '134', '243', '133', '242', '160',
169 '141', '172', '140', '171', '139',
171 _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
172 # Apple HTTP Live Streaming
173 '96', '95', '94', '93', '92', '132', '151',
175 '85', '102', '84', '101', '83', '100', '82',
177 '138', '248', '137', '247', '136', '246', '245',
178 '244', '135', '243', '134', '242', '133', '160',
180 '172', '141', '171', '140', '139',
182 _video_formats_map = {
183 'flv': ['35', '34', '6', '5'],
184 '3gp': ['36', '17', '13'],
185 'mp4': ['38', '37', '22', '18'],
186 'webm': ['46', '45', '44', '43'],
188 _video_extensions = {
210 # Apple HTTP Live Streaming
242 _video_dimensions = {
324 u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
325 u"file": u"BaW_jenozKc.mp4",
327 u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
328 u"uploader": u"Philipp Hagemeister",
329 u"uploader_id": u"phihag",
330 u"upload_date": u"20121002",
331 u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
335 u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
336 u"file": u"1ltcDfZMA3U.flv",
337 u"note": u"Test VEVO video (#897)",
339 u"upload_date": u"20070518",
340 u"title": u"Maps - It Will Find You",
341 u"description": u"Music video by Maps performing It Will Find You.",
342 u"uploader": u"MuteUSA",
343 u"uploader_id": u"MuteUSA"
347 u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
348 u"file": u"UxxajLWwzqY.mp4",
349 u"note": u"Test generic use_cipher_signature video (#897)",
351 u"upload_date": u"20120506",
352 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
353 u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
354 u"uploader": u"Icona Pop",
355 u"uploader_id": u"IconaPop"
359 u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
360 u"file": u"07FYdnEawAQ.mp4",
361 u"note": u"Test VEVO video with age protection (#956)",
363 u"upload_date": u"20130703",
364 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
365 u"description": u"md5:64249768eec3bc4276236606ea996373",
366 u"uploader": u"justintimberlakeVEVO",
367 u"uploader_id": u"justintimberlakeVEVO"
371 u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
372 u'file': u'TGi3HqYrWHE.mp4',
373 u'note': u'm3u8 video',
375 u'title': u'Triathlon - Men - London 2012 Olympic Games',
376 u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
377 u'uploader': u'olympic',
378 u'upload_date': u'20120807',
379 u'uploader_id': u'olympic',
382 u'skip_download': True,
389 def suitable(cls, url):
390 """Receives a URL and returns True if suitable for this IE."""
391 if YoutubePlaylistIE.suitable(url): return False
392 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
394 def report_video_webpage_download(self, video_id):
395 """Report attempt to download video webpage."""
396 self.to_screen(u'%s: Downloading video webpage' % video_id)
398 def report_video_info_webpage_download(self, video_id):
399 """Report attempt to download video info webpage."""
400 self.to_screen(u'%s: Downloading video info webpage' % video_id)
402 def report_information_extraction(self, video_id):
403 """Report attempt to extract video information."""
404 self.to_screen(u'%s: Extracting video information' % video_id)
406 def report_unavailable_format(self, video_id, format):
407 """Report extracted video URL."""
408 self.to_screen(u'%s: Format %s not available' % (video_id, format))
410 def report_rtmp_download(self):
411 """Indicate the download will use the RTMP protocol."""
412 self.to_screen(u'RTMP download detected')
414 def _decrypt_signature(self, s):
415 """Turn the encrypted s field into a working signature"""
418 return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
420 return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
422 return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
424 return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
426 return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
428 return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
430 return s[40] + s[82:43:-1] + s[22] + s[42:40:-1] + s[83] + s[39:22:-1] + s[0] + s[21:2:-1]
432 return s[81:36:-1] + s[0] + s[35:2:-1]
434 return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
436 return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
438 return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
440 return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
442 return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
445 raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
447 def _decrypt_signature_age_gate(self, s):
448 # The videos with age protection use another player, so the algorithms
451 return s[2:63] + s[82] + s[64:82] + s[63]
453 # Fallback to the other algortihms
454 return self._decrypt_signature(s)
456 def _get_available_subtitles(self, video_id):
457 request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
459 sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
460 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
461 self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
463 lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
468 params = compat_urllib_parse.urlencode({
471 'fmt': self._downloader.params.get('subtitlesformat'),
473 url = u'http://www.youtube.com/api/timedtext?' + params
474 sub_lang_list[lang] = url
475 if not sub_lang_list:
476 self._downloader.report_warning(u'video doesn\'t have subtitles')
480 def _request_automatic_caption(self, video_id, webpage):
481 """We need the webpage for getting the captions url, pass it as an
482 argument to speed up the process."""
483 sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
484 sub_format = self._downloader.params.get('subtitlesformat')
485 self.to_screen(u'%s: Looking for automatic captions' % video_id)
486 mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
487 err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
489 self._downloader.report_warning(err_msg)
491 player_config = json.loads(mobj.group(1))
493 args = player_config[u'args']
494 caption_url = args[u'ttsurl']
495 timestamp = args[u'timestamp']
496 params = compat_urllib_parse.urlencode({
503 subtitles_url = caption_url + '&' + params
504 sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
505 return {sub_lang: sub}
506 # An extractor error can be raise by the download process if there are
507 # no automatic captions but there are subtitles
508 except (KeyError, ExtractorError):
509 self._downloader.report_warning(err_msg)
512 def _print_formats(self, formats):
513 print('Available formats:')
515 print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
516 self._video_dimensions.get(x, '???'),
517 ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
519 def _extract_id(self, url):
520 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
522 raise ExtractorError(u'Invalid URL: %s' % url)
523 video_id = mobj.group(2)
526 def _get_video_url_list(self, url_map):
528 Transform a dictionary in the format {itag:url} to a list of (itag, url)
529 with the requested formats.
531 req_format = self._downloader.params.get('format', None)
532 format_limit = self._downloader.params.get('format_limit', None)
533 available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
534 if format_limit is not None and format_limit in available_formats:
535 format_list = available_formats[available_formats.index(format_limit):]
537 format_list = available_formats
538 existing_formats = [x for x in format_list if x in url_map]
539 if len(existing_formats) == 0:
540 raise ExtractorError(u'no known formats available for video')
541 if self._downloader.params.get('listformats', None):
542 self._print_formats(existing_formats)
544 if req_format is None or req_format == 'best':
545 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
546 elif req_format == 'worst':
547 video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
548 elif req_format in ('-1', 'all'):
549 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
551 # Specific formats. We pick the first in a slash-delimeted sequence.
552 # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
553 # available in the specified format. For example,
554 # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
555 # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
556 # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
557 req_formats = req_format.split('/')
558 video_url_list = None
559 for rf in req_formats:
561 video_url_list = [(rf, url_map[rf])]
563 if rf in self._video_formats_map:
564 for srf in self._video_formats_map[rf]:
566 video_url_list = [(srf, url_map[srf])]
571 if video_url_list is None:
572 raise ExtractorError(u'requested format not available')
573 return video_url_list
575 def _extract_from_m3u8(self, manifest_url, video_id):
577 def _get_urls(_manifest):
578 lines = _manifest.split('\n')
579 urls = filter(lambda l: l and not l.startswith('#'),
582 manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
583 formats_urls = _get_urls(manifest)
584 for format_url in formats_urls:
585 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
586 url_map[itag] = format_url
589 def _real_extract(self, url):
590 if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
591 self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')
593 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
594 mobj = re.search(self._NEXT_URL_RE, url)
596 url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
597 video_id = self._extract_id(url)
600 self.report_video_webpage_download(video_id)
601 url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
602 request = compat_urllib_request.Request(url)
604 video_webpage_bytes = compat_urllib_request.urlopen(request).read()
605 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
606 raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
608 video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
610 # Attempt to extract SWF player URL
611 mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
613 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
618 self.report_video_info_webpage_download(video_id)
619 if re.search(r'player-age-gate-content">', video_webpage) is not None:
620 self.report_age_confirmation()
622 # We simulate the access to the video from www.youtube.com/v/{video_id}
623 # this can be viewed without login into Youtube
624 data = compat_urllib_parse.urlencode({'video_id': video_id,
628 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
632 video_info_url = 'https://www.youtube.com/get_video_info?' + data
633 video_info_webpage = self._download_webpage(video_info_url, video_id,
635 errnote='unable to download video info webpage')
636 video_info = compat_parse_qs(video_info_webpage)
639 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
640 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
641 % (video_id, el_type))
642 video_info_webpage = self._download_webpage(video_info_url, video_id,
644 errnote='unable to download video info webpage')
645 video_info = compat_parse_qs(video_info_webpage)
646 if 'token' in video_info:
648 if 'token' not in video_info:
649 if 'reason' in video_info:
650 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
652 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
654 # Check for "rental" videos
655 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
656 raise ExtractorError(u'"rental" videos not supported')
658 # Start extracting information
659 self.report_information_extraction(video_id)
662 if 'author' not in video_info:
663 raise ExtractorError(u'Unable to extract uploader name')
664 video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
667 video_uploader_id = None
668 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
670 video_uploader_id = mobj.group(1)
672 self._downloader.report_warning(u'unable to extract uploader nickname')
675 if 'title' not in video_info:
676 raise ExtractorError(u'Unable to extract video title')
677 video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
680 # We try first to get a high quality image:
681 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
682 video_webpage, re.DOTALL)
683 if m_thumb is not None:
684 video_thumbnail = m_thumb.group(1)
685 elif 'thumbnail_url' not in video_info:
686 self._downloader.report_warning(u'unable to extract video thumbnail')
688 else: # don't panic if we can't find it
689 video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
693 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
695 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
696 upload_date = unified_strdate(upload_date)
699 video_description = get_element_by_id("eow-description", video_webpage)
700 if video_description:
701 video_description = clean_html(video_description)
703 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
705 video_description = unescapeHTML(fd_mobj.group(1))
707 video_description = u''
710 video_subtitles = None
712 if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
713 video_subtitles = self._extract_subtitles(video_id)
714 elif self._downloader.params.get('writeautomaticsub', False):
715 video_subtitles = self._request_automatic_caption(video_id, video_webpage)
717 if self._downloader.params.get('listsubtitles', False):
718 self._list_available_subtitles(video_id)
721 if 'length_seconds' not in video_info:
722 self._downloader.report_warning(u'unable to extract video duration')
725 video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
727 # Decide which formats to download
730 mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
732 raise ValueError('Could not find vevo ID')
733 info = json.loads(mobj.group(1))
735 # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
736 # this signatures are encrypted
737 m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
739 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
740 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
741 m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
743 if 'url_encoded_fmt_stream_map' in video_info:
744 video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
746 video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
747 elif 'adaptive_fmts' in video_info:
748 if 'url_encoded_fmt_stream_map' in video_info:
749 video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
751 video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
755 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
756 self.report_rtmp_download()
757 video_url_list = [(None, video_info['conn'][0])]
758 elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
759 if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
760 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
762 for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
763 url_data = compat_parse_qs(url_data_str)
764 if 'itag' in url_data and 'url' in url_data:
765 url = url_data['url'][0]
766 if 'sig' in url_data:
767 url += '&signature=' + url_data['sig'][0]
768 elif 's' in url_data:
769 if self._downloader.params.get('verbose'):
772 player_version = self._search_regex(r'ad3-(.+?)\.swf',
773 video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
774 'flash player', fatal=False)
775 player = 'flash player %s' % player_version
777 player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
778 'html5 player', fatal=False)
779 parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
780 self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
781 (len(s), parts_sizes, url_data['itag'][0], player))
782 encrypted_sig = url_data['s'][0]
784 signature = self._decrypt_signature_age_gate(encrypted_sig)
786 signature = self._decrypt_signature(encrypted_sig)
787 url += '&signature=' + signature
788 if 'ratebypass' not in url:
789 url += '&ratebypass=yes'
790 url_map[url_data['itag'][0]] = url
791 video_url_list = self._get_video_url_list(url_map)
792 if not video_url_list:
794 elif video_info.get('hlsvp'):
795 manifest_url = video_info['hlsvp'][0]
796 url_map = self._extract_from_m3u8(manifest_url, video_id)
797 video_url_list = self._get_video_url_list(url_map)
798 if not video_url_list:
802 raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
805 for format_param, video_real_url in video_url_list:
807 video_extension = self._video_extensions.get(format_param, 'flv')
809 video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
810 self._video_dimensions.get(format_param, '???'),
811 ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
815 'url': video_real_url,
816 'uploader': video_uploader,
817 'uploader_id': video_uploader_id,
818 'upload_date': upload_date,
819 'title': video_title,
820 'ext': video_extension,
821 'format': video_format,
822 'thumbnail': video_thumbnail,
823 'description': video_description,
824 'player_url': player_url,
825 'subtitles': video_subtitles,
826 'duration': video_duration
830 class YoutubePlaylistIE(InfoExtractor):
831 IE_DESC = u'YouTube.com playlists'
837 (?:course|view_play_list|my_playlists|artist|playlist|watch)
838 \? (?:.*?&)*? (?:p|a|list)=
841 ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
844 ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
846 _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
848 IE_NAME = u'youtube:playlist'
851 def suitable(cls, url):
852 """Receives a URL and returns True if suitable for this IE."""
853 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
855 def _real_extract(self, url):
856 # Extract playlist id
857 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
859 raise ExtractorError(u'Invalid URL: %s' % url)
861 # Download playlist videos from API
862 playlist_id = mobj.group(1) or mobj.group(2)
865 for page_num in itertools.count(1):
866 start_index = self._MAX_RESULTS * (page_num - 1) + 1
867 if start_index >= 1000:
868 self._downloader.report_warning(u'Max number of results reached')
870 url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
871 page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
874 response = json.loads(page)
875 except ValueError as err:
876 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
878 if 'feed' not in response:
879 raise ExtractorError(u'Got a malformed response from YouTube API')
880 playlist_title = response['feed']['title']['$t']
881 if 'entry' not in response['feed']:
882 # Number of videos is a multiple of self._MAX_RESULTS
885 for entry in response['feed']['entry']:
886 index = entry['yt$position']['$t']
887 if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
890 'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
893 videos = [v[1] for v in sorted(videos)]
895 url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
896 return [self.playlist_result(url_results, playlist_id, playlist_title)]
899 class YoutubeChannelIE(InfoExtractor):
900 IE_DESC = u'YouTube.com channels'
901 _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
902 _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
903 _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
904 _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
905 IE_NAME = u'youtube:channel'
907 def extract_videos_from_page(self, page):
909 for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
910 if mobj.group(1) not in ids_in_page:
911 ids_in_page.append(mobj.group(1))
914 def _real_extract(self, url):
916 mobj = re.match(self._VALID_URL, url)
918 raise ExtractorError(u'Invalid URL: %s' % url)
920 # Download channel page
921 channel_id = mobj.group(1)
925 url = self._TEMPLATE_URL % (channel_id, pagenum)
926 page = self._download_webpage(url, channel_id,
927 u'Downloading page #%s' % pagenum)
929 # Extract video identifiers
930 ids_in_page = self.extract_videos_from_page(page)
931 video_ids.extend(ids_in_page)
933 # Download any subsequent channel pages using the json-based channel_ajax query
934 if self._MORE_PAGES_INDICATOR in page:
935 for pagenum in itertools.count(1):
936 url = self._MORE_PAGES_URL % (pagenum, channel_id)
937 page = self._download_webpage(url, channel_id,
938 u'Downloading page #%s' % pagenum)
940 page = json.loads(page)
942 ids_in_page = self.extract_videos_from_page(page['content_html'])
943 video_ids.extend(ids_in_page)
945 if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
948 self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
950 urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
951 url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
952 return [self.playlist_result(url_entries, channel_id)]
955 class YoutubeUserIE(InfoExtractor):
956 IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
957 _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
958 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
959 _GDATA_PAGE_SIZE = 50
960 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
961 IE_NAME = u'youtube:user'
964 def suitable(cls, url):
965 # Don't return True if the url can be extracted with other youtube
966 # extractor, the regex would is too permissive and it would match.
967 other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
968 if any(ie.suitable(url) for ie in other_ies): return False
969 else: return super(YoutubeUserIE, cls).suitable(url)
971 def _real_extract(self, url):
973 mobj = re.match(self._VALID_URL, url)
975 raise ExtractorError(u'Invalid URL: %s' % url)
977 username = mobj.group(1)
979 # Download video ids using YouTube Data API. Result size per
980 # query is limited (currently to 50 videos) so we need to query
981 # page by page until there are no video ids - it means we got
986 for pagenum in itertools.count(0):
987 start_index = pagenum * self._GDATA_PAGE_SIZE + 1
989 gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
990 page = self._download_webpage(gdata_url, username,
991 u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
994 response = json.loads(page)
995 except ValueError as err:
996 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
998 # Extract video identifiers
1000 for entry in response['feed']['entry']:
1001 ids_in_page.append(entry['id']['$t'].split('/')[-1])
1002 video_ids.extend(ids_in_page)
1004 # A little optimization - if current page is not
1005 # "full", ie. does not contain PAGE_SIZE video ids then
1006 # we can assume that this page is the last one - there
1007 # are no more ids on further pages - no need to query
1010 if len(ids_in_page) < self._GDATA_PAGE_SIZE:
1013 urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
1014 url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
1015 return [self.playlist_result(url_results, playlist_title = username)]
1017 class YoutubeSearchIE(SearchInfoExtractor):
1018 IE_DESC = u'YouTube.com searches'
1019 _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
1021 IE_NAME = u'youtube:search'
1022 _SEARCH_KEY = 'ytsearch'
1024 def report_download_page(self, query, pagenum):
1025 """Report attempt to download search page with given number."""
1026 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1028 def _get_n_results(self, query, n):
1029 """Get a specified number of results for a query"""
1035 while (50 * pagenum) < limit:
1036 self.report_download_page(query, pagenum+1)
1037 result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
1038 request = compat_urllib_request.Request(result_url)
1040 data = compat_urllib_request.urlopen(request).read().decode('utf-8')
1041 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1042 raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
1043 api_response = json.loads(data)['data']
1045 if not 'items' in api_response:
1046 raise ExtractorError(u'[youtube] No video results')
1048 new_ids = list(video['id'] for video in api_response['items'])
1049 video_ids += new_ids
1051 limit = min(n, api_response['totalItems'])
1054 if len(video_ids) > n:
1055 video_ids = video_ids[:n]
1056 videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
1057 return self.playlist_result(videos, query)
class YoutubeShowIE(InfoExtractor):
    """Extractor for YouTube show pages; yields one playlist per season."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    IE_NAME = u'youtube:show'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'

    def _real_extract(self, url):
        """Resolve a show page into playlist results, one per season."""
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Every season of a show is published as its own playlist.
        season_matches = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_matches)))
        results = []
        for season in season_matches:
            playlist_url = 'https://www.youtube.com' + season.group(1)
            results.append(self.url_result(playlist_url, 'YoutubePlaylist'))
        return results
1075 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
1077 Base class for extractors that fetch info from
1078 http://www.youtube.com/feed_ajax
1079 Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
1081 _LOGIN_REQUIRED = True
1083 # use action_load_personal_feed instead of action_load_system_feed
1084 _PERSONAL_FEED = False
1087 def _FEED_TEMPLATE(self):
1088 action = 'action_load_system_feed'
1089 if self._PERSONAL_FEED:
1090 action = 'action_load_personal_feed'
1091 return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)
1095 return u'youtube:%s' % self._FEED_NAME
1097 def _real_initialize(self):
1100 def _real_extract(self, url):
1102 # The step argument is available only in 2.7 or higher
1103 for i in itertools.count(0):
1104 paging = i*self._PAGING_STEP
1105 info = self._download_webpage(self._FEED_TEMPLATE % paging,
1106 u'%s feed' % self._FEED_NAME,
1107 u'Downloading page %s' % i)
1108 info = json.loads(info)
1109 feed_html = info['feed_html']
1110 m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
1111 ids = orderedSet(m.group(1) for m in m_ids)
1112 feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
1113 if info['paging'] is None:
1115 return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Extractor for the authenticated user's subscriptions feed."""
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    # Feed identifiers consumed by the YoutubeFeedsInfoExtractor machinery.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Extractor for the authenticated user's recommended-videos feed."""
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    # Feed identifiers consumed by the YoutubeFeedsInfoExtractor machinery.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Extractor for the authenticated user's Watch Later list."""
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    # Feed identifiers consumed by the YoutubeFeedsInfoExtractor machinery.
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Watch Later is tied to the account, so the personal-feed AJAX
    # action must be used instead of the system-feed one.
    _PERSONAL_FEED = True
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the logged-in user's favourite videos."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    # Favourites cannot be read anonymously.
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Resolve the favourites page to its backing playlist result."""
        favourites_page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')