X-Git-Url: https://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=83b5840f76c7c9aa76760457e9b87ce86e83e680;hb=15707c7e024f1f29e7abd8ddaa362196ef2d4af6;hp=43fdbfab1f1e637d9201373fb6a659ec0c3ec723;hpb=7caf9830b0ae80363d3b1817b322452474631cc1;p=youtube-dl.git
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 43fdbfab1..83b5840f7 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -17,16 +17,15 @@ from ..swfinterp import SWFInterpreter
from ..compat import (
compat_chr,
compat_parse_qs,
- compat_urllib_parse,
compat_urllib_parse_unquote,
compat_urllib_parse_unquote_plus,
+ compat_urllib_parse_urlencode,
compat_urllib_parse_urlparse,
compat_urlparse,
compat_str,
)
from ..utils import (
clean_html,
- encode_dict,
error_to_compat_str,
ExtractorError,
float_or_none,
@@ -116,7 +115,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'hl': 'en_US',
}
- login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii')
+ login_data = compat_urllib_parse_urlencode(login_form_strs).encode('ascii')
req = sanitized_Request(self._LOGIN_URL, login_data)
login_results = self._download_webpage(
@@ -149,7 +148,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'TrustDevice': 'on',
})
- tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii')
+ tfa_data = compat_urllib_parse_urlencode(tfa_form_strs).encode('ascii')
tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
tfa_results = self._download_webpage(
@@ -309,6 +308,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
# Apple HTTP Live Streaming
+ '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
@@ -383,6 +383,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'youtube-dl test video "\'/\\ä↭𝕐',
'uploader': 'Philipp Hagemeister',
'uploader_id': 'phihag',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
'upload_date': '20121002',
'license': 'Standard YouTube License',
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
@@ -409,6 +410,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'iconic ep', 'iconic', 'love', 'it'],
'uploader': 'Icona Pop',
'uploader_id': 'IconaPop',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IconaPop',
'license': 'Standard YouTube License',
'creator': 'Icona Pop',
}
@@ -425,6 +427,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:64249768eec3bc4276236606ea996373',
'uploader': 'justintimberlakeVEVO',
'uploader_id': 'justintimberlakeVEVO',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
'license': 'Standard YouTube License',
'creator': 'Justin Timberlake',
'age_limit': 18,
@@ -441,6 +444,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
'uploader': 'SET India',
'uploader_id': 'setindia',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/setindia',
'license': 'Standard YouTube License',
'age_limit': 18,
}
@@ -454,6 +458,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'youtube-dl test video "\'/\\ä↭𝕐',
'uploader': 'Philipp Hagemeister',
'uploader_id': 'phihag',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
'upload_date': '20121002',
'license': 'Standard YouTube License',
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
@@ -474,6 +479,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'm4a',
'upload_date': '20121002',
'uploader_id': '8KVIDEO',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
'description': '',
'uploader': '8KVIDEO',
'license': 'Standard YouTube License',
@@ -531,6 +537,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': '20100909',
'uploader': 'The Amazing Atheist',
'uploader_id': 'TheAmazingAtheist',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
'license': 'Standard YouTube License',
'title': 'Burning Everyone\'s Koran',
'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
@@ -546,6 +553,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
'uploader': 'The Witcher',
'uploader_id': 'WitcherGame',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
'upload_date': '20140605',
'license': 'Standard YouTube License',
'age_limit': 18,
@@ -561,6 +569,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
'uploader': 'LloydVEVO',
'uploader_id': 'LloydVEVO',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
'upload_date': '20110629',
'license': 'Standard YouTube License',
'age_limit': 18,
@@ -574,6 +583,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'upload_date': '20100430',
'uploader_id': 'deadmau5',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/deadmau5',
'creator': 'deadmau5',
'description': 'md5:12c56784b8032162bb936a5f76d55360',
'uploader': 'deadmau5',
@@ -593,6 +603,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'upload_date': '20150827',
'uploader_id': 'olympic',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic',
'license': 'Standard YouTube License',
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
'uploader': 'Olympics',
@@ -611,6 +622,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'stretched_ratio': 16 / 9.,
'upload_date': '20110310',
'uploader_id': 'AllenMeow',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
'uploader': '孫艾倫',
'license': 'Standard YouTube License',
@@ -644,6 +656,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:116377fd2963b81ec4ce64b542173306',
'upload_date': '20150625',
'uploader_id': 'dorappi2000',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
'uploader': 'dorappi2000',
'license': 'Standard YouTube License',
'formats': 'mincount:33',
@@ -685,6 +698,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
'license': 'Standard YouTube License',
},
}, {
@@ -696,6 +710,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
'license': 'Standard YouTube License',
},
}, {
@@ -707,6 +722,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
'license': 'Standard YouTube License',
},
}, {
@@ -718,6 +734,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
'license': 'Standard YouTube License',
},
}],
@@ -752,6 +769,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
'upload_date': '20151119',
'uploader_id': 'IronSoulElf',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
'uploader': 'IronSoulElf',
'license': 'Standard YouTube License',
'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
@@ -791,6 +809,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:a677553cf0840649b731a3024aeff4cc',
'upload_date': '20150127',
'uploader_id': 'BerkmanCenter',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
'uploader': 'BerkmanCenter',
'license': 'Creative Commons Attribution license (reuse allowed)',
},
@@ -798,6 +817,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'skip_download': True,
},
},
+ {
+ # Channel-like uploader_url
+ 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
+ 'info_dict': {
+ 'id': 'eQcmzGIKrzg',
+ 'ext': 'mp4',
+ 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
+ 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
+ 'upload_date': '20151119',
+ 'uploader': 'Bernie 2016',
+ 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
+ 'license': 'Creative Commons Attribution license (reuse allowed)',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
{
'url': 'https://www.youtube.com/watch?feature=player_embedded&v=V36LpHqtcDY',
'only_matching': True,
@@ -969,7 +1006,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
continue
sub_formats = []
for ext in self._SUBTITLE_FORMATS:
- params = compat_urllib_parse.urlencode({
+ params = compat_urllib_parse_urlencode({
'lang': lang,
'v': video_id,
'fmt': ext,
@@ -1018,7 +1055,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if caption_url:
timestamp = args['timestamp']
# We get the available subtitles
- list_params = compat_urllib_parse.urlencode({
+ list_params = compat_urllib_parse_urlencode({
'type': 'list',
'tlangs': 1,
'asrs': 1,
@@ -1037,7 +1074,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
sub_lang = lang_node.attrib['lang_code']
sub_formats = []
for ext in self._SUBTITLE_FORMATS:
- params = compat_urllib_parse.urlencode({
+ params = compat_urllib_parse_urlencode({
'lang': original_lang,
'tlang': sub_lang,
'fmt': ext,
@@ -1056,7 +1093,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
caption_tracks = args['caption_tracks']
caption_translation_languages = args['caption_translation_languages']
caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
- parsed_caption_url = compat_urlparse.urlparse(caption_url)
+ parsed_caption_url = compat_urllib_parse_urlparse(caption_url)
caption_qs = compat_parse_qs(parsed_caption_url.query)
sub_lang_list = {}
@@ -1072,7 +1109,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'fmt': [ext],
})
sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace(
- query=compat_urllib_parse.urlencode(caption_qs, True)))
+ query=compat_urllib_parse_urlencode(caption_qs, True)))
sub_formats.append({
'url': sub_url,
'ext': ext,
@@ -1102,7 +1139,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'cpn': [cpn],
})
playback_url = compat_urlparse.urlunparse(
- parsed_playback_url._replace(query=compat_urllib_parse.urlencode(qs, True)))
+ parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
self._download_webpage(
playback_url, video_id, 'Marking watched',
@@ -1187,7 +1224,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# this can be viewed without login into Youtube
url = proto + '://www.youtube.com/embed/%s' % video_id
embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
- data = compat_urllib_parse.urlencode({
+ data = compat_urllib_parse_urlencode({
'video_id': video_id,
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
'sts': self._search_regex(
@@ -1334,9 +1371,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# uploader_id
video_uploader_id = None
- mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
+ video_uploader_url = None
+ mobj = re.search(
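+ r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',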
+ video_webpage)
if mobj is not None:
- video_uploader_id = mobj.group(1)
+ video_uploader_id = mobj.group('uploader_id')
+ video_uploader_url = mobj.group('uploader_url')
else:
self._downloader.report_warning('unable to extract uploader nickname')
@@ -1642,6 +1683,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': video_id,
'uploader': video_uploader,
'uploader_id': video_uploader_id,
+ 'uploader_url': video_uploader_url,
'upload_date': upload_date,
'license': video_license,
'creator': video_creator,
@@ -1868,7 +1910,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
@classmethod
def suitable(cls, url):
- return False if YoutubePlaylistsIE.suitable(url) else super(YoutubeChannelIE, cls).suitable(url)
+ return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
+ else super(YoutubeChannelIE, cls).suitable(url))
def _real_extract(self, url):
channel_id = self._match_id(url)
@@ -1943,6 +1986,51 @@ class YoutubeUserIE(YoutubeChannelIE):
return super(YoutubeUserIE, cls).suitable(url)
+class YoutubeLiveIE(YoutubeBaseInfoExtractor):
+ IE_DESC = 'YouTube.com live streams'
+ _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+))/live'
+ IE_NAME = 'youtube:live'
+
+ _TESTS = [{
+ 'url': 'http://www.youtube.com/user/TheYoungTurks/live',
+ 'info_dict': {
+ 'id': 'a48o2S1cPoo',
+ 'ext': 'mp4',
+ 'title': 'The Young Turks - Live Main Show',
+ 'uploader': 'The Young Turks',
+ 'uploader_id': 'TheYoungTurks',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
+ 'upload_date': '20150715',
+ 'license': 'Standard YouTube License',
+ 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
+ 'categories': ['News & Politics'],
+ 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ channel_id = mobj.group('id')
+ base_url = mobj.group('base_url')
+ webpage = self._download_webpage(url, channel_id, fatal=False)
+ if webpage:
+ page_type = self._og_search_property(
+ 'type', webpage, 'page type', default=None)
+ video_id = self._html_search_meta(
+ 'videoId', webpage, 'video id', default=None)
+ if page_type == 'video' and video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id):
+ return self.url_result(video_id, YoutubeIE.ie_key())
+ return self.url_result(base_url)
+
+
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
IE_DESC = 'YouTube.com user/channel playlists'
_VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
@@ -1996,7 +2084,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
'spf': 'navigate',
}
url_query.update(self._EXTRA_QUERY_ARGS)
- result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
+ result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
data = self._download_json(
result_url, video_id='query "%s"' % query,
note='Downloading page %s' % pagenum,