10 import xml.etree.ElementTree
19 from .extractor.common import InfoExtractor, SearchInfoExtractor
21 from .extractor.ard import ARDIE
22 from .extractor.arte import ArteTvIE
23 from .extractor.bliptv import BlipTVIE, BlipTVUserIE
24 from .extractor.comedycentral import ComedyCentralIE
25 from .extractor.collegehumor import CollegeHumorIE
26 from .extractor.dailymotion import DailymotionIE
27 from .extractor.depositfiles import DepositFilesIE
28 from .extractor.escapist import EscapistIE
29 from .extractor.facebook import FacebookIE
30 from .extractor.gametrailers import GametrailersIE
31 from .extractor.generic import GenericIE
32 from .extractor.googleplus import GooglePlusIE
33 from .extractor.googlesearch import GoogleSearchIE
34 from .extractor.infoq import InfoQIE
35 from .extractor.metacafe import MetacafeIE
36 from .extractor.mtv import MTVIE
37 from .extractor.myvideo import MyVideoIE
38 from .extractor.nba import NBAIE
39 from .extractor.statigram import StatigramIE
40 from .extractor.photobucket import PhotobucketIE
41 from .extractor.soundcloud import SoundcloudIE, SoundcloudSetIE
42 from .extractor.stanfordoc import StanfordOpenClassroomIE
43 from .extractor.ted import TEDIE
44 from .extractor.vimeo import VimeoIE
45 from .extractor.xvideos import XVideosIE
46 from .extractor.yahoo import YahooIE, YahooSearchIE
47 from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
48 from .extractor.zdf import ZDFIE
class MixcloudIE(InfoExtractor):
    """Information extractor for www.mixcloud.com"""

    # NOTE(review): this listing is incomplete — several original lines are
    # missing; [gap] comments below mark the visible breaks.
    _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'

    def report_download_json(self, file_id):
        """Report JSON download."""
        self.to_screen(u'Downloading json')

    def get_urls(self, jsonData, fmt, bitrate='best'):
        """Get urls from 'audio_formats' section in json"""
        # Fall back to the highest offered bitrate when none/'best'/an
        # unavailable bitrate is requested.
        bitrate_list = jsonData[fmt]
        if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
            bitrate = max(bitrate_list) # select highest
        # [gap: the opening try: line is missing from this listing]
        url_list = jsonData[fmt][bitrate]
        except TypeError: # we have no bitrate info.
            url_list = jsonData[fmt]
        # [gap: the return statement is missing from this listing]

    def check_urls(self, url_list):
        """Returns 1st active url from list"""
        # [gap: the for-loop and try: lines around this probe are missing]
        compat_urllib_request.urlopen(url)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
        # [gap: success/failure return lines are missing]

    def _print_formats(self, formats):
        # Dump the format table to stdout for --list-formats.
        print('Available formats:')
        for fmt in formats.keys():
            for b in formats[fmt]:
                # [gap: the try: line is missing from this listing]
                ext = formats[fmt][b][0]
                print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]))
                except TypeError: # we have no bitrate info
                    print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]))

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # [gap: the "if mobj is None:" guard line is missing]
        raise ExtractorError(u'Invalid URL: %s' % url)
        # extract uploader & filename from url
        uploader = mobj.group(1).decode('utf-8')
        file_id = uploader + "-" + mobj.group(2).decode('utf-8')

        # construct API request
        file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json'
        # retrieve .json file with links to files
        request = compat_urllib_request.Request(file_url)
        # [gap: the try: line is missing from this listing]
        self.report_download_json(file_url)
        jsonData = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err))

        # parse JSON
        json_data = json.loads(jsonData)
        player_url = json_data['player_swf_url']
        formats = dict(json_data['audio_formats'])

        req_format = self._downloader.params.get('format', None)

        if self._downloader.params.get('listformats', None):
            self._print_formats(formats)
            # [gap: the early return line is missing]

        if req_format is None or req_format == 'best':
            for format_param in formats.keys():
                url_list = self.get_urls(formats, format_param)
                # check urls
                file_url = self.check_urls(url_list)
                if file_url is not None:
                    # [gap: break/continue lines are missing]
        # [gap: the else: branch header is missing]
        if req_format not in formats:
            raise ExtractorError(u'Format is not available')
        url_list = self.get_urls(formats, req_format)
        file_url = self.check_urls(url_list)
        format_param = req_format

        # [gap: the "return [{" opener of the info dict is missing]
            'id': file_id.decode('utf-8'),
            'url': file_url.decode('utf-8'),
            'uploader': uploader.decode('utf-8'),
            'title': json_data['name'],
            'ext': file_url.split('.')[-1].decode('utf-8'),
            'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
            'thumbnail': json_data['thumbnail_url'],
            'description': json_data['description'],
            'player_url': player_url.decode('utf-8'),
class YoukuIE(InfoExtractor):
    """Information extractor for v.youku.com (listing incomplete — gaps marked)."""
    _VALID_URL = r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'

    # [gap: the "def _gen_sid(self):" header is missing from this listing]
        # Session id built from millisecond timestamp plus two random suffixes.
        nowTime = int(time.time() * 1000)
        random1 = random.randint(1000,1998)
        random2 = random.randint(1000,9999)

        return "%d%d%d" %(nowTime,random1,random2)

    def _get_file_ID_mix_string(self, seed):
        # Deterministic pseudo-random shuffle of the alphabet keyed by `seed`
        # (linear congruential step, then pick-and-remove).
        # [gap: the "mixed = []" initializer line is missing]
        source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
        for i in range(len(source)):
            seed = (seed * 211 + 30031 ) % 65536
            index = math.floor(seed / 65536 * len(source) )
            mixed.append(source[int(index)])
            source.remove(source[int(index)])
        #return ''.join(mixed)
        # [gap: the actual return line is missing from this listing]

    def _get_file_id(self, fileId, seed):
        # Decode the obfuscated file id: each '*'-separated number indexes
        # into the seed-shuffled alphabet.
        mixed = self._get_file_ID_mix_string(seed)
        ids = fileId.split('*')
        # [gap: the "realId = []" initializer and for-loop header are missing]
        realId.append(mixed[int(ch)])
        return ''.join(realId)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # [gap: the "if mobj is None:" guard line is missing]
        raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('ID')

        info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id

        jsondata = self._download_webpage(info_url, video_id)

        self.report_extraction(video_id)
        # [gap: the try: line is missing from this listing]
        config = json.loads(jsondata)

        video_title = config['data'][0]['title']
        seed = config['data'][0]['seed']

        format = self._downloader.params.get('format', None)
        supported_format = list(config['data'][0]['streamfileids'].keys())

        if format is None or format == 'best':
            if 'hd2' in supported_format:
                # [gap: format-selection assignments are missing]
        elif format == 'worst':
            # [gap: 'worst'/else branches are missing]

        fileid = config['data'][0]['streamfileids'][format]
        keys = [s['k'] for s in config['data'][0]['segs'][format]]
        except (UnicodeDecodeError, ValueError, KeyError):
            raise ExtractorError(u'Unable to extract info section')

        # [gap: "files_info = []" initializer is missing]
        sid = self._gen_sid()
        fileid = self._get_file_id(fileid, seed)

        #column 8,9 of fileid represent the segment number
        #fileid[7:9] should be changed
        for index, key in enumerate(keys):
            temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
            download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)

            # [gap: the "info = {" opener of the per-segment dict is missing]
                'id': '%s_part%02d' % (video_id, index),
                'title': video_title,
            # [gap: remaining dict entries and closer are missing]
            files_info.append(info)
        # [gap: the final return line is missing from this listing]
class XNXXIE(InfoExtractor):
    """Information extractor for xnxx.com"""
    _VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
    # Patterns applied to the video page HTML.
    VIDEO_URL_RE = r'flv_url=(.*?)&'
    VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
    VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # [gap: the "if mobj is None:" guard line is missing]
        raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(1)

        # Get webpage content
        webpage = self._download_webpage(url, video_id)

        # The flv URL is percent-encoded inside a query parameter.
        video_url = self._search_regex(self.VIDEO_URL_RE,
            webpage, u'video URL')
        video_url = compat_urllib_parse.unquote(video_url)

        video_title = self._html_search_regex(self.VIDEO_TITLE_RE,
            # [gap: remaining arguments of this call are missing]

        video_thumbnail = self._search_regex(self.VIDEO_THUMB_RE,
            webpage, u'thumbnail', fatal=False)

        # [gap: the "return [{" opener and id/url/ext entries are missing]
            'title': video_title,
            'thumbnail': video_thumbnail,
class JustinTVIE(InfoExtractor):
    """Information extractor for justin.tv and twitch.tv"""
    # TODO: One broadcast may be split into multiple videos. The key
    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
    # starts at 1 and increases. Can we treat all parts as one video?

    # Matches whole channels, archived broadcasts (/b/) and chapters (/c/).
    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
        (?P<channelid>[^/]+)|
        (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
        (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
    # [gap: the closing triple-quote of _VALID_URL is missing from this listing]
    _JUSTIN_PAGE_LIMIT = 100
    IE_NAME = u'justin.tv'

    def report_download_page(self, channel, offset):
        """Report attempt to download a single page of videos."""
        self.to_screen(u'%s: Downloading video information from %d to %d' %
                (channel, offset, offset + self._JUSTIN_PAGE_LIMIT))

    # Return count of items, list of *valid* items
    def _parse_page(self, url, video_id):
        webpage = self._download_webpage(url, video_id,
                                         u'Downloading video info JSON',
                                         u'unable to download video info JSON')

        response = json.loads(webpage)
        if type(response) != list:
            # The API signals errors with a dict instead of a list.
            error_text = response.get('error', 'unknown error')
            raise ExtractorError(u'Justin.tv API: %s' % error_text)
        # [gap: the "info = []" initializer line is missing]
        for clip in response:
            video_url = clip['video_file_url']
            # [gap: the "if video_url:" guard line is missing]
            video_extension = os.path.splitext(video_url)[1][1:]
            # start_time is ISO-ish; keep only digits of the date part.
            video_date = re.sub('-', '', clip['start_time'][:10])
            video_uploader_id = clip.get('user_id', clip.get('channel_id'))
            video_id = clip['id']
            video_title = clip.get('title', video_id)
            # [gap: the "info.append({" opener and id/url entries are missing]
                'title': video_title,
                'uploader': clip.get('channel_name', video_uploader_id),
                'uploader_id': video_uploader_id,
                'upload_date': video_date,
                'ext': video_extension,
        return (len(response), info)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # [gap: the "if mobj is None:" guard line is missing]
        raise ExtractorError(u'invalid URL: %s' % url)

        api_base = 'http://api.justin.tv'

        if mobj.group('channelid'):
            # Whole channel: page through the archives API below.
            video_id = mobj.group('channelid')
            api = api_base + '/channel/archives/%s.json' % video_id
        elif mobj.group('chapterid'):
            chapter_id = mobj.group('chapterid')

            webpage = self._download_webpage(url, chapter_id)
            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
            # [gap: the "if m is None:" guard line is missing]
            raise ExtractorError(u'Cannot find archive of a chapter')
            archive_id = m.group(1)

            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
            chapter_info_xml = self._download_webpage(api, chapter_id,
                                             note=u'Downloading chapter information',
                                             errnote=u'Chapter information download failed')
            doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
            for a in doc.findall('.//archive'):
                if archive_id == a.find('./id').text:
                    # [gap: the break line is missing]
            # [gap: the "else:" of the for-loop is missing]
            raise ExtractorError(u'Could not find chapter in chapter information')

            video_url = a.find('./video_file_url').text
            video_ext = video_url.rpartition('.')[2] or u'flv'

            chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id
            chapter_info_json = self._download_webpage(chapter_api_url, u'c' + chapter_id,
                                   note='Downloading chapter metadata',
                                   errnote='Download of chapter metadata failed')
            chapter_info = json.loads(chapter_info_json)

            bracket_start = int(doc.find('.//bracket_start').text)
            bracket_end = int(doc.find('.//bracket_end').text)

            # TODO determine start (and probably fix up file)
            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
            #video_url += u'?start=' + TODO:start_timestamp
            # bracket_start is 13290, but we want 51670615
            self._downloader.report_warning(u'Chapter detected, but we can just download the whole file. '
                                            u'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))

            # [gap: the "info = {" opener is missing]
                'id': u'c' + chapter_id,
                'title': chapter_info['title'],
                'thumbnail': chapter_info['preview'],
                'description': chapter_info['description'],
                'uploader': chapter_info['channel']['display_name'],
                'uploader_id': chapter_info['channel']['name'],
            # [gap: dict closer and "return [info]" are missing]
        # [gap: the "else:" branch header is missing]
            video_id = mobj.group('videoid')
            api = api_base + '/broadcast/by_archive/%s.json' % video_id

        self.report_extraction(video_id)

        # [gap: "info = []", "offset = 0", "paged = ..." initializers and the
        # while-loop header are missing from this listing]
        limit = self._JUSTIN_PAGE_LIMIT
        self.report_download_page(video_id, offset)
        page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
        page_count, page_info = self._parse_page(page_url, video_id)
        info.extend(page_info)
        # Stop when the API returns a short (final) page, or when not paging.
        if not paged or page_count != limit:
            # [gap: break and the final return line are missing]
class FunnyOrDieIE(InfoExtractor):
    """Information extractor for funnyordie.com (listing incomplete — gaps marked)."""
    _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # [gap: the "if mobj is None:" guard line is missing]
        raise ExtractorError(u'invalid URL: %s' % url)

        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"',
            webpage, u'video URL', flags=re.DOTALL)

        # Two alternative title patterns are tried in order.
        title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
            r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)

        video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
            webpage, u'description', fatal=False, flags=re.DOTALL)

        # [gap: the "info = {" opener, id/url/title entries, and return are missing]
            'description': video_description,
class SteamIE(InfoExtractor):
    """Information extractor for store.steampowered.com game-trailer pages."""
    _VALID_URL = r"""http://store\.steampowered\.com/
                (?P<urltype>video|app)/ #If the page is only for videos or for a game
                (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
    # [gap: the closing triple-quote of _VALID_URL is missing from this listing]
    _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
    # Age-gate bypass: submit a fixed birth date of 1 January 1970.
    _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'

    # [gap: the @classmethod decorator line is missing from this listing]
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # Overridden because _VALID_URL needs re.VERBOSE matching.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url, re.VERBOSE)
        # NOTE(review): group 'gameID' is not visible in the _VALID_URL shown
        # above — presumably defined on one of the missing pattern lines.
        gameID = m.group('gameID')

        videourl = self._VIDEO_PAGE_TEMPLATE % gameID
        webpage = self._download_webpage(videourl, gameID)

        if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
            videourl = self._AGECHECK_TEMPLATE % gameID
            self.report_age_confirmation()
            webpage = self._download_webpage(videourl, gameID)

        self.report_extraction(gameID)
        game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>',
                                             webpage, 'game title')

        urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
        mweb = re.finditer(urlRE, webpage)
        namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
        titles = re.finditer(namesRE, webpage)
        thumbsRE = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">'
        thumbs = re.finditer(thumbsRE, webpage)
        # [gap: the "videos = []" initializer line is missing]
        # Walk url/title/thumbnail matches in lockstep.
        for vid,vtitle,thumb in zip(mweb,titles,thumbs):
            video_id = vid.group('videoID')
            title = vtitle.group('videoName')
            video_url = vid.group('videoURL')
            video_thumb = thumb.group('thumbnail')
            # [gap: the "if not video_url:" guard line is missing]
            raise ExtractorError(u'Cannot find video url for %s' % video_id)
            # [gap: the "info = {" opener and id/url/ext entries are missing]
                'title': unescapeHTML(title),
                'thumbnail': video_thumb
            # [gap: dict closer and videos.append(info) are missing]
        return [self.playlist_result(videos, gameID, game_title)]
class UstreamIE(InfoExtractor):
    """Information extractor for recorded ustream.tv videos."""
    _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('videoID')

        # Direct CDN URL is derived from the video id alone.
        video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
        webpage = self._download_webpage(url, video_id)

        self.report_extraction(video_id)

        video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
            # [gap: remaining arguments of this call are missing]

        uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
            webpage, u'uploader', fatal=False, flags=re.DOTALL)

        thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
            webpage, u'thumbnail', fatal=False)

        # [gap: the "info = {" opener and id/url/ext entries are missing]
                'title': video_title,
                'uploader': uploader,
                'thumbnail': thumbnail,
        # [gap: dict closer and return are missing]
class WorldStarHipHopIE(InfoExtractor):
    """Information extractor for worldstarhiphop.com / worldstarcandy.com."""
    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
    IE_NAME = u'WorldStarHipHop'

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

        webpage_src = self._download_webpage(url, video_id)

        video_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)',
            webpage_src, u'video URL')

        # Extension is inferred from the URL contents.
        if 'mp4' in video_url:
            # [gap: ext assignment and else-branch lines are missing]

        video_title = self._html_search_regex(r"<title>(.*)</title>",
            webpage_src, u'title')

        # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
        thumbnail = self._html_search_regex(r'rel="image_src" href="(.*)" />',
            webpage_src, u'thumbnail', fatal=False)

        # [gap: the "if not thumbnail:" guard line is missing]
        _title = r"""candytitles.*>(.*)</span>"""
        mobj = re.search(_title, webpage_src)
        # [gap: the "if mobj is not None:" guard line is missing]
        video_title = mobj.group(1)

        # [gap: the "results = [{" opener and id/url/ext entries are missing]
            'title' : video_title,
            'thumbnail' : thumbnail,
        # [gap: dict closer and return are missing]
class RBMARadioIE(InfoExtractor):
    """Information extractor for rbmaradio.com shows."""
    _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('videoID')

        webpage = self._download_webpage(url, video_id)

        # Show metadata is embedded as a JS assignment (window.gon.show=...).
        json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
            webpage, u'json data', flags=re.MULTILINE)

        # [gap: the try: line is missing from this listing]
        data = json.loads(json_data)
        except ValueError as e:
            raise ExtractorError(u'Invalid JSON: ' + str(e))

        # Force the 256 kbps stream variant.
        video_url = data['akamai_url'] + '&cbr=256'
        url_parts = compat_urllib_parse_urlparse(video_url)
        video_ext = url_parts.path.rpartition('.')[2]
        # [gap: the "info = {" opener and id/url/ext entries are missing]
                'title': data['title'],
                'description': data.get('teaser_text'),
                'location': data.get('country_of_origin'),
                'uploader': data.get('host', {}).get('name'),
                'uploader_id': data.get('host', {}).get('slug'),
                'thumbnail': data.get('image', {}).get('large_url_2x'),
                'duration': data.get('duration'),
        # [gap: dict closer and return are missing]
class YouPornIE(InfoExtractor):
    """Information extractor for youporn.com."""
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'

    def _print_formats(self, formats):
        """Print all available formats"""
        print(u'Available formats:')
        print(u'ext\t\tformat')
        print(u'---------------------------------')
        for format in formats:
            print(u'%s\t\t%s' % (format['ext'], format['format']))

    def _specific(self, req_format, formats):
        # Return the single format dict whose 'format' matches req_format.
        # [gap: the for-loop header is missing from this listing]
        if(x["format"]==req_format):
            # [gap: return lines are missing]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # [gap: the "if mobj is None:" guard line is missing]
        raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')

        # Bypass the age gate via a cookie.
        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

        # Get JSON parameters
        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
        # [gap: the try: line is missing from this listing]
        params = json.loads(json_params)
        # [gap: the except clause header is missing]
        raise ExtractorError(u'Invalid JSON')

        self.report_extraction(video_id)
        # [gap: the try: line is missing from this listing]
        video_title = params['title']
        upload_date = unified_strdate(params['release_date_f'])
        video_description = params['description']
        video_uploader = params['submitted_by']
        thumbnail = params['thumbnails'][0]['image']
        # [gap: the "except KeyError:" clause header is missing]
        raise ExtractorError('Missing JSON parameter: ' + sys.exc_info()[1])

        # Get all of the formats available
        DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
        download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
            webpage, u'download list').strip()

        # Get all of the links from the page
        LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
        links = re.findall(LINK_RE, download_list_html)
        # [gap: the "if links is None or len(links) == 0:" guard is missing]
        raise ExtractorError(u'ERROR: no known formats available for video')

        self.to_screen(u'Links found: %d' % len(links))

        # [gap: "formats = []" initializer and for-loop header are missing]

            # A link looks like this:
            # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
            # A path looks like this:
            # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
            video_url = unescapeHTML( link )
            path = compat_urllib_parse_urlparse( video_url ).path
            extension = os.path.splitext( path )[1][1:]
            # Format is encoded in the path as e.g. "480p_370k".
            format = path.split('/')[4].split('_')[:2]

            # [gap: size/bitrate assignment lines are missing]
            format = "-".join( format )
            # title = u'%s-%s-%s' % (video_title, size, bitrate)

            # [gap: the "formats.append({" opener and id/url/ext entries are missing]
                'uploader': video_uploader,
                'upload_date': upload_date,
                'title': video_title,
                'thumbnail': thumbnail,
                'description': video_description
            # [gap: dict closer is missing]

        if self._downloader.params.get('listformats', None):
            self._print_formats(formats)
            # [gap: the early return line is missing]

        req_format = self._downloader.params.get('format', None)
        self.to_screen(u'Format: %s' % req_format)

        if req_format is None or req_format == 'best':
            # [gap: return of best format is missing]
        elif req_format == 'worst':
            # [gap: return of worst format is missing]
        elif req_format in ('-1', 'all'):
            # [gap: return of all formats is missing; the else: header too]
        format = self._specific( req_format, formats )
        # [gap: the "if result is None:" guard line is missing]
        raise ExtractorError(u'Requested format not available')
        # [gap: the final return line is missing]
class PornotubeIE(InfoExtractor):
    """Information extractor for pornotube.com."""
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # [gap: the "if mobj is None:" guard line is missing]
        raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('videoid')
        video_title = mobj.group('title')

        # Get webpage content
        webpage = self._download_webpage(url, video_id)

        # Get the video URL
        VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
        video_url = self._search_regex(VIDEO_URL_RE, webpage, u'video url')
        video_url = compat_urllib_parse.unquote(video_url)

        #Get the uploaded date
        VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
        upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
        # Normalize to YYYYMMDD only when a date was actually found.
        if upload_date: upload_date = unified_strdate(upload_date)

        info = {'id': video_id,
                # [gap: url/ext entries are missing from this listing]
                'upload_date': upload_date,
                'title': video_title,
        # [gap: dict closer and return are missing]
class YouJizzIE(InfoExtractor):
    """Information extractor for youjizz.com."""
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # [gap: the "if mobj is None:" guard line is missing]
        raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('videoid')

        # Get webpage content
        webpage = self._download_webpage(url, video_id)

        # Get the video title
        video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
            webpage, u'title').strip()

        # The actual media lives on a separate embed page.
        result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
        # [gap: the "if result is None:" guard line is missing]
        raise ExtractorError(u'ERROR: unable to extract embed page')

        embed_page_url = result.group(0).strip()
        video_id = result.group('videoid')

        webpage = self._download_webpage(embed_page_url, video_id)

        # Get the video URL
        video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
            webpage, u'video URL')

        info = {'id': video_id,
                # [gap: url/ext entries are missing from this listing]
                'title': video_title,
                'player_url': embed_page_url}
        # [gap: the return line is missing]
class EightTracksIE(InfoExtractor):
    """Information extractor for 8tracks.com mixes (one entry per track)."""
    _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # [gap: the "if mobj is None:" guard line is missing]
        raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group('id')

        webpage = self._download_webpage(url, playlist_id)

        json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL)
        data = json.loads(json_like)

        # Random session id used by the play/next API.
        session = str(random.randint(0, 1000000000))
        # [gap: the "mix_id = ..." assignment line is missing — mix_id is
        # referenced below]
        track_count = data['tracks_count']
        first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
        # [gap: "next_url = first_url" and "res = []" initializers are missing]

        for i in itertools.count():
            api_json = self._download_webpage(next_url, playlist_id,
                note=u'Downloading song information %s/%s' % (str(i+1), track_count),
                errnote=u'Failed to download song information')
            api_data = json.loads(api_json)
            track_data = api_data[u'set']['track']
            # [gap: the "info = {" opener is missing]
                'id': track_data['id'],
                'url': track_data['track_file_stream_url'],
                'title': track_data['performer'] + u' - ' + track_data['name'],
                'raw_title': track_data['name'],
                'uploader_id': data['user']['login'],
            # [gap: dict closer and res.append(info) are missing]
            if api_data['set']['at_last_track']:
                # [gap: the break line is missing]
            next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
        # [gap: the final return line is missing]
class KeekIE(InfoExtractor):
    """Information extractor for keek.com short videos."""
    _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('videoID')

        # Media and thumbnail URLs are derived directly from the video id.
        video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
        thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
        webpage = self._download_webpage(url, video_id)

        video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
            # [gap: remaining arguments of this call are missing]

        uploader = self._html_search_regex(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
            webpage, u'uploader', fatal=False)

        # [gap: the "info = {" opener and id/url/ext entries are missing]
                'title': video_title,
                'thumbnail': thumbnail,
        # [gap: uploader entry, dict closer and return are missing]
class MySpassIE(InfoExtractor):
    """Information extractor for myspass.de (metadata via XML API)."""
    _VALID_URL = r'http://www.myspass.de/.*'

    def _real_extract(self, url):
        META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'

        # video id is the last path element of the URL
        # usually there is a trailing slash, so also try the second but last
        url_path = compat_urllib_parse_urlparse(url).path
        url_parent_path, video_id = os.path.split(url_path)
        # [gap: the "if not video_id:" guard line is missing]
        _, video_id = os.path.split(url_parent_path)

        # get metadata
        metadata_url = META_DATA_URL_TEMPLATE % video_id
        metadata_text = self._download_webpage(metadata_url, video_id)
        metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))

        # extract values from metadata
        url_flv_el = metadata.find('url_flv')
        if url_flv_el is None:
            raise ExtractorError(u'Unable to extract download url')
        video_url = url_flv_el.text
        extension = os.path.splitext(video_url)[1][1:]
        title_el = metadata.find('title')
        # [gap: the "if title_el is None:" guard line is missing]
        raise ExtractorError(u'Unable to extract title')
        title = title_el.text
        format_id_el = metadata.find('format_id')
        if format_id_el is None:
            # [gap: the fallback assignment and else: lines are missing]
        format = format_id_el.text
        description_el = metadata.find('description')
        if description_el is not None:
            description = description_el.text
        # [gap: the else: fallback lines are missing]
        imagePreview_el = metadata.find('imagePreview')
        if imagePreview_el is not None:
            thumbnail = imagePreview_el.text
        # [gap: else: fallback and the "info = {" opener with id/url/title/ext
        # entries are missing]
            'thumbnail': thumbnail,
            'description': description
        # [gap: dict closer and return are missing]
class SpiegelIE(InfoExtractor):
    """Information extractor for spiegel.de videos (media list via XML)."""
    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('videoID')

        webpage = self._download_webpage(url, video_id)

        video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>',
            # [gap: remaining arguments of this call are missing]

        xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
        xml_code = self._download_webpage(xml_url, video_id,
                    note=u'Downloading XML', errnote=u'Failed to download XML')

        idoc = xml.etree.ElementTree.fromstring(xml_code)
        # [gap: the "last_type = idoc[-1]" assignment line is missing]
        filename = last_type.findall('./filename')[0].text
        duration = float(last_type.findall('./duration')[0].text)

        video_url = 'http://video2.spiegel.de/flash/' + filename
        video_ext = filename.rpartition('.')[2]
        # [gap: the "info = {" opener and id/url/ext entries are missing]
            'title': video_title,
            'duration': duration,
        # [gap: dict closer and return are missing]
class LiveLeakIE(InfoExtractor):
    """Information extractor for liveleak.com view pages."""
    _VALID_URL = r'^(?:http?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
    IE_NAME = u'liveleak'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # [gap: the "if mobj is None:" guard line is missing]
        raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('video_id')

        webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(r'file: "(.*?)",',
            webpage, u'video URL')

        # og:title carries a site prefix that is stripped off.
        video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
            webpage, u'title').replace('LiveLeak.com -', '').strip()

        video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
            webpage, u'description', fatal=False)

        video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
            webpage, u'uploader', fatal=False)

        # [gap: the "info = {" opener and id/url/ext entries are missing]
            'title': video_title,
            'description': video_description,
            'uploader': video_uploader
        # [gap: dict closer and return are missing]
class TumblrIE(InfoExtractor):
    """Information extractor for tumblr.com video posts."""
    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'

    def _real_extract(self, url):
        m_url = re.match(self._VALID_URL, url)
        video_id = m_url.group('id')
        blog = m_url.group('blog_name')

        # Canonicalize to the /post/ URL before fetching.
        url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
        webpage = self._download_webpage(url, video_id)

        # The video URL/extension are embedded in escaped (\x22-quoted) JS.
        re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
        video = re.search(re_video, webpage)
        # [gap: the "if video is None:" guard line is missing]
        raise ExtractorError(u'Unable to extract video')
        video_url = video.group('video_url')
        ext = video.group('ext')

        video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
            webpage, u'thumbnail', fatal=False)  # We pick the first poster
        if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\', '')

        # The only place where you can get a title, it's not complete,
        # but searching in other places doesn't work for all videos
        video_title = self._html_search_regex(r'<title>(?P<title>.*?)</title>',
            webpage, u'title', flags=re.DOTALL)

        return [{'id': video_id,
                 # [gap: url/ext entries are missing from this listing]
                 'title': video_title,
                 'thumbnail': video_thumbnail,
        # [gap: dict/list closer is missing]
class BandcampIE(InfoExtractor):
    """Information extractor for free bandcamp.com tracks."""
    _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        title = mobj.group('title')
        webpage = self._download_webpage(url, title)
        # We get the link to the free download page
        m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
        if m_download is None:
            raise ExtractorError(u'No free songs found')

        download_link = m_download.group(1)
        # NOTE(review): `id` shadows the builtin; kept as-is in this listing.
        id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
                       webpage, re.MULTILINE|re.DOTALL).group('id')

        download_webpage = self._download_webpage(download_link, id,
                                                  'Downloading free downloads page')
        # We get the dictionary of the track from some javascrip code
        info = re.search(r'items: (.*?),$',
                         download_webpage, re.MULTILINE).group(1)
        info = json.loads(info)[0]
        # We pick mp3-320 for now, until format selection can be easily implemented.
        mp3_info = info[u'downloads'][u'mp3-320']
        # If we try to use this url it says the link has expired
        initial_url = mp3_info[u'url']
        re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
        m_url = re.match(re_url, initial_url)
        #We build the url we will use to get the final track url
        # This url is build in Bandcamp in the script download_bunde_*.js
        request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), id, m_url.group('ts'))
        final_url_webpage = self._download_webpage(request_url, id, 'Requesting download url')
        # If we could correctly generate the .rand field the url would be
        #in the "download_url" key
        final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)

        track_info = {'id':id,
                      'title' : info[u'title'],
                      # [gap: url/ext entries are missing from this listing]
                      'thumbnail' : info[u'thumb_url'],
                      'uploader' : info[u'artist']
        # [gap: dict closer and return are missing]
class RedTubeIE(InfoExtractor):
    """Information Extractor for redtube"""
    _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'

    def _real_extract(self,url):
        mobj = re.match(self._VALID_URL, url)
        # [gap: the "if mobj is None:" guard line is missing]
        raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        video_extension = 'mp4'  # site serves mp4 via a <source> tag
        webpage = self._download_webpage(url, video_id)

        self.report_extraction(video_id)

        video_url = self._html_search_regex(r'<source src="(.+?)" type="video/mp4">',
            webpage, u'video URL')

        video_title = self._html_search_regex('<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
            # [gap: remaining arguments of this call are missing]

        # [gap: the "return [{" opener and id/url entries are missing]
            'ext':      video_extension,
            'title':    video_title,
        # [gap: dict/list closer is missing]
class InaIE(InfoExtractor):
    """Information Extractor for Ina.fr.

    Fetches the MRSS notice for the video id and reads the MP4 URL from
    the <media:player> element and the title from the CDATA <title>.
    """
    _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # Guard added: without it a non-matching URL raised a bare
        # AttributeError on mobj.group instead of a clear ExtractorError.
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
        video_extension = 'mp4'
        webpage = self._download_webpage(mrss_url, video_id)

        self.report_extraction(video_id)

        video_url = self._html_search_regex(
            r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
            webpage, u'video URL')

        video_title = self._search_regex(
            r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
            webpage, u'title')

        return [{
            'id':    video_id,
            'url':   video_url,
            'ext':   video_extension,
            'title': video_title,
        }]
class HowcastIE(InfoExtractor):
    """Information Extractor for Howcast.com.

    Scrapes the mobile MP4 URL out of the embedded player config and the
    title/description/thumbnail out of the page's meta tags.
    """
    _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # Guard added: fail with a clear error instead of AttributeError
        # when the URL does not match.
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        # Re-canonicalize so https/alternate forms all fetch the same page.
        webpage_url = 'http://www.howcast.com/videos/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

        self.report_extraction(video_id)

        video_url = self._search_regex(
            r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
            webpage, u'video URL')

        # The meta tags may use either quote style, hence the alternation.
        video_title = self._html_search_regex(
            r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'',
            webpage, u'title')

        video_description = self._html_search_regex(
            r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
            webpage, u'description', fatal=False)

        thumbnail = self._html_search_regex(
            r'<meta content=\'(.+?)\' property=\'og:image\'',
            webpage, u'thumbnail', fatal=False)

        return [{
            'id':          video_id,
            'url':         video_url,
            'ext':         'mp4',
            'title':       video_title,
            'description': video_description,
            'thumbnail':   thumbnail,
        }]
class VineIE(InfoExtractor):
    """Information Extractor for Vine.co.

    Reads the stream URL from the twitter:player:stream meta tag and the
    title/thumbnail/uploader from the page markup.
    """
    _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # Guard added: raise a clear ExtractorError on a non-matching URL.
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        webpage_url = 'https://vine.co/v/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

        self.report_extraction(video_id)

        video_url = self._html_search_regex(
            r'<meta property="twitter:player:stream" content="(.+?)"',
            webpage, u'video URL')

        video_title = self._html_search_regex(
            r'<meta property="og:title" content="(.+?)"',
            webpage, u'title')

        # The second group strips any trailing query string from the URL.
        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)(\?.*?)?"',
            webpage, u'thumbnail', fatal=False)

        # DOTALL because the <h2> sits on a different line than the div.
        uploader = self._html_search_regex(
            r'<div class="user">.*?<h2>(.+?)</h2>',
            webpage, u'uploader', fatal=False, flags=re.DOTALL)

        return [{
            'id':        video_id,
            'url':       video_url,
            'ext':       'mp4',
            'title':     video_title,
            'thumbnail': thumbnail,
            'uploader':  uploader,
        }]
class FlickrIE(InfoExtractor):
    """Information Extractor for Flickr videos.

    Three-step extraction: scrape the per-video secret from the photo
    page, ask the video API for a node id, then resolve the node id to a
    playlist whose STREAM element yields the final URL.
    """
    _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # Guard added: raise a clear ExtractorError on a non-matching URL.
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        video_uploader_id = mobj.group('uploader_id')
        webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

        # The page embeds a per-video secret required by the video API.
        secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret')

        first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
        first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')

        node_id = self._html_search_regex(
            r'<Item id="id">(\d+-\d+)</Item>', first_xml, u'node_id')

        second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
        second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')

        self.report_extraction(video_id)

        mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video url')
        # FULLPATH is HTML-escaped inside the XML attribute.
        video_url = mobj.group(1) + unescapeHTML(mobj.group(2))

        video_title = self._html_search_regex(
            r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')',
            webpage, u'video title')

        video_description = self._html_search_regex(
            r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')',
            webpage, u'description', fatal=False)

        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')',
            webpage, u'thumbnail', fatal=False)

        return [{
            'id':          video_id,
            'url':         video_url,
            'ext':         'mp4',
            'title':       video_title,
            'description': video_description,
            'thumbnail':   thumbnail,
            'uploader_id': video_uploader_id,
        }]
class TeamcocoIE(InfoExtractor):
    """Information Extractor for teamcoco.com.

    Looks up the numeric video id on the article page, then downloads the
    CVP XML manifest to obtain the high-quality file URL.
    """
    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        url_title = mobj.group('url_title')
        webpage = self._download_webpage(url, url_title)

        video_id = self._html_search_regex(
            r'<article class="video" data-id="(\d+?)"',
            webpage, u'video id')

        self.report_extraction(video_id)

        video_title = self._html_search_regex(
            r'<meta property="og:title" content="(.+?)"',
            webpage, u'title')

        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)"',
            webpage, u'thumbnail', fatal=False)

        video_description = self._html_search_regex(
            r'<meta property="og:description" content="(.*?)"',
            webpage, u'description', fatal=False)

        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
        data = self._download_webpage(data_url, video_id, 'Downloading data webpage')

        # Pick the "high" rendition out of the XML manifest.
        video_url = self._html_search_regex(
            r'<file type="high".*?>(.*?)</file>',
            data, u'video URL')

        return [{
            'id':          video_id,
            'url':         video_url,
            'ext':         'mp4',
            'title':       video_title,
            'thumbnail':   thumbnail,
            'description': video_description,
        }]
class XHamsterIE(InfoExtractor):
    """Information Extractor for xHamster.

    Extracts the media URL from the page's flash-player config ('srv' +
    'file' pair) and scrapes title, upload date, uploader and thumbnail.
    """
    _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # Guard added: raise a clear ExtractorError on a non-matching URL.
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
        webpage = self._download_webpage(mrss_url, video_id)

        mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract media URL')
        if len(mobj.group('server')) == 0:
            # No server prefix: 'file' is already a full percent-encoded URL.
            video_url = compat_urllib_parse.unquote(mobj.group('file'))
        else:
            video_url = mobj.group('server') + '/key=' + mobj.group('file')
        video_extension = video_url.split('.')[-1]

        video_title = self._html_search_regex(
            r'<title>(?P<title>.+?) - xHamster\.com</title>',
            webpage, u'title')

        # Can't see the description anywhere in the UI
        # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
        #     webpage, u'description', fatal=False)
        # if video_description: video_description = unescapeHTML(video_description)

        mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
        if mobj:
            # Normalize to the YYYYMMDD form used throughout the project.
            video_upload_date = mobj.group('upload_date_Y') + mobj.group('upload_date_m') + mobj.group('upload_date_d')
        else:
            video_upload_date = None
            self._downloader.report_warning(u'Unable to extract upload date')

        video_uploader_id = self._html_search_regex(
            r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
            webpage, u'uploader id', default=u'anonymous')

        video_thumbnail = self._search_regex(
            r'\'image\':\'(?P<thumbnail>[^\']+)\'',
            webpage, u'thumbnail', fatal=False)

        return [{
            'id':          video_id,
            'url':         video_url,
            'ext':         video_extension,
            'title':       video_title,
            # 'description': video_description,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'thumbnail':   video_thumbnail
        }]
class HypemIE(InfoExtractor):
    """Information Extractor for hypem.com.

    Fetches the track page (carrying over the session cookie), parses the
    embedded displayList JSON for the track key/id, then asks the serve
    endpoint for the final audio URL.
    """
    _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        track_id = mobj.group(1)

        # 'ts' busts any caching between the page and the serve request.
        data = {'ax': 1, 'ts': time.time()}
        data_encoded = compat_urllib_parse.urlencode(data)
        complete_url = url + "?" + data_encoded
        request = compat_urllib_request.Request(complete_url)
        response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
        # The serve endpoint below only answers with the session cookie.
        cookie = urlh.headers.get('Set-Cookie', '')

        self.report_extraction(track_id)

        html_tracks = self._html_search_regex(
            r'<script type="application/json" id="displayList-data">(.*?)</script>',
            response, u'tracks', flags=re.MULTILINE | re.DOTALL).strip()
        try:
            track_list = json.loads(html_tracks)
            track = track_list[u'tracks'][0]
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')

        key = track[u"key"]
        track_id = track[u"id"]
        artist = track[u"artist"]
        title = track[u"song"]

        serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
        request = compat_urllib_request.Request(serve_url, "", {'Content-Type': 'application/json'})
        request.add_header('cookie', cookie)
        song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
        try:
            song_data = json.loads(song_data_json)
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')
        final_url = song_data[u"url"]

        return [{
            'id':     track_id,
            'url':    final_url,
            'ext':    "mp3",
            'title':  title,
            'artist': artist,
        }]
class Vbox7IE(InfoExtractor):
    """Information Extractor for Vbox7.

    Follows the JavaScript redirect on the play page, scrapes the title,
    then POSTs to the magare.do endpoint for the media and thumbnail URLs.
    """
    _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(1)

        # The play page redirects via JavaScript, not an HTTP redirect.
        redirect_page, urlh = self._download_webpage_handle(url, video_id)
        new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
        redirect_url = urlh.geturl() + new_location
        webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')

        # Title is the part of <title> before the first '/'.
        title = self._html_search_regex(
            r'<title>(.*)</title>',
            webpage, u'title').split('/')[0].strip()

        ext = "flv"
        info_url = "http://vbox7.com/play/magare.do"
        data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
        info_request = compat_urllib_request.Request(info_url, data)
        info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
        if info_response is None:
            raise ExtractorError(u'Unable to extract the media url')
        # Response is a query string: '<key>=<media url>&<key>=<thumb url>'.
        (final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&'))

        return [{
            'id':        video_id,
            'url':       final_url,
            'ext':       ext,
            'title':     title,
            'thumbnail': thumbnail_url,
        }]
def gen_extractors():
    """ Return a list of an instance of every supported extractor.
    The order does matter; the first extractor matched is the one handling the URL.
    GenericIE must stay last: it is the catch-all fallback.
    """
    # NOTE(review): list reconstructed from this module's extractors and
    # imports — verify ordering against the project's extractor registry.
    return [
        YoutubePlaylistIE(),
        YoutubeChannelIE(),
        YoutubeUserIE(),
        YoutubeSearchIE(),
        YoutubeIE(),
        MetacafeIE(),
        DailymotionIE(),
        GoogleSearchIE(),
        PhotobucketIE(),
        YahooIE(),
        YahooSearchIE(),
        DepositFilesIE(),
        FacebookIE(),
        BlipTVIE(),
        BlipTVUserIE(),
        VimeoIE(),
        MyVideoIE(),
        ComedyCentralIE(),
        EscapistIE(),
        CollegeHumorIE(),
        XVideosIE(),
        SoundcloudSetIE(),
        SoundcloudIE(),
        InfoQIE(),
        MixcloudIE(),
        StanfordOpenClassroomIE(),
        MTVIE(),
        YoukuIE(),
        XNXXIE(),
        YouJizzIE(),
        PornotubeIE(),
        YouPornIE(),
        GooglePlusIE(),
        ArteTvIE(),
        NBAIE(),
        WorldStarHipHopIE(),
        JustinTVIE(),
        FunnyOrDieIE(),
        SteamIE(),
        UstreamIE(),
        RBMARadioIE(),
        EightTracksIE(),
        KeekIE(),
        TEDIE(),
        MySpassIE(),
        SpiegelIE(),
        LiveLeakIE(),
        ARDIE(),
        ZDFIE(),
        TumblrIE(),
        BandcampIE(),
        RedTubeIE(),
        InaIE(),
        HowcastIE(),
        VineIE(),
        FlickrIE(),
        TeamcocoIE(),
        XHamsterIE(),
        HypemIE(),
        Vbox7IE(),
        GametrailersIE(),
        StatigramIE(),
        GenericIE()
    ]
def get_info_extractor(ie_name):
    """Return the info extractor class named ie_name + 'IE'.

    Raises KeyError if no such extractor class exists in this module.
    """
    class_name = '%sIE' % ie_name
    return globals()[class_name]