10 import xml.etree.ElementTree
19 from .extractor.common import InfoExtractor, SearchInfoExtractor
21 from .extractor.ard import ARDIE
22 from .extractor.arte import ArteTvIE
23 from .extractor.bliptv import BlipTVIE, BlipTVUserIE
24 from .extractor.comedycentral import ComedyCentralIE
25 from .extractor.collegehumor import CollegeHumorIE
26 from .extractor.dailymotion import DailymotionIE
27 from .extractor.depositfiles import DepositFilesIE
28 from .extractor.escapist import EscapistIE
29 from .extractor.facebook import FacebookIE
30 from .extractor.funnyordie import FunnyOrDieIE
31 from .extractor.gametrailers import GametrailersIE
32 from .extractor.generic import GenericIE
33 from .extractor.googleplus import GooglePlusIE
34 from .extractor.googlesearch import GoogleSearchIE
35 from .extractor.infoq import InfoQIE
36 from .extractor.justintv import JustinTVIE
37 from .extractor.metacafe import MetacafeIE
38 from .extractor.mixcloud import MixcloudIE
39 from .extractor.mtv import MTVIE
40 from .extractor.myvideo import MyVideoIE
41 from .extractor.nba import NBAIE
42 from .extractor.statigram import StatigramIE
43 from .extractor.photobucket import PhotobucketIE
44 from .extractor.pornotube import PornotubeIE
45 from .extractor.rbmaradio import RBMARadioIE
46 from .extractor.soundcloud import SoundcloudIE, SoundcloudSetIE
47 from .extractor.stanfordoc import StanfordOpenClassroomIE
48 from .extractor.steam import SteamIE
49 from .extractor.ted import TEDIE
50 from .extractor.ustream import UstreamIE
51 from .extractor.vimeo import VimeoIE
52 from .extractor.worldstarhiphop import WorldStarHipHopIE
53 from .extractor.xnxx import XNXXIE
54 from .extractor.xvideos import XVideosIE
55 from .extractor.yahoo import YahooIE, YahooSearchIE
56 from .extractor.youku import YoukuIE
57 from .extractor.youporn import YouPornIE
58 from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
59 from .extractor.zdf import ZDFIE
class YouJizzIE(InfoExtractor):
    """Information extractor for youjizz.com."""
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'

    def _real_extract(self, url):
        """Extract the video described by a youjizz.com video page URL.

        Downloads the video page for the title, follows the embed page it
        links to and reads the media URL from the player setup script.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL does not match _VALID_URL or the
        embed page link cannot be found.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('videoid')

        # Get webpage content
        webpage = self._download_webpage(url, video_id)

        # Get the video title
        video_title = self._html_search_regex(
            r'<title>(?P<title>.*)</title>',
            webpage, u'title').strip()

        # Get the embed page
        result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
        if result is None:
            raise ExtractorError(u'ERROR: unable to extract embed page')

        embed_page_url = result.group(0).strip()
        # From here on the numeric id of the embed page replaces the URL slug.
        video_id = result.group('videoid')

        webpage = self._download_webpage(embed_page_url, video_id)

        # Get the video URL
        video_url = self._search_regex(
            r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
            webpage, u'video URL')

        info = {
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'ext': 'flv',
            'format': 'flv',
            'player_url': embed_page_url,
        }

        return [info]
class EightTracksIE(InfoExtractor):
    """Information extractor for 8tracks.com mixes (one entry per track)."""
    IE_NAME = '8tracks'
    _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'

    def _real_extract(self, url):
        """Walk through a mix via the play/next API and collect its tracks.

        Returns a list with one info dictionary per track, in play order.
        Raises ExtractorError if the URL does not match _VALID_URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group('id')

        webpage = self._download_webpage(url, playlist_id)

        # The mix metadata is embedded in the page as a JSON literal.
        json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL)
        data = json.loads(json_like)

        # Random play-session token used in every API URL below.
        session = str(random.randint(0, 1000000000))
        mix_id = data['id']
        track_count = data['tracks_count']
        first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
        next_url = first_url
        res = []
        for i in itertools.count():
            api_json = self._download_webpage(
                next_url, playlist_id,
                note=u'Downloading song information %s/%s' % (str(i+1), track_count),
                errnote=u'Failed to download song information')
            api_data = json.loads(api_json)
            track_data = api_data[u'set']['track']
            info = {
                'id': track_data['id'],
                'url': track_data['track_file_stream_url'],
                'title': track_data['performer'] + u' - ' + track_data['name'],
                'raw_title': track_data['name'],
                'uploader_id': data['user']['login'],
                'ext': 'm4a',
            }
            res.append(info)
            # The API flags the final track; stop instead of asking for more.
            if api_data['set']['at_last_track']:
                break
            next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
        return res
class KeekIE(InfoExtractor):
    """Information extractor for keek.com."""
    _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
    IE_NAME = u'keek'

    def _real_extract(self, url):
        """Build the CDN media and thumbnail URLs from the id in the page URL.

        The page itself is only downloaded for the title and uploader.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL does not match _VALID_URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Same error as the sibling extractors instead of an AttributeError.
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoID')

        video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
        thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
        webpage = self._download_webpage(url, video_id)

        video_title = self._html_search_regex(
            r'<meta property="og:title" content="(?P<title>.*?)"',
            webpage, u'title')

        # The uploader is optional: fatal=False is passed for this lookup.
        uploader = self._html_search_regex(
            r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
            webpage, u'uploader', fatal=False)

        info = {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'thumbnail': thumbnail,
            'uploader': uploader,
        }
        return [info]
class MySpassIE(InfoExtractor):
    """Information extractor for myspass.de."""
    _VALID_URL = r'http://www.myspass.de/.*'

    def _real_extract(self, url):
        """Fetch the metadata XML for the video id found in the URL path.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the metadata lacks the download URL or
        the title.
        """
        META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'

        # video id is the last path element of the URL
        # usually there is a trailing slash, so also try the second but last
        url_path = compat_urllib_parse_urlparse(url).path
        url_parent_path, video_id = os.path.split(url_path)
        if not video_id:
            _, video_id = os.path.split(url_parent_path)

        # get metadata
        metadata_url = META_DATA_URL_TEMPLATE % video_id
        metadata_text = self._download_webpage(metadata_url, video_id)
        metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))

        # extract values from metadata
        url_flv_el = metadata.find('url_flv')
        if url_flv_el is None:
            raise ExtractorError(u'Unable to extract download url')
        video_url = url_flv_el.text
        extension = os.path.splitext(video_url)[1][1:]

        title_el = metadata.find('title')
        if title_el is None:
            raise ExtractorError(u'Unable to extract title')
        title = title_el.text

        # Fall back to the file extension when no explicit format id is given.
        format_id_el = metadata.find('format_id')
        if format_id_el is None:
            video_format = extension
        else:
            video_format = format_id_el.text

        # Description and thumbnail are optional; default both to None so the
        # info dictionary below never touches an unbound name.
        description_el = metadata.find('description')
        if description_el is not None:
            description = description_el.text
        else:
            description = None

        imagePreview_el = metadata.find('imagePreview')
        if imagePreview_el is not None:
            thumbnail = imagePreview_el.text
        else:
            thumbnail = None

        info = {
            'id': video_id,
            'url': video_url,
            'title': title,
            'ext': extension,
            'format': video_format,
            'thumbnail': thumbnail,
            'description': description
        }
        return [info]
class SpiegelIE(InfoExtractor):
    """Information extractor for spiegel.de videos."""
    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'

    def _real_extract(self, url):
        """Read the title from the page and the media file from the flash XML.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL does not match _VALID_URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Same error as the sibling extractors instead of an AttributeError.
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoID')

        webpage = self._download_webpage(url, video_id)

        video_title = self._html_search_regex(
            r'<div class="module-title">(.*?)</div>',
            webpage, u'title')

        xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
        xml_code = self._download_webpage(
            xml_url, video_id,
            note=u'Downloading XML', errnote=u'Failed to download XML')

        idoc = xml.etree.ElementTree.fromstring(xml_code)
        # The last child of the document root is the variant that gets used.
        # NOTE(review): presumably the best quality one - confirm.
        last_type = idoc[-1]
        filename = last_type.findall('./filename')[0].text
        duration = float(last_type.findall('./duration')[0].text)

        video_url = 'http://video2.spiegel.de/flash/' + filename
        video_ext = filename.rpartition('.')[2]
        info = {
            'id': video_id,
            'url': video_url,
            'ext': video_ext,
            'title': video_title,
            'duration': duration,
        }
        return [info]
class LiveLeakIE(InfoExtractor):
    """Information extractor for liveleak.com."""

    # The scheme used to be spelled 'http?://', which made the final 'p'
    # optional rather than allowing https.
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
    IE_NAME = u'liveleak'

    def _real_extract(self, url):
        """Extract media URL, title, description and uploader from the page.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL does not match _VALID_URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('video_id')

        webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(
            r'file: "(.*?)",',
            webpage, u'video URL')

        # Drop the site prefix from the og:title value.
        video_title = self._html_search_regex(
            r'<meta property="og:title" content="(?P<title>.*?)"',
            webpage, u'title').replace('LiveLeak.com -', '').strip()

        video_description = self._html_search_regex(
            r'<meta property="og:description" content="(?P<desc>.*?)"',
            webpage, u'description', fatal=False)

        video_uploader = self._html_search_regex(
            r'By:.*?(\w+)</a>',
            webpage, u'uploader', fatal=False)

        info = {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'description': video_description,
            'uploader': video_uploader
        }

        return [info]
class TumblrIE(InfoExtractor):
    """Information extractor for videos in tumblr.com posts."""
    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'

    def _real_extract(self, url):
        """Extract the video embedded in a Tumblr post.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL does not match _VALID_URL or no
        video source is found in the page.
        """
        m_url = re.match(self._VALID_URL, url)
        if m_url is None:
            # Same error as the sibling extractors instead of an AttributeError.
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = m_url.group('id')
        blog = m_url.group('blog_name')

        # Always fetch the canonical post URL, whichever form was given.
        url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
        webpage = self._download_webpage(url, video_id)

        # The pattern matches a literal '\x22' escape sequence where the
        # quotes around the attribute values would be.
        re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
        video = re.search(re_video, webpage)
        if video is None:
            raise ExtractorError(u'Unable to extract video')
        video_url = video.group('video_url')
        ext = video.group('ext')

        # We pick the first poster
        video_thumbnail = self._search_regex(
            r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
            webpage, u'thumbnail', fatal=False)
        if video_thumbnail:
            video_thumbnail = video_thumbnail.replace('\\', '')

        # The only place where you can get a title, it's not complete,
        # but searching in other places doesn't work for all videos
        video_title = self._html_search_regex(
            r'<title>(?P<title>.*?)</title>',
            webpage, u'title', flags=re.DOTALL)

        return [{
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'ext': ext,
        }]
class BandcampIE(InfoExtractor):
    """Information extractor for freely downloadable bandcamp.com tracks."""
    _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'

    def _real_extract(self, url):
        """Resolve the mp3-320 download of a track offered for free.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL does not match _VALID_URL, the
        track has no free download page, or one of the intermediate pages
        lacks the data expected from it.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        title = mobj.group('title')
        webpage = self._download_webpage(url, title)

        # We get the link to the free download page
        m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
        if m_download is None:
            raise ExtractorError(u'No free songs found')
        download_link = m_download.group(1)

        m_id = re.search(
            r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
            webpage, re.MULTILINE | re.DOTALL)
        if m_id is None:
            raise ExtractorError(u'Unable to extract track id')
        track_id = m_id.group('id')

        download_webpage = self._download_webpage(
            download_link, track_id,
            'Downloading free downloads page')

        # We get the dictionary of the track from some javascript code
        m_items = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE)
        if m_items is None:
            raise ExtractorError(u'Unable to extract track information')
        info = json.loads(m_items.group(1))[0]

        # We pick mp3-320 for now, until format selection can be easily implemented.
        mp3_info = info[u'downloads'][u'mp3-320']

        # If we try to use this url it says the link has expired
        initial_url = mp3_info[u'url']
        re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
        m_url = re.match(re_url, initial_url)
        if m_url is None:
            raise ExtractorError(u'Unable to parse download url')

        # We build the url we will use to get the final track url
        # This url is built by Bandcamp in the script download_bunde_*.js
        request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (
            m_url.group('server'), m_url.group('fsig'), track_id, m_url.group('ts'))
        final_url_webpage = self._download_webpage(request_url, track_id, 'Requesting download url')

        # If we could correctly generate the .rand field the url would be
        # in the "download_url" key
        m_final = re.search(r'"retry_url":"(.*?)"', final_url_webpage)
        if m_final is None:
            raise ExtractorError(u'Unable to extract final download url')
        final_url = m_final.group(1)

        track_info = {
            'id': track_id,
            'title': info[u'title'],
            'ext': 'mp3',
            'url': final_url,
            'thumbnail': info[u'thumb_url'],
            'uploader': info[u'artist'],
        }

        return [track_info]
class RedTubeIE(InfoExtractor):
    """Information Extractor for redtube"""
    _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        """Extract the mp4 source and the title from the video page.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL does not match _VALID_URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        video_extension = 'mp4'
        webpage = self._download_webpage(url, video_id)

        self.report_extraction(video_id)

        video_url = self._html_search_regex(
            r'<source src="(.+?)" type="video/mp4">',
            webpage, u'video URL')

        video_title = self._html_search_regex(
            '<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
            webpage, u'title')

        return [{
            'id': video_id,
            'url': video_url,
            'ext': video_extension,
            'title': video_title,
        }]
class InaIE(InfoExtractor):
    """Information Extractor for Ina.fr"""
    _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'

    def _real_extract(self, url):
        """Extract the mp4 URL and the title from the notice's MRSS feed.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL does not match _VALID_URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Same error as the sibling extractors instead of an AttributeError.
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
        video_extension = 'mp4'
        # Only the feed is downloaded; the HTML page at `url` is never fetched.
        webpage = self._download_webpage(mrss_url, video_id)

        self.report_extraction(video_id)

        video_url = self._html_search_regex(
            r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
            webpage, u'video URL')

        video_title = self._search_regex(
            r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
            webpage, u'title')

        return [{
            'id': video_id,
            'url': video_url,
            'ext': video_extension,
            'title': video_title,
        }]
class HowcastIE(InfoExtractor):
    """Information Extractor for Howcast.com"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'

    def _real_extract(self, url):
        """Extract the mobile mp4 URL and the page metadata for a video.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL does not match _VALID_URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Same error as the sibling extractors instead of an AttributeError.
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        # Rebuild the page URL from the id alone.
        webpage_url = 'http://www.howcast.com/videos/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

        self.report_extraction(video_id)

        video_url = self._search_regex(
            r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
            webpage, u'video URL')

        # The content attribute may use either quote style, hence two groups.
        video_title = self._html_search_regex(
            r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'',
            webpage, u'title')

        video_description = self._html_search_regex(
            r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
            webpage, u'description', fatal=False)

        thumbnail = self._html_search_regex(
            r'<meta content=\'(.+?)\' property=\'og:image\'',
            webpage, u'thumbnail', fatal=False)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'description': video_description,
            'thumbnail': thumbnail,
        }]
class VineIE(InfoExtractor):
    """Information Extractor for Vine.co"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'

    def _real_extract(self, url):
        """Extract the stream URL and metadata from the page's meta tags.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL does not match _VALID_URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Same error as the sibling extractors instead of an AttributeError.
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        webpage_url = 'https://vine.co/v/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

        self.report_extraction(video_id)

        video_url = self._html_search_regex(
            r'<meta property="twitter:player:stream" content="(.+?)"',
            webpage, u'video URL')

        video_title = self._html_search_regex(
            r'<meta property="og:title" content="(.+?)"',
            webpage, u'title')

        # The optional second group keeps any query string out of the result.
        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)(\?.*?)?"',
            webpage, u'thumbnail', fatal=False)

        uploader = self._html_search_regex(
            r'<div class="user">.*?<h2>(.+?)</h2>',
            webpage, u'uploader', fatal=False, flags=re.DOTALL)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'thumbnail': thumbnail,
            'uploader': uploader,
        }]
class FlickrIE(InfoExtractor):
    """Information Extractor for Flickr videos"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'

    def _real_extract(self, url):
        """Resolve the stream URL of a Flickr video through two XML lookups.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL does not match _VALID_URL or the
        playlist XML contains no stream entry.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Same error as the sibling extractors instead of an AttributeError.
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        video_uploader_id = mobj.group('uploader_id')
        webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

        # The per-photo secret is required by both XML requests below.
        secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret')

        first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
        first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')

        node_id = self._html_search_regex(
            r'<Item id="id">(\d+-\d+)</Item>',
            first_xml, u'node_id')

        second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
        second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')

        self.report_extraction(video_id)

        stream = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
        if stream is None:
            raise ExtractorError(u'Unable to extract video url')
        # Only the FULLPATH attribute is HTML-unescaped before joining.
        video_url = stream.group(1) + unescapeHTML(stream.group(2))

        video_title = self._html_search_regex(
            r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')',
            webpage, u'video title')

        video_description = self._html_search_regex(
            r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')',
            webpage, u'description', fatal=False)

        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')',
            webpage, u'thumbnail', fatal=False)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'description': video_description,
            'thumbnail': thumbnail,
            'uploader_id': video_uploader_id,
        }]
class TeamcocoIE(InfoExtractor):
    """Information extractor for teamcoco.com videos."""
    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'

    def _real_extract(self, url):
        """Extract metadata from the page and the media URL from the CVP XML.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL does not match _VALID_URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        url_title = mobj.group('url_title')
        # The numeric id is not known yet, so the URL slug labels this download.
        webpage = self._download_webpage(url, url_title)

        video_id = self._html_search_regex(
            r'<article class="video" data-id="(\d+?)"',
            webpage, u'video id')

        self.report_extraction(video_id)

        video_title = self._html_search_regex(
            r'<meta property="og:title" content="(.+?)"',
            webpage, u'title')

        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)"',
            webpage, u'thumbnail', fatal=False)

        video_description = self._html_search_regex(
            r'<meta property="og:description" content="(.*?)"',
            webpage, u'description', fatal=False)

        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
        data = self._download_webpage(data_url, video_id, 'Downloading data webpage')

        # Only the entry marked type="high" is used.
        video_url = self._html_search_regex(
            r'<file type="high".*?>(.*?)</file>',
            data, u'video URL')

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'thumbnail': thumbnail,
            'description': video_description,
        }]
class XHamsterIE(InfoExtractor):
    """Information Extractor for xHamster"""
    _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'

    def _real_extract(self, url):
        """Extract media URL, title, upload date, uploader and thumbnail.

        Returns a one-element list holding the info dictionary. No
        description is extracted.
        Raises ExtractorError if the URL does not match _VALID_URL or the
        player configuration cannot be found in the page.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Same error as the sibling extractors instead of an AttributeError.
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        # Rebuild the page URL from the id with an empty title part.
        page_url = 'http://xhamster.com/movies/%s/.html' % video_id
        webpage = self._download_webpage(page_url, video_id)

        mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract media URL')
        if len(mobj.group('server')) == 0:
            # With no server, 'file' already is the percent-encoded full URL.
            video_url = compat_urllib_parse.unquote(mobj.group('file'))
        else:
            video_url = mobj.group('server')+'/key='+mobj.group('file')
        video_extension = video_url.split('.')[-1]

        video_title = self._html_search_regex(
            r'<title>(?P<title>.+?) - xHamster\.com</title>',
            webpage, u'title')

        # Can't see the description anywhere in the UI, so none is extracted.

        mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
        if mobj:
            # Collapse to YYYYMMDD.
            video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
        else:
            video_upload_date = None
            self._downloader.report_warning(u'Unable to extract upload date')

        video_uploader_id = self._html_search_regex(
            r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
            webpage, u'uploader id', default=u'anonymous')

        video_thumbnail = self._search_regex(
            r'\'image\':\'(?P<thumbnail>[^\']+)\'',
            webpage, u'thumbnail', fatal=False)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': video_extension,
            'title': video_title,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'thumbnail': video_thumbnail
        }]
class HypemIE(InfoExtractor):
    """Information Extractor for hypem"""
    _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'

    def _real_extract(self, url):
        """Resolve the mp3 URL of a hypem.com track.

        Loads the track page for its embedded JSON and session cookie, then
        requests the 'serve' endpoint with that cookie for the final URL.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL does not match _VALID_URL or
        either JSON payload cannot be used.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        track_id = mobj.group(1)

        data = {'ax': 1, 'ts': time.time()}
        data_encoded = compat_urllib_parse.urlencode(data)
        complete_url = url + "?" + data_encoded
        request = compat_urllib_request.Request(complete_url)
        response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
        # The cookie set here is replayed on the serve request below.
        cookie = urlh.headers.get('Set-Cookie', '')

        self.report_extraction(track_id)

        html_tracks = self._html_search_regex(
            r'<script type="application/json" id="displayList-data">(.*?)</script>',
            response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
        try:
            track_list = json.loads(html_tracks)
            track = track_list[u'tracks'][0]
        except (ValueError, KeyError, IndexError):
            # Also covers well-formed JSON without a usable 'tracks' list.
            raise ExtractorError(u'Hypemachine contained invalid JSON.')

        key = track[u"key"]
        track_id = track[u"id"]
        artist = track[u"artist"]
        title = track[u"song"]

        serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
        request = compat_urllib_request.Request(serve_url, "", {'Content-Type': 'application/json'})
        request.add_header('cookie', cookie)
        song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
        try:
            song_data = json.loads(song_data_json)
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')
        final_url = song_data[u"url"]

        return [{
            'id': track_id,
            'url': final_url,
            'ext': "mp3",
            'title': title,
            'artist': artist,
        }]
class Vbox7IE(InfoExtractor):
    """Information Extractor for Vbox7"""
    _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'

    def _real_extract(self, url):
        """Follow the JavaScript redirect and query the media info endpoint.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL does not match _VALID_URL or the
        info request yields no response.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(1)

        # The first response only carries a script-driven redirect; its
        # target is appended to the URL that was actually reached.
        redirect_page, urlh = self._download_webpage_handle(url, video_id)
        new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
        redirect_url = urlh.geturl() + new_location
        webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')

        # Keep only the part of <title> before the first '/'.
        title = self._html_search_regex(
            r'<title>(.*)</title>',
            webpage, u'title').split('/')[0].strip()

        ext = "flv"
        info_url = "http://vbox7.com/play/magare.do"
        data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
        info_request = compat_urllib_request.Request(info_url, data)
        info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
        if info_response is None:
            raise ExtractorError(u'Unable to extract the media url')
        # The response is unpacked as exactly two 'name=value' pairs joined
        # by '&'. Splitting once keeps values that contain '=' intact.
        (final_url, thumbnail_url) = [
            pair.split('=', 1)[1] for pair in info_response.split('&')]

        return [{
            'id': video_id,
            'url': final_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail_url,
        }]
def gen_extractors():
    """ Return a list of an instance of every supported extractor.
    The order does matter; the first extractor matched is the one handling the URL.
    """
    # NOTE(review): most entries of this list were not visible when this
    # function was restored; the order should be confirmed. GenericIE is
    # kept last on the assumption that it is the catch-all fallback.
    return [
        YoutubePlaylistIE(),
        YoutubeChannelIE(),
        YoutubeUserIE(),
        YoutubeSearchIE(),
        YoutubeIE(),
        MetacafeIE(),
        DailymotionIE(),
        GoogleSearchIE(),
        PhotobucketIE(),
        YahooIE(),
        YahooSearchIE(),
        DepositFilesIE(),
        FacebookIE(),
        BlipTVIE(),
        BlipTVUserIE(),
        VimeoIE(),
        MyVideoIE(),
        ComedyCentralIE(),
        EscapistIE(),
        CollegeHumorIE(),
        XVideosIE(),
        SoundcloudSetIE(),
        SoundcloudIE(),
        InfoQIE(),
        MixcloudIE(),
        StanfordOpenClassroomIE(),
        MTVIE(),
        YoukuIE(),
        XNXXIE(),
        YouJizzIE(),
        PornotubeIE(),
        YouPornIE(),
        GooglePlusIE(),
        ArteTvIE(),
        NBAIE(),
        WorldStarHipHopIE(),
        JustinTVIE(),
        FunnyOrDieIE(),
        SteamIE(),
        UstreamIE(),
        RBMARadioIE(),
        EightTracksIE(),
        KeekIE(),
        TEDIE(),
        MySpassIE(),
        SpiegelIE(),
        LiveLeakIE(),
        ARDIE(),
        ZDFIE(),
        TumblrIE(),
        BandcampIE(),
        RedTubeIE(),
        InaIE(),
        HowcastIE(),
        VineIE(),
        FlickrIE(),
        TeamcocoIE(),
        XHamsterIE(),
        HypemIE(),
        Vbox7IE(),
        GametrailersIE(),
        StatigramIE(),
        GenericIE()
    ]
def get_info_extractor(ie_name):
    """Look up an info extractor class by its short name.

    The suffix 'IE' is appended to ie_name and the result is resolved in
    this module's global namespace; a KeyError is raised when no such name
    exists.
    """
    class_name = ie_name + 'IE'
    module_namespace = globals()
    return module_namespace[class_name]