+class SpiegelIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
+
+ def _real_extract(self, url):
+ m = re.match(self._VALID_URL, url)
+ video_id = m.group('videoID')
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>',
+ webpage, u'title')
+
+ xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
+ xml_code = self._download_webpage(xml_url, video_id,
+ note=u'Downloading XML', errnote=u'Failed to download XML')
+
+ idoc = xml.etree.ElementTree.fromstring(xml_code)
+ last_type = idoc[-1]
+ filename = last_type.findall('./filename')[0].text
+ duration = float(last_type.findall('./duration')[0].text)
+
+ video_url = 'http://video2.spiegel.de/flash/' + filename
+ video_ext = filename.rpartition('.')[2]
+ info = {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': video_ext,
+ 'title': video_title,
+ 'duration': duration,
+ }
+ return [info]
+
+class LiveLeakIE(InfoExtractor):
+
+ _VALID_URL = r'^(?:http?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
+ IE_NAME = u'liveleak'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ if mobj is None:
+ raise ExtractorError(u'Invalid URL: %s' % url)
+
+ video_id = mobj.group('video_id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._search_regex(r'file: "(.*?)",',
+ webpage, u'video URL')
+
+ video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
+ webpage, u'title').replace('LiveLeak.com -', '').strip()
+
+ video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
+ webpage, u'description', fatal=False)
+
+ video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
+ webpage, u'uploader', fatal=False)
+
+ info = {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'title': video_title,
+ 'description': video_description,
+ 'uploader': video_uploader
+ }
+
+ return [info]
+
+class ARDIE(InfoExtractor):
+ _VALID_URL = r'^(?:https?://)?(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
+ _TITLE = r'<h1(?: class="boxTopHeadline")?>(?P<title>.*)</h1>'
+ _MEDIA_STREAM = r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)'
+
+ def _real_extract(self, url):
+ # determine video id from url
+ m = re.match(self._VALID_URL, url)
+
+ numid = re.search(r'documentId=([0-9]+)', url)
+ if numid:
+ video_id = numid.group(1)
+ else:
+ video_id = m.group('video_id')
+
+ # determine title and media streams from webpage
+ html = self._download_webpage(url, video_id)
+ title = re.search(self._TITLE, html).group('title')
+ streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
+ if not streams:
+ assert '"fsk"' in html
+ raise ExtractorError(u'This video is only available after 8:00 pm')
+
+ # choose default media type and highest quality for now
+ stream = max([s for s in streams if int(s["media_type"]) == 0],
+ key=lambda s: int(s["quality"]))
+
+ # there's two possibilities: RTMP stream or HTTP download
+ info = {'id': video_id, 'title': title, 'ext': 'mp4'}
+ if stream['rtmp_url']:
+ self.to_screen(u'RTMP download detected')
+ assert stream['video_url'].startswith('mp4:')
+ info["url"] = stream["rtmp_url"]
+ info["play_path"] = stream['video_url']
+ else:
+ assert stream["video_url"].endswith('.mp4')
+ info["url"] = stream["video_url"]
+ return [info]
+
+class TumblrIE(InfoExtractor):
+ _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'
+
+ def _real_extract(self, url):
+ m_url = re.match(self._VALID_URL, url)
+ video_id = m_url.group('id')
+ blog = m_url.group('blog_name')
+
+ url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
+ webpage = self._download_webpage(url, video_id)
+
+ re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
+ video = re.search(re_video, webpage)
+ if video is None:
+ raise ExtractorError(u'Unable to extract video')
+ video_url = video.group('video_url')
+ ext = video.group('ext')
+
+ video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
+ webpage, u'thumbnail', fatal=False) # We pick the first poster
+ if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\', '')
+
+ # The only place where you can get a title, it's not complete,
+ # but searching in other places doesn't work for all videos
+ video_title = self._html_search_regex(r'<title>(?P<title>.*?)</title>',
+ webpage, u'title', flags=re.DOTALL)
+
+ return [{'id': video_id,
+ 'url': video_url,
+ 'title': video_title,
+ 'thumbnail': video_thumbnail,
+ 'ext': ext
+ }]
+
+class BandcampIE(InfoExtractor):
+ _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ title = mobj.group('title')
+ webpage = self._download_webpage(url, title)
+ # We get the link to the free download page
+ m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
+ if m_download is None:
+ raise ExtractorError(u'No free songs found')
+
+ download_link = m_download.group(1)
+ id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
+ webpage, re.MULTILINE|re.DOTALL).group('id')
+
+ download_webpage = self._download_webpage(download_link, id,
+ 'Downloading free downloads page')
+ # We get the dictionary of the track from some javascrip code
+ info = re.search(r'items: (.*?),$',
+ download_webpage, re.MULTILINE).group(1)
+ info = json.loads(info)[0]
+ # We pick mp3-320 for now, until format selection can be easily implemented.
+ mp3_info = info[u'downloads'][u'mp3-320']
+ # If we try to use this url it says the link has expired
+ initial_url = mp3_info[u'url']
+ re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
+ m_url = re.match(re_url, initial_url)
+ #We build the url we will use to get the final track url
+ # This url is build in Bandcamp in the script download_bunde_*.js
+ request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), id, m_url.group('ts'))
+ final_url_webpage = self._download_webpage(request_url, id, 'Requesting download url')
+ # If we could correctly generate the .rand field the url would be
+ #in the "download_url" key
+ final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
+
+ track_info = {'id':id,
+ 'title' : info[u'title'],
+ 'ext' : 'mp3',
+ 'url' : final_url,
+ 'thumbnail' : info[u'thumb_url'],
+ 'uploader' : info[u'artist']
+ }
+
+ return [track_info]
+
+class RedTubeIE(InfoExtractor):
+ """Information Extractor for redtube"""
+ _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
+
+ def _real_extract(self,url):
+ mobj = re.match(self._VALID_URL, url)
+ if mobj is None:
+ raise ExtractorError(u'Invalid URL: %s' % url)
+
+ video_id = mobj.group('id')
+ video_extension = 'mp4'
+ webpage = self._download_webpage(url, video_id)
+
+ self.report_extraction(video_id)
+
+ video_url = self._html_search_regex(r'<source src="(.+?)" type="video/mp4">',
+ webpage, u'video URL')
+
+ video_title = self._html_search_regex('<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
+ webpage, u'title')
+
+ return [{
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': video_extension,
+ 'title': video_title,
+ }]
+
+class InaIE(InfoExtractor):
+ """Information Extractor for Ina.fr"""
+ _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
+
+ def _real_extract(self,url):
+ mobj = re.match(self._VALID_URL, url)
+
+ video_id = mobj.group('id')
+ mrss_url='http://player.ina.fr/notices/%s.mrss' % video_id
+ video_extension = 'mp4'
+ webpage = self._download_webpage(mrss_url, video_id)
+
+ self.report_extraction(video_id)
+
+ video_url = self._html_search_regex(r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
+ webpage, u'video URL')
+
+ video_title = self._search_regex(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
+ webpage, u'title')
+
+ return [{
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': video_extension,
+ 'title': video_title,
+ }]
+
+class HowcastIE(InfoExtractor):
+ """Information Extractor for Howcast.com"""
+ _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ video_id = mobj.group('id')
+ webpage_url = 'http://www.howcast.com/videos/' + video_id
+ webpage = self._download_webpage(webpage_url, video_id)
+
+ self.report_extraction(video_id)
+
+ video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
+ webpage, u'video URL')
+
+ video_title = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'',
+ webpage, u'title')
+
+ video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
+ webpage, u'description', fatal=False)
+
+ thumbnail = self._html_search_regex(r'<meta content=\'(.+?)\' property=\'og:image\'',
+ webpage, u'thumbnail', fatal=False)
+
+ return [{
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': thumbnail,
+ }]
+
+class VineIE(InfoExtractor):
+ """Information Extractor for Vine.co"""
+ _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ video_id = mobj.group('id')
+ webpage_url = 'https://vine.co/v/' + video_id
+ webpage = self._download_webpage(webpage_url, video_id)
+
+ self.report_extraction(video_id)
+
+ video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
+ webpage, u'video URL')
+
+ video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
+ webpage, u'title')
+
+ thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)(\?.*?)?"',
+ webpage, u'thumbnail', fatal=False)
+
+ uploader = self._html_search_regex(r'<div class="user">.*?<h2>(.+?)</h2>',
+ webpage, u'uploader', fatal=False, flags=re.DOTALL)
+
+ return [{
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'title': video_title,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ }]
+
+class FlickrIE(InfoExtractor):
+ """Information Extractor for Flickr videos"""
+ _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ video_id = mobj.group('id')
+ video_uploader_id = mobj.group('uploader_id')
+ webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
+ webpage = self._download_webpage(webpage_url, video_id)
+
+ secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret')
+
+ first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
+ first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
+
+ node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
+ first_xml, u'node_id')
+
+ second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
+ second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
+
+ self.report_extraction(video_id)
+
+ mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract video url')
+ video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
+
+ video_title = self._html_search_regex(r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')',
+ webpage, u'video title')
+
+ video_description = self._html_search_regex(r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')',
+ webpage, u'description', fatal=False)
+
+ thumbnail = self._html_search_regex(r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')',
+ webpage, u'thumbnail', fatal=False)
+
+ return [{
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': thumbnail,
+ 'uploader_id': video_uploader_id,
+ }]
+
+class TeamcocoIE(InfoExtractor):
+ _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ if mobj is None:
+ raise ExtractorError(u'Invalid URL: %s' % url)
+ url_title = mobj.group('url_title')
+ webpage = self._download_webpage(url, url_title)
+
+ video_id = self._html_search_regex(r'<article class="video" data-id="(\d+?)"',
+ webpage, u'video id')
+
+ self.report_extraction(video_id)
+
+ video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
+ webpage, u'title')
+
+ thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)"',
+ webpage, u'thumbnail', fatal=False)
+
+ video_description = self._html_search_regex(r'<meta property="og:description" content="(.*?)"',
+ webpage, u'description', fatal=False)
+
+ data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
+ data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
+
+ video_url = self._html_search_regex(r'<file type="high".*?>(.*?)</file>',
+ data, u'video URL')
+
+ return [{
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'title': video_title,
+ 'thumbnail': thumbnail,
+ 'description': video_description,
+ }]
+
+class XHamsterIE(InfoExtractor):
+ """Information Extractor for xHamster"""
+ _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
+
+ def _real_extract(self,url):
+ mobj = re.match(self._VALID_URL, url)
+
+ video_id = mobj.group('id')
+ mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
+ webpage = self._download_webpage(mrss_url, video_id)
+
+ mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract media URL')
+ if len(mobj.group('server')) == 0:
+ video_url = compat_urllib_parse.unquote(mobj.group('file'))
+ else:
+ video_url = mobj.group('server')+'/key='+mobj.group('file')
+ video_extension = video_url.split('.')[-1]
+
+ video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
+ webpage, u'title')
+
+ # Can't see the description anywhere in the UI
+ # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
+ # webpage, u'description', fatal=False)
+ # if video_description: video_description = unescapeHTML(video_description)
+
+ mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
+ if mobj:
+ video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
+ else:
+ video_upload_date = None
+ self._downloader.report_warning(u'Unable to extract upload date')
+
+ video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
+ webpage, u'uploader id', default=u'anonymous')
+
+ video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',
+ webpage, u'thumbnail', fatal=False)
+
+ return [{
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': video_extension,
+ 'title': video_title,
+ # 'description': video_description,
+ 'upload_date': video_upload_date,
+ 'uploader_id': video_uploader_id,
+ 'thumbnail': video_thumbnail
+ }]
+
+class HypemIE(InfoExtractor):
+ """Information Extractor for hypem"""
+ _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ if mobj is None:
+ raise ExtractorError(u'Invalid URL: %s' % url)
+ track_id = mobj.group(1)
+
+ data = { 'ax': 1, 'ts': time.time() }
+ data_encoded = compat_urllib_parse.urlencode(data)
+ complete_url = url + "?" + data_encoded
+ request = compat_urllib_request.Request(complete_url)
+ response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
+ cookie = urlh.headers.get('Set-Cookie', '')
+
+ self.report_extraction(track_id)
+
+ html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
+ response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
+ try:
+ track_list = json.loads(html_tracks)
+ track = track_list[u'tracks'][0]
+ except ValueError:
+ raise ExtractorError(u'Hypemachine contained invalid JSON.')
+
+ key = track[u"key"]
+ track_id = track[u"id"]
+ artist = track[u"artist"]
+ title = track[u"song"]
+
+ serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
+ request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'})
+ request.add_header('cookie', cookie)
+ song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
+ try:
+ song_data = json.loads(song_data_json)
+ except ValueError:
+ raise ExtractorError(u'Hypemachine contained invalid JSON.')
+ final_url = song_data[u"url"]
+
+ return [{
+ 'id': track_id,
+ 'url': final_url,
+ 'ext': "mp3",
+ 'title': title,
+ 'artist': artist,
+ }]
+
+