[redtube] move into own file

[youtube-dl.git] / youtube_dl / InfoExtractors.py
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

index 6a04735fa0a94ce27f27b540e12b84c10088daa2..c06ecbe52aad148ce036cd98c74ef1f371b27c78 100755 (executable)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -20,35 +20,52 @@ from .extractor.common import InfoExtractor, SearchInfoExtractor
  
  from .extractor.ard import ARDIE
  from .extractor.arte import ArteTvIE
  
  from .extractor.ard import ARDIE
  from .extractor.arte import ArteTvIE
+from .extractor.bandcamp import BandcampIE
  from .extractor.bliptv import BlipTVIE, BlipTVUserIE
  from .extractor.comedycentral import ComedyCentralIE
  from .extractor.collegehumor import CollegeHumorIE
  from .extractor.dailymotion import DailymotionIE
  from .extractor.depositfiles import DepositFilesIE
  from .extractor.bliptv import BlipTVIE, BlipTVUserIE
  from .extractor.comedycentral import ComedyCentralIE
  from .extractor.collegehumor import CollegeHumorIE
  from .extractor.dailymotion import DailymotionIE
  from .extractor.depositfiles import DepositFilesIE
+from .extractor.eighttracks import EightTracksIE
  from .extractor.escapist import EscapistIE
  from .extractor.facebook import FacebookIE
  from .extractor.escapist import EscapistIE
  from .extractor.facebook import FacebookIE
+from .extractor.funnyordie import FunnyOrDieIE
  from .extractor.gametrailers import GametrailersIE
  from .extractor.generic import GenericIE
  from .extractor.googleplus import GooglePlusIE
  from .extractor.googlesearch import GoogleSearchIE
  from .extractor.infoq import InfoQIE
  from .extractor.gametrailers import GametrailersIE
  from .extractor.generic import GenericIE
  from .extractor.googleplus import GooglePlusIE
  from .extractor.googlesearch import GoogleSearchIE
  from .extractor.infoq import InfoQIE
+from .extractor.justintv import JustinTVIE
+from .extractor.keek import KeekIE
+from .extractor.liveleak import LiveLeakIE
  from .extractor.metacafe import MetacafeIE
  from .extractor.mixcloud import MixcloudIE
  from .extractor.mtv import MTVIE
  from .extractor.metacafe import MetacafeIE
  from .extractor.mixcloud import MixcloudIE
  from .extractor.mtv import MTVIE
+from .extractor.myspass import MySpassIE
  from .extractor.myvideo import MyVideoIE
  from .extractor.nba import NBAIE
  from .extractor.statigram import StatigramIE
  from .extractor.photobucket import PhotobucketIE
  from .extractor.myvideo import MyVideoIE
  from .extractor.nba import NBAIE
  from .extractor.statigram import StatigramIE
  from .extractor.photobucket import PhotobucketIE
+from .extractor.pornotube import PornotubeIE
+from .extractor.rbmaradio import RBMARadioIE
+from .extractor.redtube import RedTubeIE
  from .extractor.soundcloud import SoundcloudIE, SoundcloudSetIE
  from .extractor.soundcloud import SoundcloudIE, SoundcloudSetIE
+from .extractor.spiegel import SpiegelIE
  from .extractor.stanfordoc import StanfordOpenClassroomIE
  from .extractor.steam import SteamIE
  from .extractor.ted import TEDIE
  from .extractor.stanfordoc import StanfordOpenClassroomIE
  from .extractor.steam import SteamIE
  from .extractor.ted import TEDIE
+from .extractor.tumblr import TumblrIE
+from .extractor.ustream import UstreamIE
+from .extractor.vbox7 import Vbox7IE
  from .extractor.vimeo import VimeoIE
  from .extractor.vimeo import VimeoIE
+from .extractor.vine import VineIE
  from .extractor.worldstarhiphop import WorldStarHipHopIE
  from .extractor.xnxx import XNXXIE
  from .extractor.xvideos import XVideosIE
  from .extractor.yahoo import YahooIE, YahooSearchIE
  from .extractor.worldstarhiphop import WorldStarHipHopIE
  from .extractor.xnxx import XNXXIE
  from .extractor.xvideos import XVideosIE
  from .extractor.yahoo import YahooIE, YahooSearchIE
+from .extractor.youjizz import YouJizzIE
  from .extractor.youku import YoukuIE
  from .extractor.youku import YoukuIE
+from .extractor.youporn import YouPornIE
  from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
  from .extractor.zdf import ZDFIE
  
  from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
  from .extractor.zdf import ZDFIE
  
@@ -64,723 +81,28 @@ from .extractor.zdf import ZDFIE
  
  
  
  
  
  
-class JustinTVIE(InfoExtractor):
-    """Information extractor for justin.tv and twitch.tv"""
-    # TODO: One broadcast may be split into multiple videos. The key
-    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
-    # starts at 1 and increases. Can we treat all parts as one video?
  
  
-    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
-        (?:
-            (?P<channelid>[^/]+)|
-            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
-            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
-        )
-        /?(?:\#.*)?$
-        """
-    _JUSTIN_PAGE_LIMIT = 100
-    IE_NAME = u'justin.tv'
  
  
-    def report_download_page(self, channel, offset):
-        """Report attempt to download a single page of videos."""
-        self.to_screen(u'%s: Downloading video information from %d to %d' %
-                (channel, offset, offset + self._JUSTIN_PAGE_LIMIT))
  
  
-    # Return count of items, list of *valid* items
-    def _parse_page(self, url, video_id):
-        webpage = self._download_webpage(url, video_id,
-                                         u'Downloading video info JSON',
-                                         u'unable to download video info JSON')
  
  
-        response = json.loads(webpage)
-        if type(response) != list:
-            error_text = response.get('error', 'unknown error')
-            raise ExtractorError(u'Justin.tv API: %s' % error_text)
-        info = []
-        for clip in response:
-            video_url = clip['video_file_url']
-            if video_url:
-                video_extension = os.path.splitext(video_url)[1][1:]
-                video_date = re.sub('-', '', clip['start_time'][:10])
-                video_uploader_id = clip.get('user_id', clip.get('channel_id'))
-                video_id = clip['id']
-                video_title = clip.get('title', video_id)
-                info.append({
-                    'id': video_id,
-                    'url': video_url,
-                    'title': video_title,
-                    'uploader': clip.get('channel_name', video_uploader_id),
-                    'uploader_id': video_uploader_id,
-                    'upload_date': video_date,
-                    'ext': video_extension,
-                })
-        return (len(response), info)
  
  
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'invalid URL: %s' % url)
-
-        api_base = 'http://api.justin.tv'
-        paged = False
-        if mobj.group('channelid'):
-            paged = True
-            video_id = mobj.group('channelid')
-            api = api_base + '/channel/archives/%s.json' % video_id
-        elif mobj.group('chapterid'):
-            chapter_id = mobj.group('chapterid')
-
-            webpage = self._download_webpage(url, chapter_id)
-            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
-            if not m:
-                raise ExtractorError(u'Cannot find archive of a chapter')
-            archive_id = m.group(1)
-
-            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
-            chapter_info_xml = self._download_webpage(api, chapter_id,
-                                             note=u'Downloading chapter information',
-                                             errnote=u'Chapter information download failed')
-            doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
-            for a in doc.findall('.//archive'):
-                if archive_id == a.find('./id').text:
-                    break
-            else:
-                raise ExtractorError(u'Could not find chapter in chapter information')
-
-            video_url = a.find('./video_file_url').text
-            video_ext = video_url.rpartition('.')[2] or u'flv'
-
-            chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id
-            chapter_info_json = self._download_webpage(chapter_api_url, u'c' + chapter_id,
-                                   note='Downloading chapter metadata',
-                                   errnote='Download of chapter metadata failed')
-            chapter_info = json.loads(chapter_info_json)
-
-            bracket_start = int(doc.find('.//bracket_start').text)
-            bracket_end = int(doc.find('.//bracket_end').text)
-
-            # TODO determine start (and probably fix up file)
-            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
-            #video_url += u'?start=' + TODO:start_timestamp
-            # bracket_start is 13290, but we want 51670615
-            self._downloader.report_warning(u'Chapter detected, but we can just download the whole file. '
-                                            u'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
-
-            info = {
-                'id': u'c' + chapter_id,
-                'url': video_url,
-                'ext': video_ext,
-                'title': chapter_info['title'],
-                'thumbnail': chapter_info['preview'],
-                'description': chapter_info['description'],
-                'uploader': chapter_info['channel']['display_name'],
-                'uploader_id': chapter_info['channel']['name'],
-            }
-            return [info]
-        else:
-            video_id = mobj.group('videoid')
-            api = api_base + '/broadcast/by_archive/%s.json' % video_id
-
-        self.report_extraction(video_id)
-
-        info = []
-        offset = 0
-        limit = self._JUSTIN_PAGE_LIMIT
-        while True:
-            if paged:
-                self.report_download_page(video_id, offset)
-            page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
-            page_count, page_info = self._parse_page(page_url, video_id)
-            info.extend(page_info)
-            if not paged or page_count != limit:
-                break
-            offset += limit
-        return info
-
-class FunnyOrDieIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'invalid URL: %s' % url)
-
-        video_id = mobj.group('id')
-        webpage = self._download_webpage(url, video_id)
-
-        video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"',
-            webpage, u'video URL', flags=re.DOTALL)
-
-        title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
-            r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)
-
-        video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
-            webpage, u'description', fatal=False, flags=re.DOTALL)
-
-        info = {
-            'id': video_id,
-            'url': video_url,
-            'ext': 'mp4',
-            'title': title,
-            'description': video_description,
-        }
-        return [info]
-
-
-class UstreamIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
-    IE_NAME = u'ustream'
-
-    def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('videoID')
-
-        video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
-        webpage = self._download_webpage(url, video_id)
-
-        self.report_extraction(video_id)
-
-        video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
-            webpage, u'title')
-
-        uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
-            webpage, u'uploader', fatal=False, flags=re.DOTALL)
-
-        thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
-            webpage, u'thumbnail', fatal=False)
-
-        info = {
-                'id': video_id,
-                'url': video_url,
-                'ext': 'flv',
-                'title': video_title,
-                'uploader': uploader,
-                'thumbnail': thumbnail,
-               }
-        return info
-
-
-class RBMARadioIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
-
-    def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('videoID')
-
-        webpage = self._download_webpage(url, video_id)
-
-        json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
-            webpage, u'json data', flags=re.MULTILINE)
-
-        try:
-            data = json.loads(json_data)
-        except ValueError as e:
-            raise ExtractorError(u'Invalid JSON: ' + str(e))
-
-        video_url = data['akamai_url'] + '&cbr=256'
-        url_parts = compat_urllib_parse_urlparse(video_url)
-        video_ext = url_parts.path.rpartition('.')[2]
-        info = {
-                'id': video_id,
-                'url': video_url,
-                'ext': video_ext,
-                'title': data['title'],
-                'description': data.get('teaser_text'),
-                'location': data.get('country_of_origin'),
-                'uploader': data.get('host', {}).get('name'),
-                'uploader_id': data.get('host', {}).get('slug'),
-                'thumbnail': data.get('image', {}).get('large_url_2x'),
-                'duration': data.get('duration'),
-        }
-        return [info]
-
-
-class YouPornIE(InfoExtractor):
-    """Information extractor for youporn.com."""
-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
-
-    def _print_formats(self, formats):
-        """Print all available formats"""
-        print(u'Available formats:')
-        print(u'ext\t\tformat')
-        print(u'---------------------------------')
-        for format in formats:
-            print(u'%s\t\t%s'  % (format['ext'], format['format']))
-
-    def _specific(self, req_format, formats):
-        for x in formats:
-            if(x["format"]==req_format):
-                return x
-        return None
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-        video_id = mobj.group('videoid')
-
-        req = compat_urllib_request.Request(url)
-        req.add_header('Cookie', 'age_verified=1')
-        webpage = self._download_webpage(req, video_id)
-
-        # Get JSON parameters
-        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
-        try:
-            params = json.loads(json_params)
-        except:
-            raise ExtractorError(u'Invalid JSON')
-
-        self.report_extraction(video_id)
-        try:
-            video_title = params['title']
-            upload_date = unified_strdate(params['release_date_f'])
-            video_description = params['description']
-            video_uploader = params['submitted_by']
-            thumbnail = params['thumbnails'][0]['image']
-        except KeyError:
-            raise ExtractorError('Missing JSON parameter: ' + sys.exc_info()[1])
-
-        # Get all of the formats available
-        DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
-        download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
-            webpage, u'download list').strip()
-
-        # Get all of the links from the page
-        LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
-        links = re.findall(LINK_RE, download_list_html)
-        if(len(links) == 0):
-            raise ExtractorError(u'ERROR: no known formats available for video')
-
-        self.to_screen(u'Links found: %d' % len(links))
-
-        formats = []
-        for link in links:
-
-            # A link looks like this:
-            # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
-            # A path looks like this:
-            # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
-            video_url = unescapeHTML( link )
-            path = compat_urllib_parse_urlparse( video_url ).path
-            extension = os.path.splitext( path )[1][1:]
-            format = path.split('/')[4].split('_')[:2]
-            size = format[0]
-            bitrate = format[1]
-            format = "-".join( format )
-            # title = u'%s-%s-%s' % (video_title, size, bitrate)
-
-            formats.append({
-                'id': video_id,
-                'url': video_url,
-                'uploader': video_uploader,
-                'upload_date': upload_date,
-                'title': video_title,
-                'ext': extension,
-                'format': format,
-                'thumbnail': thumbnail,
-                'description': video_description
-            })
-
-        if self._downloader.params.get('listformats', None):
-            self._print_formats(formats)
-            return
-
-        req_format = self._downloader.params.get('format', None)
-        self.to_screen(u'Format: %s' % req_format)
-
-        if req_format is None or req_format == 'best':
-            return [formats[0]]
-        elif req_format == 'worst':
-            return [formats[-1]]
-        elif req_format in ('-1', 'all'):
-            return formats
-        else:
-            format = self._specific( req_format, formats )
-            if result is None:
-                raise ExtractorError(u'Requested format not available')
-            return [format]
-
-
-
-class PornotubeIE(InfoExtractor):
-    """Information extractor for pornotube.com."""
-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-
-        video_id = mobj.group('videoid')
-        video_title = mobj.group('title')
-
-        # Get webpage content
-        webpage = self._download_webpage(url, video_id)
-
-        # Get the video URL
-        VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
-        video_url = self._search_regex(VIDEO_URL_RE, webpage, u'video url')
-        video_url = compat_urllib_parse.unquote(video_url)
-
-        #Get the uploaded date
-        VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
-        upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
-        if upload_date: upload_date = unified_strdate(upload_date)
-
-        info = {'id': video_id,
-                'url': video_url,
-                'uploader': None,
-                'upload_date': upload_date,
-                'title': video_title,
-                'ext': 'flv',
-                'format': 'flv'}
-
-        return [info]
-
-class YouJizzIE(InfoExtractor):
-    """Information extractor for youjizz.com."""
-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-
-        video_id = mobj.group('videoid')
-
-        # Get webpage content
-        webpage = self._download_webpage(url, video_id)
-
-        # Get the video title
-        video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
-            webpage, u'title').strip()
-
-        # Get the embed page
-        result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
-        if result is None:
-            raise ExtractorError(u'ERROR: unable to extract embed page')
-
-        embed_page_url = result.group(0).strip()
-        video_id = result.group('videoid')
-
-        webpage = self._download_webpage(embed_page_url, video_id)
-
-        # Get the video URL
-        video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
-            webpage, u'video URL')
-
-        info = {'id': video_id,
-                'url': video_url,
-                'title': video_title,
-                'ext': 'flv',
-                'format': 'flv',
-                'player_url': embed_page_url}
-
-        return [info]
-
-class EightTracksIE(InfoExtractor):
-    IE_NAME = '8tracks'
-    _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-        playlist_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, playlist_id)
-
-        json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL)
-        data = json.loads(json_like)
-
-        session = str(random.randint(0, 1000000000))
-        mix_id = data['id']
-        track_count = data['tracks_count']
-        first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
-        next_url = first_url
-        res = []
-        for i in itertools.count():
-            api_json = self._download_webpage(next_url, playlist_id,
-                note=u'Downloading song information %s/%s' % (str(i+1), track_count),
-                errnote=u'Failed to download song information')
-            api_data = json.loads(api_json)
-            track_data = api_data[u'set']['track']
-            info = {
-                'id': track_data['id'],
-                'url': track_data['track_file_stream_url'],
-                'title': track_data['performer'] + u' - ' + track_data['name'],
-                'raw_title': track_data['name'],
-                'uploader_id': data['user']['login'],
-                'ext': 'm4a',
-            }
-            res.append(info)
-            if api_data['set']['at_last_track']:
-                break
-            next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
-        return res
-
-class KeekIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
-    IE_NAME = u'keek'
-
-    def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('videoID')
-
-        video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
-        thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
-        webpage = self._download_webpage(url, video_id)
-
-        video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
-            webpage, u'title')
-
-        uploader = self._html_search_regex(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
-            webpage, u'uploader', fatal=False)
-
-        info = {
-                'id': video_id,
-                'url': video_url,
-                'ext': 'mp4',
-                'title': video_title,
-                'thumbnail': thumbnail,
-                'uploader': uploader
-        }
-        return [info]
-
-
-class MySpassIE(InfoExtractor):
-    _VALID_URL = r'http://www.myspass.de/.*'
-
-    def _real_extract(self, url):
-        META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'
-
-        # video id is the last path element of the URL
-        # usually there is a trailing slash, so also try the second but last
-        url_path = compat_urllib_parse_urlparse(url).path
-        url_parent_path, video_id = os.path.split(url_path)
-        if not video_id:
-            _, video_id = os.path.split(url_parent_path)
-
-        # get metadata
-        metadata_url = META_DATA_URL_TEMPLATE % video_id
-        metadata_text = self._download_webpage(metadata_url, video_id)
-        metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
-
-        # extract values from metadata
-        url_flv_el = metadata.find('url_flv')
-        if url_flv_el is None:
-            raise ExtractorError(u'Unable to extract download url')
-        video_url = url_flv_el.text
-        extension = os.path.splitext(video_url)[1][1:]
-        title_el = metadata.find('title')
-        if title_el is None:
-            raise ExtractorError(u'Unable to extract title')
-        title = title_el.text
-        format_id_el = metadata.find('format_id')
-        if format_id_el is None:
-            format = ext
-        else:
-            format = format_id_el.text
-        description_el = metadata.find('description')
-        if description_el is not None:
-            description = description_el.text
-        else:
-            description = None
-        imagePreview_el = metadata.find('imagePreview')
-        if imagePreview_el is not None:
-            thumbnail = imagePreview_el.text
-        else:
-            thumbnail = None
-        info = {
-            'id': video_id,
-            'url': video_url,
-            'title': title,
-            'ext': extension,
-            'format': format,
-            'thumbnail': thumbnail,
-            'description': description
-        }
-        return [info]
-
-class SpiegelIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
-
-    def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('videoID')
  
  
-        webpage = self._download_webpage(url, video_id)
  
  
-        video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>',
-            webpage, u'title')
-
-        xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
-        xml_code = self._download_webpage(xml_url, video_id,
-                    note=u'Downloading XML', errnote=u'Failed to download XML')
  
  
-        idoc = xml.etree.ElementTree.fromstring(xml_code)
-        last_type = idoc[-1]
-        filename = last_type.findall('./filename')[0].text
-        duration = float(last_type.findall('./duration')[0].text)
  
  
-        video_url = 'http://video2.spiegel.de/flash/' + filename
-        video_ext = filename.rpartition('.')[2]
-        info = {
-            'id': video_id,
-            'url': video_url,
-            'ext': video_ext,
-            'title': video_title,
-            'duration': duration,
-        }
-        return [info]
  
  
-class LiveLeakIE(InfoExtractor):
  
  
-    _VALID_URL = r'^(?:http?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
-    IE_NAME = u'liveleak'
  
  
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
  
  
-        video_id = mobj.group('video_id')
  
  
-        webpage = self._download_webpage(url, video_id)
-
-        video_url = self._search_regex(r'file: "(.*?)",',
-            webpage, u'video URL')
-
-        video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
-            webpage, u'title').replace('LiveLeak.com -', '').strip()
-
-        video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
-            webpage, u'description', fatal=False)
  
  
-        video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
-            webpage, u'uploader', fatal=False)
  
  
-        info = {
-            'id':  video_id,
-            'url': video_url,
-            'ext': 'mp4',
-            'title': video_title,
-            'description': video_description,
-            'uploader': video_uploader
-        }
-
-        return [info]
-
-
-
-class TumblrIE(InfoExtractor):
-    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'
-
-    def _real_extract(self, url):
-        m_url = re.match(self._VALID_URL, url)
-        video_id = m_url.group('id')
-        blog = m_url.group('blog_name')
-
-        url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
-        webpage = self._download_webpage(url, video_id)
-
-        re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
-        video = re.search(re_video, webpage)
-        if video is None:
-           raise ExtractorError(u'Unable to extract video')
-        video_url = video.group('video_url')
-        ext = video.group('ext')
-
-        video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
-            webpage, u'thumbnail', fatal=False)  # We pick the first poster
-        if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\', '')
-
-        # The only place where you can get a title, it's not complete,
-        # but searching in other places doesn't work for all videos
-        video_title = self._html_search_regex(r'<title>(?P<title>.*?)</title>',
-            webpage, u'title', flags=re.DOTALL)
-
-        return [{'id': video_id,
-                 'url': video_url,
-                 'title': video_title,
-                 'thumbnail': video_thumbnail,
-                 'ext': ext
-                 }]
-
-class BandcampIE(InfoExtractor):
-    _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        title = mobj.group('title')
-        webpage = self._download_webpage(url, title)
-        # We get the link to the free download page
-        m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
-        if m_download is None:
-            raise ExtractorError(u'No free songs found')
-
-        download_link = m_download.group(1)
-        id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$', 
-                       webpage, re.MULTILINE|re.DOTALL).group('id')
-
-        download_webpage = self._download_webpage(download_link, id,
-                                                  'Downloading free downloads page')
-        # We get the dictionary of the track from some javascrip code
-        info = re.search(r'items: (.*?),$',
-                         download_webpage, re.MULTILINE).group(1)
-        info = json.loads(info)[0]
-        # We pick mp3-320 for now, until format selection can be easily implemented.
-        mp3_info = info[u'downloads'][u'mp3-320']
-        # If we try to use this url it says the link has expired
-        initial_url = mp3_info[u'url']
-        re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
-        m_url = re.match(re_url, initial_url)
-        #We build the url we will use to get the final track url
-        # This url is build in Bandcamp in the script download_bunde_*.js
-        request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), id, m_url.group('ts'))
-        final_url_webpage = self._download_webpage(request_url, id, 'Requesting download url')
-        # If we could correctly generate the .rand field the url would be
-        #in the "download_url" key
-        final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
-
-        track_info = {'id':id,
-                      'title' : info[u'title'],
-                      'ext' :   'mp3',
-                      'url' :   final_url,
-                      'thumbnail' : info[u'thumb_url'],
-                      'uploader' :  info[u'artist']
-                      }
-
-        return [track_info]
-
-class RedTubeIE(InfoExtractor):
-    """Information Extractor for redtube"""
-    _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
  
  
-    def _real_extract(self,url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
  
  
-        video_id = mobj.group('id')
-        video_extension = 'mp4'        
-        webpage = self._download_webpage(url, video_id)
  
  
-        self.report_extraction(video_id)
  
  
-        video_url = self._html_search_regex(r'<source src="(.+?)" type="video/mp4">',
-            webpage, u'video URL')
  
  
-        video_title = self._html_search_regex('<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
-            webpage, u'title')
  
  
-        return [{
-            'id':       video_id,
-            'url':      video_url,
-            'ext':      video_extension,
-            'title':    video_title,
-        }]
-        
  class InaIE(InfoExtractor):
      """Information Extractor for Ina.fr"""
      _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
  class InaIE(InfoExtractor):
      """Information Extractor for Ina.fr"""
      _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
@@ -842,39 +164,6 @@ class HowcastIE(InfoExtractor):
              'thumbnail': thumbnail,
          }]
  
              'thumbnail': thumbnail,
          }]
  
-class VineIE(InfoExtractor):
-    """Information Extractor for Vine.co"""
-    _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        video_id = mobj.group('id')
-        webpage_url = 'https://vine.co/v/' + video_id
-        webpage = self._download_webpage(webpage_url, video_id)
-
-        self.report_extraction(video_id)
-
-        video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
-            webpage, u'video URL')
-
-        video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
-            webpage, u'title')
-
-        thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)(\?.*?)?"',
-            webpage, u'thumbnail', fatal=False)
-
-        uploader = self._html_search_regex(r'<div class="user">.*?<h2>(.+?)</h2>',
-            webpage, u'uploader', fatal=False, flags=re.DOTALL)
-
-        return [{
-            'id':        video_id,
-            'url':       video_url,
-            'ext':       'mp4',
-            'title':     video_title,
-            'thumbnail': thumbnail,
-            'uploader':  uploader,
-        }]
  
  class FlickrIE(InfoExtractor):
      """Information Extractor for Flickr videos"""
  
  class FlickrIE(InfoExtractor):
      """Information Extractor for Flickr videos"""
@@ -1066,41 +355,6 @@ class HypemIE(InfoExtractor):
              'artist':   artist,
          }]
  
              'artist':   artist,
          }]
  
-class Vbox7IE(InfoExtractor):
-    """Information Extractor for Vbox7"""
-    _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'
-
-    def _real_extract(self,url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-        video_id = mobj.group(1)
-
-        redirect_page, urlh = self._download_webpage_handle(url, video_id)
-        new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
-        redirect_url = urlh.geturl() + new_location
-        webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')
-
-        title = self._html_search_regex(r'<title>(.*)</title>',
-            webpage, u'title').split('/')[0].strip()
-
-        ext = "flv"
-        info_url = "http://vbox7.com/play/magare.do"
-        data = compat_urllib_parse.urlencode({'as3':'1','vid':video_id})
-        info_request = compat_urllib_request.Request(info_url, data)
-        info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
-        if info_response is None:
-            raise ExtractorError(u'Unable to extract the media url')
-        (final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&'))
-
-        return [{
-            'id':        video_id,
-            'url':       final_url,
-            'ext':       ext,
-            'title':     title,
-            'thumbnail': thumbnail_url,
-        }]
  
  
  def gen_extractors():
  
  
  def gen_extractors():