[redtube] move into own file

[youtube-dl.git] / youtube_dl / InfoExtractors.py
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

index 2b8e6e6c64810cb4ed5d303f1abebd57516f6718..c06ecbe52aad148ce036cd98c74ef1f371b27c78 100755 (executable)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -20,6 +20,7 @@ from .extractor.common import InfoExtractor, SearchInfoExtractor
  
  from .extractor.ard import ARDIE
  from .extractor.arte import ArteTvIE
+from .extractor.bandcamp import BandcampIE
  from .extractor.bliptv import BlipTVIE, BlipTVUserIE
  from .extractor.comedycentral import ComedyCentralIE
  from .extractor.collegehumor import CollegeHumorIE
@@ -47,6 +48,7 @@ from .extractor.statigram import StatigramIE
  from .extractor.photobucket import PhotobucketIE
  from .extractor.pornotube import PornotubeIE
  from .extractor.rbmaradio import RBMARadioIE
+from .extractor.redtube import RedTubeIE
  from .extractor.soundcloud import SoundcloudIE, SoundcloudSetIE
  from .extractor.spiegel import SpiegelIE
  from .extractor.stanfordoc import StanfordOpenClassroomIE
@@ -54,7 +56,9 @@ from .extractor.steam import SteamIE
  from .extractor.ted import TEDIE
  from .extractor.tumblr import TumblrIE
  from .extractor.ustream import UstreamIE
+from .extractor.vbox7 import Vbox7IE
  from .extractor.vimeo import VimeoIE
+from .extractor.vine import VineIE
  from .extractor.worldstarhiphop import WorldStarHipHopIE
  from .extractor.xnxx import XNXXIE
  from .extractor.xvideos import XVideosIE
@@ -98,80 +102,7 @@ from .extractor.zdf import ZDFIE
  
  
  
-class BandcampIE(InfoExtractor):
-    _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
  
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        title = mobj.group('title')
-        webpage = self._download_webpage(url, title)
-        # We get the link to the free download page
-        m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
-        if m_download is None:
-            raise ExtractorError(u'No free songs found')
-
-        download_link = m_download.group(1)
-        id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$', 
-                       webpage, re.MULTILINE|re.DOTALL).group('id')
-
-        download_webpage = self._download_webpage(download_link, id,
-                                                  'Downloading free downloads page')
-        # We get the dictionary of the track from some javascrip code
-        info = re.search(r'items: (.*?),$',
-                         download_webpage, re.MULTILINE).group(1)
-        info = json.loads(info)[0]
-        # We pick mp3-320 for now, until format selection can be easily implemented.
-        mp3_info = info[u'downloads'][u'mp3-320']
-        # If we try to use this url it says the link has expired
-        initial_url = mp3_info[u'url']
-        re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
-        m_url = re.match(re_url, initial_url)
-        #We build the url we will use to get the final track url
-        # This url is build in Bandcamp in the script download_bunde_*.js
-        request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), id, m_url.group('ts'))
-        final_url_webpage = self._download_webpage(request_url, id, 'Requesting download url')
-        # If we could correctly generate the .rand field the url would be
-        #in the "download_url" key
-        final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
-
-        track_info = {'id':id,
-                      'title' : info[u'title'],
-                      'ext' :   'mp3',
-                      'url' :   final_url,
-                      'thumbnail' : info[u'thumb_url'],
-                      'uploader' :  info[u'artist']
-                      }
-
-        return [track_info]
-
-class RedTubeIE(InfoExtractor):
-    """Information Extractor for redtube"""
-    _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
-
-    def _real_extract(self,url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-
-        video_id = mobj.group('id')
-        video_extension = 'mp4'        
-        webpage = self._download_webpage(url, video_id)
-
-        self.report_extraction(video_id)
-
-        video_url = self._html_search_regex(r'<source src="(.+?)" type="video/mp4">',
-            webpage, u'video URL')
-
-        video_title = self._html_search_regex('<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
-            webpage, u'title')
-
-        return [{
-            'id':       video_id,
-            'url':      video_url,
-            'ext':      video_extension,
-            'title':    video_title,
-        }]
-        
  class InaIE(InfoExtractor):
      """Information Extractor for Ina.fr"""
      _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
@@ -233,39 +164,6 @@ class HowcastIE(InfoExtractor):
              'thumbnail': thumbnail,
          }]
  
-class VineIE(InfoExtractor):
-    """Information Extractor for Vine.co"""
-    _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        video_id = mobj.group('id')
-        webpage_url = 'https://vine.co/v/' + video_id
-        webpage = self._download_webpage(webpage_url, video_id)
-
-        self.report_extraction(video_id)
-
-        video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
-            webpage, u'video URL')
-
-        video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
-            webpage, u'title')
-
-        thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)(\?.*?)?"',
-            webpage, u'thumbnail', fatal=False)
-
-        uploader = self._html_search_regex(r'<div class="user">.*?<h2>(.+?)</h2>',
-            webpage, u'uploader', fatal=False, flags=re.DOTALL)
-
-        return [{
-            'id':        video_id,
-            'url':       video_url,
-            'ext':       'mp4',
-            'title':     video_title,
-            'thumbnail': thumbnail,
-            'uploader':  uploader,
-        }]
  
  class FlickrIE(InfoExtractor):
      """Information Extractor for Flickr videos"""
@@ -457,41 +355,6 @@ class HypemIE(InfoExtractor):
              'artist':   artist,
          }]
  
-class Vbox7IE(InfoExtractor):
-    """Information Extractor for Vbox7"""
-    _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'
-
-    def _real_extract(self,url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-        video_id = mobj.group(1)
-
-        redirect_page, urlh = self._download_webpage_handle(url, video_id)
-        new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
-        redirect_url = urlh.geturl() + new_location
-        webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')
-
-        title = self._html_search_regex(r'<title>(.*)</title>',
-            webpage, u'title').split('/')[0].strip()
-
-        ext = "flv"
-        info_url = "http://vbox7.com/play/magare.do"
-        data = compat_urllib_parse.urlencode({'as3':'1','vid':video_id})
-        info_request = compat_urllib_request.Request(info_url, data)
-        info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
-        if info_response is None:
-            raise ExtractorError(u'Unable to extract the media url')
-        (final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&'))
-
-        return [{
-            'id':        video_id,
-            'url':       final_url,
-            'ext':       ext,
-            'title':     title,
-            'thumbnail': thumbnail_url,
-        }]
  
  
  def gen_extractors():