Move G+ IE into its own file, and move google search into a more descriptive module

[youtube-dl.git] / youtube_dl / InfoExtractors.py
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

index 999521feb28bc54f301e9cf75424df98d2649453..ca1e43404122ff05c750830b6667e686a7fc65c8 100755 (executable)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -1,8 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-from __future__ import absolute_import
-
  import base64
  import datetime
  import itertools
@@ -25,13 +20,19 @@ from .extractor.common import InfoExtractor, SearchInfoExtractor
  
  from .extractor.ard import ARDIE
  from .extractor.arte import ArteTvIE
+from .extractor.bliptv import BlipTVIE, BlipTVUserIE
+from .extractor.comedycentral import ComedyCentralIE
  from .extractor.dailymotion import DailymotionIE
  from .extractor.gametrailers import GametrailersIE
+from .extractor.generic import GenericIE
+from .extractor.googleplus import GooglePlusIE
+from .extractor.googlesearch import GoogleSearchIE
  from .extractor.metacafe import MetacafeIE
+from .extractor.myvideo import MyVideoIE
  from .extractor.statigram import StatigramIE
  from .extractor.photobucket import PhotobucketIE
  from .extractor.vimeo import VimeoIE
-from .extractor.yahoo import YahooIE
+from .extractor.yahoo import YahooIE, YahooSearchIE
  from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
  from .extractor.zdf import ZDFIE
  
@@ -45,273 +46,13 @@ from .extractor.zdf import ZDFIE
  
  
  
-class GenericIE(InfoExtractor):
-    """Generic last-resort information extractor."""
  
-    _VALID_URL = r'.*'
-    IE_NAME = u'generic'
  
-    def report_download_webpage(self, video_id):
-        """Report webpage download."""
-        if not self._downloader.params.get('test', False):
-            self._downloader.report_warning(u'Falling back on generic information extractor.')
-        super(GenericIE, self).report_download_webpage(video_id)
  
-    def report_following_redirect(self, new_url):
-        """Report information extraction."""
-        self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
-
-    def _test_redirect(self, url):
-        """Check if it is a redirect, like url shorteners, in case return the new url."""
-        class HeadRequest(compat_urllib_request.Request):
-            def get_method(self):
-                return "HEAD"
-
-        class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
-            """
-            Subclass the HTTPRedirectHandler to make it use our
-            HeadRequest also on the redirected URL
-            """
-            def redirect_request(self, req, fp, code, msg, headers, newurl):
-                if code in (301, 302, 303, 307):
-                    newurl = newurl.replace(' ', '%20')
-                    newheaders = dict((k,v) for k,v in req.headers.items()
-                                      if k.lower() not in ("content-length", "content-type"))
-                    return HeadRequest(newurl,
-                                       headers=newheaders,
-                                       origin_req_host=req.get_origin_req_host(),
-                                       unverifiable=True)
-                else:
-                    raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)
-
-        class HTTPMethodFallback(compat_urllib_request.BaseHandler):
-            """
-            Fallback to GET if HEAD is not allowed (405 HTTP error)
-            """
-            def http_error_405(self, req, fp, code, msg, headers):
-                fp.read()
-                fp.close()
-
-                newheaders = dict((k,v) for k,v in req.headers.items()
-                                  if k.lower() not in ("content-length", "content-type"))
-                return self.parent.open(compat_urllib_request.Request(req.get_full_url(),
-                                                 headers=newheaders,
-                                                 origin_req_host=req.get_origin_req_host(),
-                                                 unverifiable=True))
-
-        # Build our opener
-        opener = compat_urllib_request.OpenerDirector()
-        for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler,
-                        HTTPMethodFallback, HEADRedirectHandler,
-                        compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
-            opener.add_handler(handler())
-
-        response = opener.open(HeadRequest(url))
-        if response is None:
-            raise ExtractorError(u'Invalid URL protocol')
-        new_url = response.geturl()
-
-        if url == new_url:
-            return False
-
-        self.report_following_redirect(new_url)
-        return new_url
  
-    def _real_extract(self, url):
-        new_url = self._test_redirect(url)
-        if new_url: return [self.url_result(new_url)]
  
-        video_id = url.split('/')[-1]
-        try:
-            webpage = self._download_webpage(url, video_id)
-        except ValueError as err:
-            # since this is the last-resort InfoExtractor, if
-            # this error is thrown, it'll be thrown here
-            raise ExtractorError(u'Invalid URL: %s' % url)
-
-        self.report_extraction(video_id)
-        # Start with something easy: JW Player in SWFObject
-        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
-        if mobj is None:
-            # Broaden the search a little bit
-            mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
-        if mobj is None:
-            # Broaden the search a little bit: JWPlayer JS loader
-            mobj = re.search(r'[^A-Za-z0-9]?file:\s*["\'](http[^\'"&]*)', webpage)
-        if mobj is None:
-            # Try to find twitter cards info
-            mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
-        if mobj is None:
-            # We look for Open Graph info:
-            # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
-            m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
-            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
-            if m_video_type is not None:
-                mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-
-        # It's possible that one of the regexes
-        # matched, but returned an empty group:
-        if mobj.group(1) is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-
-        video_url = compat_urllib_parse.unquote(mobj.group(1))
-        video_id = os.path.basename(video_url)
-
-        # here's a fun little line of code for you:
-        video_extension = os.path.splitext(video_id)[1][1:]
-        video_id = os.path.splitext(video_id)[0]
-
-        # it's tempting to parse this further, but you would
-        # have to take into account all the variations like
-        #   Video Title - Site Name
-        #   Site Name | Video Title
-        #   Video Title - Tagline | Site Name
-        # and so on and so forth; it's just not practical
-        video_title = self._html_search_regex(r'<title>(.*)</title>',
-            webpage, u'video title')
-
-        # video uploader is domain name
-        video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
-            url, u'video uploader')
-
-        return [{
-            'id':       video_id,
-            'url':      video_url,
-            'uploader': video_uploader,
-            'upload_date':  None,
-            'title':    video_title,
-            'ext':      video_extension,
-        }]
-
-
-
-class GoogleSearchIE(SearchInfoExtractor):
-    """Information Extractor for Google Video search queries."""
-    _MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"'
-    _MAX_RESULTS = 1000
-    IE_NAME = u'video.google:search'
-    _SEARCH_KEY = 'gvsearch'
-
-    def _get_n_results(self, query, n):
-        """Get a specified number of results for a query"""
-
-        res = {
-            '_type': 'playlist',
-            'id': query,
-            'entries': []
-        }
-
-        for pagenum in itertools.count(1):
-            result_url = u'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en' % (compat_urllib_parse.quote_plus(query), pagenum*10)
-            webpage = self._download_webpage(result_url, u'gvsearch:' + query,
-                                             note='Downloading result page ' + str(pagenum))
-
-            for mobj in re.finditer(r'<h3 class="r"><a href="([^"]+)"', webpage):
-                e = {
-                    '_type': 'url',
-                    'url': mobj.group(1)
-                }
-                res['entries'].append(e)
-
-            if (pagenum * 10 > n) or not re.search(self._MORE_PAGES_INDICATOR, webpage):
-                return res
-
-class YahooSearchIE(SearchInfoExtractor):
-    """Information Extractor for Yahoo! Video search queries."""
-
-    _MAX_RESULTS = 1000
-    IE_NAME = u'screen.yahoo:search'
-    _SEARCH_KEY = 'yvsearch'
-
-    def _get_n_results(self, query, n):
-        """Get a specified number of results for a query"""
-
-        res = {
-            '_type': 'playlist',
-            'id': query,
-            'entries': []
-        }
-        for pagenum in itertools.count(0): 
-            result_url = u'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
-            webpage = self._download_webpage(result_url, query,
-                                             note='Downloading results page '+str(pagenum+1))
-            info = json.loads(webpage)
-            m = info[u'm']
-            results = info[u'results']
-
-            for (i, r) in enumerate(results):
-                if (pagenum * 30) +i >= n:
-                    break
-                mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
-                e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
-                res['entries'].append(e)
-            if (pagenum * 30 +i >= n) or (m[u'last'] >= (m[u'total'] -1 )):
-                break
-
-        return res
  
  
-class BlipTVUserIE(InfoExtractor):
-    """Information Extractor for blip.tv users."""
-
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
-    _PAGE_SIZE = 12
-    IE_NAME = u'blip.tv:user'
-
-    def _real_extract(self, url):
-        # Extract username
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-
-        username = mobj.group(1)
-
-        page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
-
-        page = self._download_webpage(url, username, u'Downloading user page')
-        mobj = re.search(r'data-users-id="([^"]+)"', page)
-        page_base = page_base % mobj.group(1)
-
-
-        # Download video ids using BlipTV Ajax calls. Result size per
-        # query is limited (currently to 12 videos) so we need to query
-        # page by page until there are no video ids - it means we got
-        # all of them.
-
-        video_ids = []
-        pagenum = 1
-
-        while True:
-            url = page_base + "&page=" + str(pagenum)
-            page = self._download_webpage(url, username,
-                                          u'Downloading video ids from page %d' % pagenum)
-
-            # Extract video identifiers
-            ids_in_page = []
-
-            for mobj in re.finditer(r'href="/([^"]+)"', page):
-                if mobj.group(1) not in ids_in_page:
-                    ids_in_page.append(unescapeHTML(mobj.group(1)))
-
-            video_ids.extend(ids_in_page)
-
-            # A little optimization - if current page is not
-            # "full", ie. does not contain PAGE_SIZE video ids then
-            # we can assume that this page is the last one - there
-            # are no more ids on further pages - no need to query
-            # again.
-
-            if len(ids_in_page) < self._PAGE_SIZE:
-                break
-
-            pagenum += 1
-
-        urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids]
-        url_entries = [self.url_result(url, 'BlipTV') for url in urls]
-        return [self.playlist_result(url_entries, playlist_title = username)]
-
  
  class DepositFilesIE(InfoExtractor):
      """Information extractor for depositfiles.com"""
@@ -455,421 +196,9 @@ class FacebookIE(InfoExtractor):
          return [info]
  
  
-class BlipTVIE(InfoExtractor):
-    """Information extractor for blip.tv"""
-
-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
-    _URL_EXT = r'^.*\.([a-z0-9]+)$'
-    IE_NAME = u'blip.tv'
-
-    def report_direct_download(self, title):
-        """Report information extraction."""
-        self.to_screen(u'%s: Direct download detected' % title)
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-
-        # See https://github.com/rg3/youtube-dl/issues/857
-        api_mobj = re.match(r'http://a\.blip\.tv/api\.swf#(?P<video_id>[\d\w]+)', url)
-        if api_mobj is not None:
-            url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
-        urlp = compat_urllib_parse_urlparse(url)
-        if urlp.path.startswith('/play/'):
-            request = compat_urllib_request.Request(url)
-            response = compat_urllib_request.urlopen(request)
-            redirecturl = response.geturl()
-            rurlp = compat_urllib_parse_urlparse(redirecturl)
-            file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
-            url = 'http://blip.tv/a/a-' + file_id
-            return self._real_extract(url)
-
-
-        if '?' in url:
-            cchar = '&'
-        else:
-            cchar = '?'
-        json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
-        request = compat_urllib_request.Request(json_url)
-        request.add_header('User-Agent', 'iTunes/10.6.1')
-        self.report_extraction(mobj.group(1))
-        info = None
-        try:
-            urlh = compat_urllib_request.urlopen(request)
-            if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
-                basename = url.split('/')[-1]
-                title,ext = os.path.splitext(basename)
-                title = title.decode('UTF-8')
-                ext = ext.replace('.', '')
-                self.report_direct_download(title)
-                info = {
-                    'id': title,
-                    'url': url,
-                    'uploader': None,
-                    'upload_date': None,
-                    'title': title,
-                    'ext': ext,
-                    'urlhandle': urlh
-                }
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
-        if info is None: # Regular URL
-            try:
-                json_code_bytes = urlh.read()
-                json_code = json_code_bytes.decode('utf-8')
-            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err))
-
-            try:
-                json_data = json.loads(json_code)
-                if 'Post' in json_data:
-                    data = json_data['Post']
-                else:
-                    data = json_data
-
-                upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
-                video_url = data['media']['url']
-                umobj = re.match(self._URL_EXT, video_url)
-                if umobj is None:
-                    raise ValueError('Can not determine filename extension')
-                ext = umobj.group(1)
-
-                info = {
-                    'id': data['item_id'],
-                    'url': video_url,
-                    'uploader': data['display_name'],
-                    'upload_date': upload_date,
-                    'title': data['title'],
-                    'ext': ext,
-                    'format': data['media']['mimeType'],
-                    'thumbnail': data['thumbnailUrl'],
-                    'description': data['description'],
-                    'player_url': data['embedUrl'],
-                    'user_agent': 'iTunes/10.6.1',
-                }
-            except (ValueError,KeyError) as err:
-                raise ExtractorError(u'Unable to parse video information: %s' % repr(err))
-
-        return [info]
-
-
-class MyVideoIE(InfoExtractor):
-    """Information Extractor for myvideo.de."""
-
-    _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
-    IE_NAME = u'myvideo'
-
-    # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
-    # Released into the Public Domain by Tristan Fischer on 2013-05-19
-    # https://github.com/rg3/youtube-dl/pull/842
-    def __rc4crypt(self,data, key):
-        x = 0
-        box = list(range(256))
-        for i in list(range(256)):
-            x = (x + box[i] + compat_ord(key[i % len(key)])) % 256
-            box[i], box[x] = box[x], box[i]
-        x = 0
-        y = 0
-        out = ''
-        for char in data:
-            x = (x + 1) % 256
-            y = (y + box[x]) % 256
-            box[x], box[y] = box[y], box[x]
-            out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256])
-        return out
-
-    def __md5(self,s):
-        return hashlib.md5(s).hexdigest().encode()
  
-    def _real_extract(self,url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'invalid URL: %s' % url)
  
-        video_id = mobj.group(1)
  
-        GK = (
-          b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
-          b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
-          b'TnpsbA0KTVRkbU1tSTRNdz09'
-        )
-
-        # Get video webpage
-        webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
-        webpage = self._download_webpage(webpage_url, video_id)
-
-        mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage)
-        if mobj is not None:
-            self.report_extraction(video_id)
-            video_url = mobj.group(1) + '.flv'
-
-            video_title = self._html_search_regex('<title>([^<]+)</title>',
-                webpage, u'title')
-
-            video_ext = self._search_regex('[.](.+?)$', video_url, u'extension')
-
-            return [{
-                'id':       video_id,
-                'url':      video_url,
-                'uploader': None,
-                'upload_date':  None,
-                'title':    video_title,
-                'ext':      u'flv',
-            }]
-
-        # try encxml
-        mobj = re.search('var flashvars={(.+?)}', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract video')
-
-        params = {}
-        encxml = ''
-        sec = mobj.group(1)
-        for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec):
-            if not a == '_encxml':
-                params[a] = b
-            else:
-                encxml = compat_urllib_parse.unquote(b)
-        if not params.get('domain'):
-            params['domain'] = 'www.myvideo.de'
-        xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
-        if 'flash_playertype=MTV' in xmldata_url:
-            self._downloader.report_warning(u'avoiding MTV player')
-            xmldata_url = (
-                'http://www.myvideo.de/dynamic/get_player_video_xml.php'
-                '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
-            ) % video_id
-
-        # get enc data
-        enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1]
-        enc_data_b = binascii.unhexlify(enc_data)
-        sk = self.__md5(
-            base64.b64decode(base64.b64decode(GK)) +
-            self.__md5(
-                str(video_id).encode('utf-8')
-            )
-        )
-        dec_data = self.__rc4crypt(enc_data_b, sk)
-
-        # extracting infos
-        self.report_extraction(video_id)
-
-        video_url = None
-        mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
-        if mobj:
-            video_url = compat_urllib_parse.unquote(mobj.group(1))
-            if 'myvideo2flash' in video_url:
-                self._downloader.report_warning(u'forcing RTMPT ...')
-                video_url = video_url.replace('rtmpe://', 'rtmpt://')
-
-        if not video_url:
-            # extract non rtmp videos
-            mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
-            if mobj is None:
-                raise ExtractorError(u'unable to extract url')
-            video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
-
-        video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file')
-        video_file = compat_urllib_parse.unquote(video_file)
-
-        if not video_file.endswith('f4m'):
-            ppath, prefix = video_file.split('.')
-            video_playpath = '%s:%s' % (prefix, ppath)
-            video_hls_playlist = ''
-        else:
-            video_playpath = ''
-            video_hls_playlist = (
-                video_filepath + video_file
-            ).replace('.f4m', '.m3u8')
-
-        video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj')
-        video_swfobj = compat_urllib_parse.unquote(video_swfobj)
-
-        video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
-            webpage, u'title')
-
-        return [{
-            'id':                 video_id,
-            'url':                video_url,
-            'tc_url':             video_url,
-            'uploader':           None,
-            'upload_date':        None,
-            'title':              video_title,
-            'ext':                u'flv',
-            'play_path':          video_playpath,
-            'video_file':         video_file,
-            'video_hls_playlist': video_hls_playlist,
-            'player_url':         video_swfobj,
-        }]
-
-
-class ComedyCentralIE(InfoExtractor):
-    """Information extractor for The Daily Show and Colbert Report """
-
-    # urls can be abbreviations like :thedailyshow or :colbert
-    # urls for episodes like:
-    # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
-    #                     or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
-    #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
-    _VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
-                      |(https?://)?(www\.)?
-                          (?P<showname>thedailyshow|colbertnation)\.com/
-                         (full-episodes/(?P<episode>.*)|
-                          (?P<clip>
-                              (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
-                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
-                     $"""
-
-    _available_formats = ['3500', '2200', '1700', '1200', '750', '400']
-
-    _video_extensions = {
-        '3500': 'mp4',
-        '2200': 'mp4',
-        '1700': 'mp4',
-        '1200': 'mp4',
-        '750': 'mp4',
-        '400': 'mp4',
-    }
-    _video_dimensions = {
-        '3500': '1280x720',
-        '2200': '960x540',
-        '1700': '768x432',
-        '1200': '640x360',
-        '750': '512x288',
-        '400': '384x216',
-    }
-
-    @classmethod
-    def suitable(cls, url):
-        """Receives a URL and returns True if suitable for this IE."""
-        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
-
-    def _print_formats(self, formats):
-        print('Available formats:')
-        for x in formats:
-            print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???')))
-
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-
-        if mobj.group('shortname'):
-            if mobj.group('shortname') in ('tds', 'thedailyshow'):
-                url = u'http://www.thedailyshow.com/full-episodes/'
-            else:
-                url = u'http://www.colbertnation.com/full-episodes/'
-            mobj = re.match(self._VALID_URL, url, re.VERBOSE)
-            assert mobj is not None
-
-        if mobj.group('clip'):
-            if mobj.group('showname') == 'thedailyshow':
-                epTitle = mobj.group('tdstitle')
-            else:
-                epTitle = mobj.group('cntitle')
-            dlNewest = False
-        else:
-            dlNewest = not mobj.group('episode')
-            if dlNewest:
-                epTitle = mobj.group('showname')
-            else:
-                epTitle = mobj.group('episode')
-
-        self.report_extraction(epTitle)
-        webpage,htmlHandle = self._download_webpage_handle(url, epTitle)
-        if dlNewest:
-            url = htmlHandle.geturl()
-            mobj = re.match(self._VALID_URL, url, re.VERBOSE)
-            if mobj is None:
-                raise ExtractorError(u'Invalid redirected URL: ' + url)
-            if mobj.group('episode') == '':
-                raise ExtractorError(u'Redirected URL is still not specific: ' + url)
-            epTitle = mobj.group('episode')
-
-        mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
-
-        if len(mMovieParams) == 0:
-            # The Colbert Report embeds the information in a without
-            # a URL prefix; so extract the alternate reference
-            # and then add the URL prefix manually.
-
-            altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
-            if len(altMovieParams) == 0:
-                raise ExtractorError(u'unable to find Flash URL in webpage ' + url)
-            else:
-                mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
-
-        uri = mMovieParams[0][1]
-        indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
-        indexXml = self._download_webpage(indexUrl, epTitle,
-                                          u'Downloading show index',
-                                          u'unable to download episode index')
-
-        results = []
-
-        idoc = xml.etree.ElementTree.fromstring(indexXml)
-        itemEls = idoc.findall('.//item')
-        for partNum,itemEl in enumerate(itemEls):
-            mediaId = itemEl.findall('./guid')[0].text
-            shortMediaId = mediaId.split(':')[-1]
-            showId = mediaId.split(':')[-2].replace('.com', '')
-            officialTitle = itemEl.findall('./title')[0].text
-            officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)
-
-            configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
-                        compat_urllib_parse.urlencode({'uri': mediaId}))
-            configXml = self._download_webpage(configUrl, epTitle,
-                                               u'Downloading configuration for %s' % shortMediaId)
-
-            cdoc = xml.etree.ElementTree.fromstring(configXml)
-            turls = []
-            for rendition in cdoc.findall('.//rendition'):
-                finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
-                turls.append(finfo)
-
-            if len(turls) == 0:
-                self._downloader.report_error(u'unable to download ' + mediaId + ': No videos found')
-                continue
-
-            if self._downloader.params.get('listformats', None):
-                self._print_formats([i[0] for i in turls])
-                return
-
-            # For now, just pick the highest bitrate
-            format,rtmp_video_url = turls[-1]
-
-            # Get the format arg from the arg stream
-            req_format = self._downloader.params.get('format', None)
-
-            # Select format if we can find one
-            for f,v in turls:
-                if f == req_format:
-                    format, rtmp_video_url = f, v
-                    break
-
-            m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
-            if not m:
-                raise ExtractorError(u'Cannot transform RTMP url')
-            base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
-            video_url = base + m.group('finalid')
-
-            effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
-            info = {
-                'id': shortMediaId,
-                'url': video_url,
-                'uploader': showId,
-                'upload_date': officialDate,
-                'title': effTitle,
-                'ext': 'mp4',
-                'format': format,
-                'thumbnail': None,
-                'description': officialTitle,
-            }
-            results.append(info)
-
-        return results
  
  
  class EscapistIE(InfoExtractor):
@@ -1598,79 +927,6 @@ class XNXXIE(InfoExtractor):
          }]
  
  
-class GooglePlusIE(InfoExtractor):
-    """Information extractor for plus.google.com."""
-
-    _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
-    IE_NAME = u'plus.google'
-
-    def _real_extract(self, url):
-        # Extract id from URL
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-
-        post_url = mobj.group(0)
-        video_id = mobj.group(1)
-
-        video_extension = 'flv'
-
-        # Step 1, Retrieve post webpage to extract further information
-        webpage = self._download_webpage(post_url, video_id, u'Downloading entry webpage')
-
-        self.report_extraction(video_id)
-
-        # Extract update date
-        upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>',
-            webpage, u'upload date', fatal=False)
-        if upload_date:
-            # Convert timestring to a format suitable for filename
-            upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
-            upload_date = upload_date.strftime('%Y%m%d')
-
-        # Extract uploader
-        uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>',
-            webpage, u'uploader', fatal=False)
-
-        # Extract title
-        # Get the first line for title
-        video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
-            webpage, 'title', default=u'NA')
-
-        # Step 2, Stimulate clicking the image box to launch video
-        video_page = self._search_regex('"(https\://plus\.google\.com/photos/.*?)",,"image/jpeg","video"\]',
-            webpage, u'video page URL')
-        webpage = self._download_webpage(video_page, video_id, u'Downloading video page')
-
-        # Extract video links on video page
-        """Extract video links of all sizes"""
-        pattern = '\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
-        mobj = re.findall(pattern, webpage)
-        if len(mobj) == 0:
-            raise ExtractorError(u'Unable to extract video links')
-
-        # Sort in resolution
-        links = sorted(mobj)
-
-        # Choose the lowest of the sort, i.e. highest resolution
-        video_url = links[-1]
-        # Only get the url. The resolution part in the tuple has no use anymore
-        video_url = video_url[-1]
-        # Treat escaped \u0026 style hex
-        try:
-            video_url = video_url.decode("unicode_escape")
-        except AttributeError: # Python 3
-            video_url = bytes(video_url, 'ascii').decode('unicode-escape')
-
-
-        return [{
-            'id':       video_id,
-            'url':      video_url,
-            'uploader': uploader,
-            'upload_date':  upload_date,
-            'title':    video_title,
-            'ext':      video_extension,
-        }]
  
  class NBAIE(InfoExtractor):
      _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'