]> gitweb @ CieloNegro.org - youtube-dl.git/commitdiff
Merge remote-tracking branch 'origin/vimeo_passworded_videos'
author Philipp Hagemeister <phihag@phihag.de>
Sun, 23 Jun 2013 17:00:16 +0000 (19:00 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Sun, 23 Jun 2013 17:00:16 +0000 (19:00 +0200)
1  2 
youtube_dl/InfoExtractors.py
youtube_dl/__init__.py

index 507dfc324599532be67eadb1ceda4e445f287cd0,e27e0cb7c6e2b03b012666e1d8008af4b8d69bec..574d417beecc97718edcc6da864acb4d503d325a
@@@ -191,47 -191,6 +191,47 @@@ class InfoExtractor(object)
              video_info['title'] = playlist_title
          return video_info
  
 +    def _search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
 +        """
 +        Perform a regex search on the given string, using a single or a list of
 +        patterns returning the first matching group.
 +        In case of failure return a default value or raise a WARNING or a
 +        ExtractorError, depending on fatal, specifying the field name.
 +        """
 +        if isinstance(pattern, (str, compat_str, compiled_regex_type)):
 +            mobj = re.search(pattern, string, flags)
 +        else:
 +            for p in pattern:
 +                mobj = re.search(p, string, flags)
 +                if mobj: break
 +
 +        if sys.stderr.isatty() and os.name != 'nt':
 +            _name = u'\033[0;34m%s\033[0m' % name
 +        else:
 +            _name = name
 +
 +        if mobj:
 +            # return the first matching group
 +            return next(g for g in mobj.groups() if g is not None)
 +        elif default is not None:
 +            return default
 +        elif fatal:
 +            raise ExtractorError(u'Unable to extract %s' % _name)
 +        else:
 +            self._downloader.report_warning(u'unable to extract %s; '
 +                u'please report this issue on GitHub.' % _name)
 +            return None
 +
 +    def _html_search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
 +        """
 +        Like _search_regex, but strips HTML tags and unescapes entities.
 +        """
 +        res = self._search_regex(pattern, string, name, default, fatal, flags)
 +        if res:
 +            return clean_html(res).strip()
 +        else:
 +            return res
 +
  class SearchInfoExtractor(InfoExtractor):
      """
      Base class for paged search queries extractors.
@@@ -420,7 -379,7 +420,7 @@@ class YoutubeIE(InfoExtractor)
      def _request_automatic_caption(self, video_id, webpage):
          """We need the webpage for getting the captions url, pass it as an
             argument to speed up the process."""
 -        sub_lang = self._downloader.params.get('subtitleslang')
 +        sub_lang = self._downloader.params.get('subtitleslang') or 'en'
          sub_format = self._downloader.params.get('subtitlesformat')
          self.to_screen(u'%s: Looking for automatic captions' % video_id)
          mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
                          pass
                      else:
                          # We report the original error
 -                        self._downloader.report_error(sub_error)
 +                        self._downloader.report_warning(sub_error)
  
          if self._downloader.params.get('allsubtitles', False):
              video_subtitles = self._extract_all_subtitles(video_id)
              for video_subtitle in video_subtitles:
                  (sub_error, sub_lang, sub) = video_subtitle
                  if sub_error:
 -                    self._downloader.report_error(sub_error)
 +                    self._downloader.report_warning(sub_error)
  
          if self._downloader.params.get('listsubtitles', False):
              sub_lang_list = self._list_available_subtitles(video_id)
              for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
                  url_data = compat_parse_qs(url_data_str)
                  if 'itag' in url_data and 'url' in url_data:
 -                    url = url_data['url'][0] + '&signature=' + url_data['sig'][0]
 -                    if not 'ratebypass' in url: url += '&ratebypass=yes'
 +                    url = url_data['url'][0]
 +                    if 'sig' in url_data:
 +                        url += '&signature=' + url_data['sig'][0]
 +                    if 'ratebypass' not in url:
 +                        url += '&ratebypass=yes'
                      url_map[url_data['itag'][0]] = url
  
              format_limit = self._downloader.params.get('format_limit', None)
@@@ -943,10 -899,16 +943,10 @@@ class DailymotionIE(InfoExtractor)
          video_title = unescapeHTML(mobj.group('title'))
  
          video_uploader = None
 -        mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', webpage)
 -        if mobj is None:
 -            # lookin for official user
 -            mobj_official = re.search(r'<span rel="author"[^>]+?>([^<]+?)</span>', webpage)
 -            if mobj_official is None:
 -                self._downloader.report_warning(u'unable to extract uploader nickname')
 -            else:
 -                video_uploader = mobj_official.group(1)
 -        else:
 -            video_uploader = mobj.group(1)
 +        video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
 +                                             # Looking for official user
 +                                             r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
 +                                            webpage, 'video uploader')
  
          video_upload_date = None
          mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
@@@ -1002,13 -964,18 +1002,13 @@@ class PhotobucketIE(InfoExtractor)
              }]
  
          # We try looking in other parts of the webpage
 -        mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract media URL')
 -        mediaURL = compat_urllib_parse.unquote(mobj.group(1))
 -
 -        video_url = mediaURL
 +        video_url = self._search_regex(r'<link rel="video_src" href=".*\?file=([^"]+)" />',
 +            webpage, u'video URL')
  
          mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
          if mobj is None:
              raise ExtractorError(u'Unable to extract title')
          video_title = mobj.group(1).decode('utf-8')
 -
          video_uploader = mobj.group(2).decode('utf-8')
  
          return [{
@@@ -1096,6 -1063,25 +1096,25 @@@ class VimeoIE(InfoExtractor)
      _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
      IE_NAME = u'vimeo'
  
+     def _verify_video_password(self, url, video_id, webpage):
+         password = self._downloader.params.get('password', None)
+         if password is None:
+             raise ExtractorError(u'This video is protected by a password, use the --password option')
+         token = re.search(r'xsrft: \'(.*?)\'', webpage).group(1)
+         data = compat_urllib_parse.urlencode({'password': password,
+                                               'token': token})
+         # I didn't manage to use the password with https
+         if url.startswith('https'):
+             pass_url = url.replace('https','http')
+         else:
+             pass_url = url
+         password_request = compat_urllib_request.Request(pass_url+'/password', data)
+         password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+         password_request.add_header('Cookie', 'xsrft=%s' % token)
+         pass_web = self._download_webpage(password_request, video_id,
+                                           u'Verifying the password',
+                                           u'Wrong password')
      def _real_extract(self, url, new_video=True):
          # Extract ID from URL
          mobj = re.match(self._VALID_URL, url)
          except:
              if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
                  raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option')
+             if re.search('If so please provide the correct password.', webpage):
+                 self._verify_video_password(url, video_id, webpage)
+                 return self._real_extract(url)
              else:
                  raise ExtractorError(u'Unable to extract info section')
  
@@@ -1409,13 -1399,6 +1432,13 @@@ class GenericIE(InfoExtractor)
          if mobj is None:
              # Try to find twitter cards info
              mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
 +        if mobj is None:
 +            # We look for Open Graph info:
 +            # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
 +            m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
 +            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
 +            if m_video_type is not None:
 +                mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
          if mobj is None:
              raise ExtractorError(u'Invalid URL: %s' % url)
  
          #   Site Name | Video Title
          #   Video Title - Tagline | Site Name
          # and so on and so forth; it's just not practical
 -        mobj = re.search(r'<title>(.*)</title>', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract title')
 -        video_title = mobj.group(1)
 +        video_title = self._html_search_regex(r'<title>(.*)</title>',
 +            webpage, u'video title')
  
          # video uploader is domain name
 -        mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract title')
 -        video_uploader = mobj.group(1)
 +        video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
 +            url, u'video uploader')
  
          return [{
              'id':       video_id,
@@@ -1580,7 -1567,7 +1603,7 @@@ class YoutubePlaylistIE(InfoExtractor)
                       |
                          ((?:PL|EC|UU)[0-9A-Za-z-_]{10,})
                       )"""
 -    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json'
 +    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
      _MAX_RESULTS = 50
      IE_NAME = u'youtube:playlist'
  
                  # Number of videos is a multiple of self._MAX_RESULTS
                  break
  
 -            videos += [ (entry['yt$position']['$t'], entry['content']['src'])
 -                        for entry in response['feed']['entry']
 -                        if 'content' in entry ]
 +            for entry in response['feed']['entry']:
 +                index = entry['yt$position']['$t']
 +                if 'media$group' in entry and 'media$player' in entry['media$group']:
 +                    videos.append((index, entry['media$group']['media$player']['url']))
  
              if len(response['feed']['entry']) < self._MAX_RESULTS:
                  break
@@@ -1842,7 -1828,10 +1865,7 @@@ class DepositFilesIE(InfoExtractor)
          file_extension = os.path.splitext(file_url)[1][1:]
  
          # Search for file title
 -        mobj = re.search(r'<b title="(.*?)">', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract title')
 -        file_title = mobj.group(1).decode('utf-8')
 +        file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')
  
          return [{
              'id':       file_id.decode('utf-8'),
@@@ -1936,8 -1925,10 +1959,8 @@@ class FacebookIE(InfoExtractor)
          video_duration = int(video_data['video_duration'])
          thumbnail = video_data['thumbnail_src']
  
 -        m = re.search('<h2 class="uiHeaderTitle">([^<]+)</h2>', webpage)
 -        if not m:
 -            raise ExtractorError(u'Cannot find title in webpage')
 -        video_title = unescapeHTML(m.group(1))
 +        video_title = self._html_search_regex('<h2 class="uiHeaderTitle">([^<]+)</h2>',
 +            webpage, u'title')
  
          info = {
              'id': video_id,
@@@ -2099,10 -2090,15 +2122,10 @@@ class MyVideoIE(InfoExtractor)
              self.report_extraction(video_id)
              video_url = mobj.group(1) + '.flv'
  
 -            mobj = re.search('<title>([^<]+)</title>', webpage)
 -            if mobj is None:
 -                raise ExtractorError(u'Unable to extract title')
 -            video_title = mobj.group(1)
 +            video_title = self._html_search_regex('<title>([^<]+)</title>',
 +                webpage, u'title')
  
 -            mobj = re.search('[.](.+?)$', video_url)
 -            if mobj is None:
 -                raise ExtractorError(u'Unable to extract extention')
 -            video_ext = mobj.group(1)
 +            video_ext = self._search_regex('[.](.+?)$', video_url, u'extension')
  
              return [{
                  'id':       video_id,
          # extracting infos
          self.report_extraction(video_id)
  
 +        video_url = None
          mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
 -        if mobj is None:
 -            raise ExtractorError(u'unable to extract rtmpurl')
 -        video_rtmpurl = compat_urllib_parse.unquote(mobj.group(1))
 -        if 'myvideo2flash' in video_rtmpurl:
 -            self._downloader.report_warning(u'forcing RTMPT ...')
 -            video_rtmpurl = video_rtmpurl.replace('rtmpe://', 'rtmpt://')
 -
 -        # extract non rtmp videos
 -        if (video_rtmpurl is None) or (video_rtmpurl == ''):
 +        if mobj:
 +            video_url = compat_urllib_parse.unquote(mobj.group(1))
 +            if 'myvideo2flash' in video_url:
 +                self._downloader.report_warning(u'forcing RTMPT ...')
 +                video_url = video_url.replace('rtmpe://', 'rtmpt://')
 +
 +        if not video_url:
 +            # extract non rtmp videos
              mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
              if mobj is None:
                  raise ExtractorError(u'unable to extract url')
 -            video_rtmpurl = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
 +            video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
  
 -        mobj = re.search('source=\'(.*?)\'', dec_data)
 -        if mobj is None:
 -            raise ExtractorError(u'unable to extract swfobj')
 -        video_file     = compat_urllib_parse.unquote(mobj.group(1))
 +        video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file')
 +        video_file = compat_urllib_parse.unquote(video_file)
  
          if not video_file.endswith('f4m'):
              ppath, prefix = video_file.split('.')
                  video_filepath + video_file
              ).replace('.f4m', '.m3u8')
  
 -        mobj = re.search('swfobject.embedSWF\(\'(.+?)\'', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'unable to extract swfobj')
 -        video_swfobj = compat_urllib_parse.unquote(mobj.group(1))
 +        video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj')
 +        video_swfobj = compat_urllib_parse.unquote(video_swfobj)
  
 -        mobj = re.search("<h1(?: class='globalHd')?>(.*?)</h1>", webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'unable to extract title')
 -        video_title = mobj.group(1)
 +        video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
 +            webpage, u'title')
  
          return [{
              'id':                 video_id,
 -            'url':                video_rtmpurl,
 -            'tc_url':             video_rtmpurl,
 +            'url':                video_url,
 +            'tc_url':             video_url,
              'uploader':           None,
              'upload_date':        None,
              'title':              video_title,
              'player_url':         video_swfobj,
          }]
  
 +
  class ComedyCentralIE(InfoExtractor):
      """Information extractor for The Daily Show and Colbert Report """
  
@@@ -2380,25 -2381,19 +2403,25 @@@ class EscapistIE(InfoExtractor)
          showName = mobj.group('showname')
          videoId = mobj.group('episode')
  
 -        self.report_extraction(showName)
 -        webPage = self._download_webpage(url, showName)
 +        self.report_extraction(videoId)
 +        webpage = self._download_webpage(url, videoId)
 +
 +        videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"',
 +            webpage, u'description', fatal=False)
 +
 +        imgUrl = self._html_search_regex('<meta property="og:image" content="([^"]*)"',
 +            webpage, u'thumbnail', fatal=False)
 +
 +        playerUrl = self._html_search_regex('<meta property="og:video" content="([^"]*)"',
 +            webpage, u'player url')
  
 -        descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
 -        description = unescapeHTML(descMatch.group(1))
 -        imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
 -        imgUrl = unescapeHTML(imgMatch.group(1))
 -        playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
 -        playerUrl = unescapeHTML(playerUrlMatch.group(1))
 -        configUrlMatch = re.search('config=(.*)$', playerUrl)
 -        configUrl = compat_urllib_parse.unquote(configUrlMatch.group(1))
 +        title = self._html_search_regex('<meta name="title" content="([^"]*)"',
 +            webpage, u'player url').split(' : ')[-1]
  
 -        configJSON = self._download_webpage(configUrl, showName,
 +        configUrl = self._search_regex('config=(.*)$', playerUrl, u'config url')
 +        configUrl = compat_urllib_parse.unquote(configUrl)
 +
 +        configJSON = self._download_webpage(configUrl, videoId,
                                              u'Downloading configuration',
                                              u'unable to download configuration')
  
              'url': videoUrl,
              'uploader': showName,
              'upload_date': None,
 -            'title': showName,
 +            'title': title,
              'ext': 'mp4',
              'thumbnail': imgUrl,
 -            'description': description,
 +            'description': videoDesc,
              'player_url': playerUrl,
          }
  
@@@ -2506,17 -2501,26 +2529,17 @@@ class XVideosIE(InfoExtractor)
  
          self.report_extraction(video_id)
  
 -
          # Extract video URL
 -        mobj = re.search(r'flv_url=(.+?)&', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract video url')
 -        video_url = compat_urllib_parse.unquote(mobj.group(1))
 -
 +        video_url = compat_urllib_parse.unquote(self._search_regex(r'flv_url=(.+?)&',
 +            webpage, u'video URL'))
  
          # Extract title
 -        mobj = re.search(r'<title>(.*?)\s+-\s+XVID', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract video title')
 -        video_title = mobj.group(1)
 -
 +        video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XVID',
 +            webpage, u'title')
  
          # Extract video thumbnail
 -        mobj = re.search(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/([a-fA-F0-9.]+jpg)', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract video thumbnail')
 -        video_thumbnail = mobj.group(0)
 +        video_thumbnail = self._search_regex(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/([a-fA-F0-9.]+jpg)',
 +            webpage, u'thumbnail', fatal=False)
  
          info = {
              'id': video_id,
@@@ -2673,12 -2677,16 +2696,12 @@@ class InfoQIE(InfoExtractor)
          video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id
  
          # Extract title
 -        mobj = re.search(r'contentTitle = "(.*?)";', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract video title')
 -        video_title = mobj.group(1)
 +        video_title = self._search_regex(r'contentTitle = "(.*?)";',
 +            webpage, u'title')
  
          # Extract description
 -        video_description = u'No description available.'
 -        mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', webpage)
 -        if mobj is not None:
 -            video_description = mobj.group(1)
 +        video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
 +            webpage, u'description', fatal=False)
  
          video_filename = video_url.split('/')[-1]
          video_id, extension = video_filename.split('.')
@@@ -2849,10 -2857,15 +2872,10 @@@ class StanfordOpenClassroomIE(InfoExtra
                                          note='Downloading course info page',
                                          errnote='Unable to download course info page')
  
 -            m = re.search('<h1>([^<]+)</h1>', coursepage)
 -            if m:
 -                info['title'] = unescapeHTML(m.group(1))
 -            else:
 -                info['title'] = info['id']
 +            info['title'] = self._html_search_regex('<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])
  
 -            m = re.search('<description>([^<]+)</description>', coursepage)
 -            if m:
 -                info['description'] = unescapeHTML(m.group(1))
 +            info['description'] = self._html_search_regex('<description>([^<]+)</description>',
 +                coursepage, u'description', fatal=False)
  
              links = orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage))
              info['list'] = [
@@@ -2913,17 -2926,25 +2936,17 @@@ class MTVIE(InfoExtractor)
  
          webpage = self._download_webpage(url, video_id)
  
 -        mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract song name')
 -        song_name = unescapeHTML(mobj.group(1).decode('iso-8859-1'))
 -        mobj = re.search(r'<meta name="mtv_an" content="([^"]+)"/>', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract performer')
 -        performer = unescapeHTML(mobj.group(1).decode('iso-8859-1'))
 -        video_title = performer + ' - ' + song_name
 +        song_name = self._html_search_regex(r'<meta name="mtv_vt" content="([^"]+)"/>',
 +            webpage, u'song name', fatal=False)
  
 -        mobj = re.search(r'<meta name="mtvn_uri" content="([^"]+)"/>', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to mtvn_uri')
 -        mtvn_uri = mobj.group(1)
 +        video_title = self._html_search_regex(r'<meta name="mtv_an" content="([^"]+)"/>',
 +            webpage, u'title')
  
 -        mobj = re.search(r'MTVN.Player.defaultPlaylistId = ([0-9]+);', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract content id')
 -        content_id = mobj.group(1)
 +        mtvn_uri = self._html_search_regex(r'<meta name="mtvn_uri" content="([^"]+)"/>',
 +            webpage, u'mtvn_uri', fatal=False)
 +
 +        content_id = self._search_regex(r'MTVN.Player.defaultPlaylistId = ([0-9]+);',
 +            webpage, u'content id', fatal=False)
  
          videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri
          self.report_extraction(video_id)
@@@ -3071,15 -3092,20 +3094,15 @@@ class XNXXIE(InfoExtractor)
          # Get webpage content
          webpage = self._download_webpage(url, video_id)
  
 -        result = re.search(self.VIDEO_URL_RE, webpage)
 -        if result is None:
 -            raise ExtractorError(u'Unable to extract video url')
 -        video_url = compat_urllib_parse.unquote(result.group(1))
 +        video_url = self._search_regex(self.VIDEO_URL_RE,
 +            webpage, u'video URL')
 +        video_url = compat_urllib_parse.unquote(video_url)
  
 -        result = re.search(self.VIDEO_TITLE_RE, webpage)
 -        if result is None:
 -            raise ExtractorError(u'Unable to extract video title')
 -        video_title = result.group(1)
 +        video_title = self._html_search_regex(self.VIDEO_TITLE_RE,
 +            webpage, u'title')
  
 -        result = re.search(self.VIDEO_THUMB_RE, webpage)
 -        if result is None:
 -            raise ExtractorError(u'Unable to extract video thumbnail')
 -        video_thumbnail = result.group(1)
 +        video_thumbnail = self._search_regex(self.VIDEO_THUMB_RE,
 +            webpage, u'thumbnail', fatal=False)
  
          return [{
              'id': video_id,
@@@ -3099,6 -3125,26 +3122,6 @@@ class GooglePlusIE(InfoExtractor)
      _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
      IE_NAME = u'plus.google'
  
 -    def report_extract_entry(self, url):
 -        """Report downloading extry"""
 -        self.to_screen(u'Downloading entry: %s' % url)
 -
 -    def report_date(self, upload_date):
 -        """Report downloading extry"""
 -        self.to_screen(u'Entry date: %s' % upload_date)
 -
 -    def report_uploader(self, uploader):
 -        """Report downloading extry"""
 -        self.to_screen(u'Uploader: %s' % uploader)
 -
 -    def report_title(self, video_title):
 -        """Report downloading extry"""
 -        self.to_screen(u'Title: %s' % video_title)
 -
 -    def report_extract_vid_page(self, video_page):
 -        """Report information extraction."""
 -        self.to_screen(u'Extracting video page: %s' % video_page)
 -
      def _real_extract(self, url):
          # Extract id from URL
          mobj = re.match(self._VALID_URL, url)
          video_extension = 'flv'
  
          # Step 1, Retrieve post webpage to extract further information
 -        self.report_extract_entry(post_url)
          webpage = self._download_webpage(post_url, video_id, u'Downloading entry webpage')
  
 +        self.report_extraction(video_id)
 +
          # Extract upload date
 -        upload_date = None
 -        pattern = 'title="Timestamp">(.*?)</a>'
 -        mobj = re.search(pattern, webpage)
 -        if mobj:
 -            upload_date = mobj.group(1)
 +        upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>',
 +            webpage, u'upload date', fatal=False)
 +        if upload_date:
              # Convert timestring to a format suitable for filename
              upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
              upload_date = upload_date.strftime('%Y%m%d')
 -        self.report_date(upload_date)
  
          # Extract uploader
 -        uploader = None
 -        pattern = r'rel\="author".*?>(.*?)</a>'
 -        mobj = re.search(pattern, webpage)
 -        if mobj:
 -            uploader = mobj.group(1)
 -        self.report_uploader(uploader)
 +        uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>',
 +            webpage, u'uploader', fatal=False)
  
          # Extract title
          # Get the first line for title
 -        video_title = u'NA'
 -        pattern = r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]'
 -        mobj = re.search(pattern, webpage)
 -        if mobj:
 -            video_title = mobj.group(1)
 -        self.report_title(video_title)
 +        video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
 +            webpage, 'title', default=u'NA')
  
          # Step 2, Simulate clicking the image box to launch video
 -        pattern = '"(https\://plus\.google\.com/photos/.*?)",,"image/jpeg","video"\]'
 -        mobj = re.search(pattern, webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract video page URL')
 -
 -        video_page = mobj.group(1)
 +        video_page = self._search_regex('"(https\://plus\.google\.com/photos/.*?)",,"image/jpeg","video"\]',
 +            webpage, u'video page URL')
          webpage = self._download_webpage(video_page, video_id, u'Downloading video page')
 -        self.report_extract_vid_page(video_page)
 -
  
          # Extract video links on video page
          """Extract video links of all sizes"""
          }]
  
  class NBAIE(InfoExtractor):
 -    _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*)(\?.*)?$'
 +    _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
      IE_NAME = u'nba'
  
      def _real_extract(self, url):
              raise ExtractorError(u'Invalid URL: %s' % url)
  
          video_id = mobj.group(1)
 -        if video_id.endswith('/index.html'):
 -            video_id = video_id[:-len('/index.html')]
  
          webpage = self._download_webpage(url, video_id)
  
          video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
 -        def _findProp(rexp, default=None):
 -            m = re.search(rexp, webpage)
 -            if m:
 -                return unescapeHTML(m.group(1))
 -            else:
 -                return default
  
          shortened_video_id = video_id.rpartition('/')[2]
 -        title = _findProp(r'<meta property="og:title" content="(.*?)"', shortened_video_id).replace('NBA.com: ', '')
 +        title = self._html_search_regex(r'<meta property="og:title" content="(.*?)"',
 +            webpage, 'title', default=shortened_video_id).replace('NBA.com: ', '')
 +
 +        # It isn't there in the HTML it returns to us
 +        # uploader_date = self._html_search_regex(r'<b>Date:</b> (.*?)</div>', webpage, 'upload_date', fatal=False)
 +
 +        description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)
 +
          info = {
              'id': shortened_video_id,
              'url': video_url,
              'ext': 'mp4',
              'title': title,
 -            'uploader_date': _findProp(r'<b>Date:</b> (.*?)</div>'),
 -            'description': _findProp(r'<div class="description">(.*?)</h1>'),
 +            # 'uploader_date': uploader_date,
 +            'description': description,
          }
          return [info]
  
@@@ -3345,21 -3408,30 +3368,21 @@@ class FunnyOrDieIE(InfoExtractor)
          video_id = mobj.group('id')
          webpage = self._download_webpage(url, video_id)
  
 -        m = re.search(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"', webpage, re.DOTALL)
 -        if not m:
 -            raise ExtractorError(u'Unable to find video information')
 -        video_url = unescapeHTML(m.group('url'))
 +        video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"',
 +            webpage, u'video URL', flags=re.DOTALL)
  
 -        m = re.search(r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>", webpage, flags=re.DOTALL)
 -        if not m:
 -            m = re.search(r'<title>(?P<title>[^<]+?)</title>', webpage)
 -            if not m:
 -                raise ExtractorError(u'Cannot find video title')
 -        title = clean_html(m.group('title'))
 +        title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
 +            r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)
  
 -        m = re.search(r'<meta property="og:description" content="(?P<desc>.*?)"', webpage)
 -        if m:
 -            desc = unescapeHTML(m.group('desc'))
 -        else:
 -            desc = None
 +        video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
 +            webpage, u'description', fatal=False, flags=re.DOTALL)
  
          info = {
              'id': video_id,
              'url': video_url,
              'ext': 'mp4',
              'title': title,
 -            'description': desc,
 +            'description': video_description,
          }
          return [info]
  
@@@ -3370,8 -3442,6 +3393,8 @@@ class SteamIE(InfoExtractor)
                  (?P<gameID>\d+)/?
                  (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
                  """
 +    _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
 +    _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
  
      @classmethod
      def suitable(cls, url):
      def _real_extract(self, url):
          m = re.match(self._VALID_URL, url, re.VERBOSE)
          gameID = m.group('gameID')
 -        videourl = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970' % gameID
 -        self.report_age_confirmation()
 +
 +        videourl = self._VIDEO_PAGE_TEMPLATE % gameID
          webpage = self._download_webpage(videourl, gameID)
 -        game_title = re.search(r'<h2 class="pageheader">(?P<game_title>.*?)</h2>', webpage).group('game_title')
 -        
 +
 +        if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
 +            videourl = self._AGECHECK_TEMPLATE % gameID
 +            self.report_age_confirmation()
 +            webpage = self._download_webpage(videourl, gameID)
 +
 +        self.report_extraction(gameID)
 +        game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>',
 +                                             webpage, 'game title')
 +
          urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
          mweb = re.finditer(urlRE, webpage)
          namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
@@@ -3425,29 -3487,27 +3448,29 @@@ class UstreamIE(InfoExtractor)
      def _real_extract(self, url):
          m = re.match(self._VALID_URL, url)
          video_id = m.group('videoID')
 +
          video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
          webpage = self._download_webpage(url, video_id)
 +
          self.report_extraction(video_id)
 -        try:
 -            m = re.search(r'data-title="(?P<title>.+)"',webpage)
 -            title = m.group('title')
 -            m = re.search(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
 -                          webpage, re.DOTALL)
 -            uploader = unescapeHTML(m.group('uploader').strip())
 -            m = re.search(r'<link rel="image_src" href="(?P<thumb>.*?)"', webpage)
 -            thumb = m.group('thumb')
 -        except AttributeError:
 -            raise ExtractorError(u'Unable to extract info')
 +
 +        video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
 +            webpage, u'title')
 +
 +        uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
 +            webpage, u'uploader', fatal=False, flags=re.DOTALL)
 +
 +        thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
 +            webpage, u'thumbnail', fatal=False)
 +
          info = {
 -                'id':video_id,
 -                'url':video_url,
 +                'id': video_id,
 +                'url': video_url,
                  'ext': 'flv',
 -                'title': title,
 +                'title': video_title,
                  'uploader': uploader,
 -                'thumbnail': thumb,
 -                  }
 +                'thumbnail': thumbnail,
 +               }
          return info
  
  class WorldStarHipHopIE(InfoExtractor):
      IE_NAME = u'WorldStarHipHop'
  
      def _real_extract(self, url):
 -        _src_url = r'so\.addVariable\("file","(.*?)"\)'
 -
          m = re.match(self._VALID_URL, url)
          video_id = m.group('id')
  
 -        webpage_src = self._download_webpage(url, video_id) 
 +        webpage_src = self._download_webpage(url, video_id)
  
 -        mobj = re.search(_src_url, webpage_src)
 +        video_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)',
 +            webpage_src, u'video URL')
  
 -        if mobj is not None:
 -            video_url = mobj.group(1)
 -            if 'mp4' in video_url:
 -                ext = 'mp4'
 -            else:
 -                ext = 'flv'
 +        if 'mp4' in video_url:
 +            ext = 'mp4'
          else:
 -            raise ExtractorError(u'Cannot find video url for %s' % video_id)
 +            ext = 'flv'
  
 -        mobj = re.search(r"<title>(.*)</title>", webpage_src)
 -
 -        if mobj is None:
 -            raise ExtractorError(u'Cannot determine title')
 -        title = mobj.group(1)
 +        video_title = self._html_search_regex(r"<title>(.*)</title>",
 +            webpage_src, u'title')
  
 -        mobj = re.search(r'rel="image_src" href="(.*)" />', webpage_src)
          # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
 -        if mobj is not None:
 -            thumbnail = mobj.group(1)
 -        else:
 +        thumbnail = self._html_search_regex(r'rel="image_src" href="(.*)" />',
 +            webpage_src, u'thumbnail', fatal=False)
 +
 +        if not thumbnail:
              _title = r"""candytitles.*>(.*)</span>"""
              mobj = re.search(_title, webpage_src)
              if mobj is not None:
 -                title = mobj.group(1)
 -            thumbnail = None
 +                video_title = mobj.group(1)
  
          results = [{
                      'id': video_id,
                      'url' : video_url,
 -                    'title' : title,
 +                    'title' : video_title,
                      'thumbnail' : thumbnail,
                      'ext' : ext,
                      }]
@@@ -3498,9 -3567,10 +3521,9 @@@ class RBMARadioIE(InfoExtractor)
          video_id = m.group('videoID')
  
          webpage = self._download_webpage(url, video_id)
 -        m = re.search(r'<script>window.gon = {.*?};gon\.show=(.+?);</script>', webpage)
 -        if not m:
 -            raise ExtractorError(u'Cannot find metadata')
 -        json_data = m.group(1)
 +
 +        json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
 +            webpage, u'json data', flags=re.MULTILINE)
  
          try:
              data = json.loads(json_data)
@@@ -3547,33 -3617,42 +3570,33 @@@ class YouPornIE(InfoExtractor)
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
              raise ExtractorError(u'Invalid URL: %s' % url)
 -
          video_id = mobj.group('videoid')
  
          req = compat_urllib_request.Request(url)
          req.add_header('Cookie', 'age_verified=1')
          webpage = self._download_webpage(req, video_id)
  
 -        # Get the video title
 -        result = re.search(r'<h1.*?>(?P<title>.*)</h1>', webpage)
 -        if result is None:
 -            raise ExtractorError(u'Unable to extract video title')
 -        video_title = result.group('title').strip()
 -
 -        # Get the video date
 -        result = re.search(r'Date:</label>(?P<date>.*) </li>', webpage)
 -        if result is None:
 -            self._downloader.report_warning(u'unable to extract video date')
 -            upload_date = None
 -        else:
 -            upload_date = unified_strdate(result.group('date').strip())
 +        # Get JSON parameters
 +        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
 +        try:
 +            params = json.loads(json_params)
 +        except:
 +            raise ExtractorError(u'Invalid JSON')
  
 -        # Get the video uploader
 -        result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage)
 -        if result is None:
 -            self._downloader.report_warning(u'unable to extract uploader')
 -            video_uploader = None
 -        else:
 -            video_uploader = result.group('uploader').strip()
 -            video_uploader = clean_html( video_uploader )
 +        self.report_extraction(video_id)
 +        try:
 +            video_title = params['title']
 +            upload_date = unified_strdate(params['release_date_f'])
 +            video_description = params['description']
 +            video_uploader = params['submitted_by']
 +            thumbnail = params['thumbnails'][0]['image']
 +        except KeyError:
 +            raise ExtractorError('Missing JSON parameter: ' + sys.exc_info()[1])
  
          # Get all of the formats available
          DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
 -        result = re.search(DOWNLOAD_LIST_RE, webpage)
 -        if result is None:
 -            raise ExtractorError(u'Unable to extract download list')
 -        download_list_html = result.group('download_list').strip()
 +        download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
 +            webpage, u'download list').strip()
  
          # Get all of the links from the page
          LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
              size = format[0]
              bitrate = format[1]
              format = "-".join( format )
 -            title = u'%s-%s-%s' % (video_title, size, bitrate)
 +            title = u'%s-%s-%s' % (video_title, size, bitrate)
  
              formats.append({
                  'id': video_id,
                  'url': video_url,
                  'uploader': video_uploader,
                  'upload_date': upload_date,
 -                'title': title,
 +                'title': video_title,
                  'ext': extension,
                  'format': format,
 -                'thumbnail': None,
 -                'description': None,
 -                'player_url': None
 +                'thumbnail': thumbnail,
 +                'description': video_description
              })
  
          if self._downloader.params.get('listformats', None):
@@@ -3649,13 -3729,17 +3672,13 @@@ class PornotubeIE(InfoExtractor)
  
          # Get the video URL
          VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
 -        result = re.search(VIDEO_URL_RE, webpage)
 -        if result is None:
 -            raise ExtractorError(u'Unable to extract video url')
 -        video_url = compat_urllib_parse.unquote(result.group('url'))
 +        video_url = self._search_regex(VIDEO_URL_RE, webpage, u'video url')
 +        video_url = compat_urllib_parse.unquote(video_url)
  
          #Get the uploaded date
          VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
 -        result = re.search(VIDEO_UPLOADED_RE, webpage)
 -        if result is None:
 -            raise ExtractorError(u'Unable to extract video title')
 -        upload_date = unified_strdate(result.group('date'))
 +        upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
 +        if upload_date: upload_date = unified_strdate(upload_date)
  
          info = {'id': video_id,
                  'url': video_url,
@@@ -3682,8 -3766,10 +3705,8 @@@ class YouJizzIE(InfoExtractor)
          webpage = self._download_webpage(url, video_id)
  
          # Get the video title
 -        result = re.search(r'<title>(?P<title>.*)</title>', webpage)
 -        if result is None:
 -            raise ExtractorError(u'ERROR: unable to extract video title')
 -        video_title = result.group('title').strip()
 +        video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
 +            webpage, u'title').strip()
  
          # Get the embed page
          result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
          webpage = self._download_webpage(embed_page_url, video_id)
  
          # Get the video URL
 -        result = re.search(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);', webpage)
 -        if result is None:
 -            raise ExtractorError(u'ERROR: unable to extract video url')
 -        video_url = result.group('source')
 +        video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
 +            webpage, u'video URL')
  
          info = {'id': video_id,
                  'url': video_url,
@@@ -3720,7 -3808,10 +3743,7 @@@ class EightTracksIE(InfoExtractor)
  
          webpage = self._download_webpage(url, playlist_id)
  
 -        m = re.search(r"PAGE.mix = (.*?);\n", webpage, flags=re.DOTALL)
 -        if not m:
 -            raise ExtractorError(u'Cannot find trax information')
 -        json_like = m.group(1)
 +        json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL)
          data = json.loads(json_like)
  
          session = str(random.randint(0, 1000000000))
@@@ -3756,22 -3847,18 +3779,22 @@@ class KeekIE(InfoExtractor)
      def _real_extract(self, url):
          m = re.match(self._VALID_URL, url)
          video_id = m.group('videoID')
 +
          video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
          thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
          webpage = self._download_webpage(url, video_id)
 -        m = re.search(r'<meta property="og:title" content="(?P<title>.*?)"', webpage)
 -        title = unescapeHTML(m.group('title'))
 -        m = re.search(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>', webpage)
 -        uploader = clean_html(m.group('uploader'))
 +
 +        video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
 +            webpage, u'title')
 +
 +        uploader = self._html_search_regex(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
 +            webpage, u'uploader', fatal=False)
 +
          info = {
                  'id': video_id,
                  'url': video_url,
                  'ext': 'mp4',
 -                'title': title,
 +                'title': video_title,
                  'thumbnail': thumbnail,
                  'uploader': uploader
          }
@@@ -3803,6 -3890,10 +3826,6 @@@ class TEDIE(InfoExtractor)
              self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
              return [self._playlist_videos_info(url,name,playlist_id)]
  
 -    def _talk_video_link(self,mediaSlug):
 -        '''Returns the video link for that mediaSlug'''
 -        return 'http://download.ted.com/talks/%s.mp4' % mediaSlug
 -
      def _playlist_videos_info(self,url,name,playlist_id=0):
          '''Returns the videos of the playlist'''
          video_RE=r'''
          m_videos=re.finditer(video_RE,webpage,re.VERBOSE)
          m_names=re.finditer(video_name_RE,webpage)
  
 -        playlist_RE = r'div class="headline">(\s*?)<h1>(\s*?)<span>(?P<playlist_title>.*?)</span>'
 -        m_playlist = re.search(playlist_RE, webpage)
 -        playlist_title = m_playlist.group('playlist_title')
 +        playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
 +                                                 webpage, 'playlist title')
  
          playlist_entries = []
          for m_video, m_name in zip(m_videos,m_names):
  
      def _talk_info(self, url, video_id=0):
          """Return the video for the talk in the url"""
 -        m=re.match(self._VALID_URL, url,re.VERBOSE)
 -        videoName=m.group('name')
 -        webpage=self._download_webpage(url, video_id, 'Downloading \"%s\" page' % videoName)
 +        m = re.match(self._VALID_URL, url,re.VERBOSE)
 +        video_name = m.group('name')
 +        webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
 +        self.report_extraction(video_name)
          # If the url includes the language we get the title translated
 -        title_RE=r'<span id="altHeadline" >(?P<title>.*)</span>'
 -        title=re.search(title_RE, webpage).group('title')
 -        info_RE=r'''<script\ type="text/javascript">var\ talkDetails\ =(.*?)
 -                        "id":(?P<videoID>[\d]+).*?
 -                        "mediaSlug":"(?P<mediaSlug>[\w\d]+?)"'''
 -        thumb_RE=r'</span>[\s.]*</div>[\s.]*<img src="(?P<thumbnail>.*?)"'
 -        thumb_match=re.search(thumb_RE,webpage)
 -        info_match=re.search(info_RE,webpage,re.VERBOSE)
 -        video_id=info_match.group('videoID')
 -        mediaSlug=info_match.group('mediaSlug')
 -        video_url=self._talk_video_link(mediaSlug)
 +        title = self._html_search_regex(r'<span id="altHeadline" >(?P<title>.*)</span>',
 +                                        webpage, 'title')
 +        json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>',
 +                                    webpage, 'json data')
 +        info = json.loads(json_data)
 +        desc = self._html_search_regex(r'<div class="talk-intro">.*?<p.*?>(.*?)</p>',
 +                                       webpage, 'description', flags = re.DOTALL)
 +        
 +        thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"',
 +                                       webpage, 'thumbnail')
          info = {
 -                'id': video_id,
 -                'url': video_url,
 +                'id': info['id'],
 +                'url': info['htmlStreams'][-1]['file'],
                  'ext': 'mp4',
                  'title': title,
 -                'thumbnail': thumb_match.group('thumbnail')
 +                'thumbnail': thumbnail,
 +                'description': desc,
                  }
          return info
  
@@@ -3914,9 -4005,10 +3937,9 @@@ class SpiegelIE(InfoExtractor)
          video_id = m.group('videoID')
  
          webpage = self._download_webpage(url, video_id)
 -        m = re.search(r'<div class="module-title">(.*?)</div>', webpage)
 -        if not m:
 -            raise ExtractorError(u'Cannot find title')
 -        video_title = unescapeHTML(m.group(1))
 +
 +        video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>',
 +            webpage, u'title')
  
          xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
          xml_code = self._download_webpage(xml_url, video_id,
@@@ -3952,25 -4044,35 +3975,25 @@@ class LiveLeakIE(InfoExtractor)
  
          webpage = self._download_webpage(url, video_id)
  
 -        m = re.search(r'file: "(.*?)",', webpage)
 -        if not m:
 -            raise ExtractorError(u'Unable to find video url')
 -        video_url = m.group(1)
 +        video_url = self._search_regex(r'file: "(.*?)",',
 +            webpage, u'video URL')
  
 -        m = re.search(r'<meta property="og:title" content="(?P<title>.*?)"', webpage)
 -        if not m:
 -            raise ExtractorError(u'Cannot find video title')
 -        title = unescapeHTML(m.group('title')).replace('LiveLeak.com -', '').strip()
 +        video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
 +            webpage, u'title').replace('LiveLeak.com -', '').strip()
  
 -        m = re.search(r'<meta property="og:description" content="(?P<desc>.*?)"', webpage)
 -        if m:
 -            desc = unescapeHTML(m.group('desc'))
 -        else:
 -            desc = None
 +        video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
 +            webpage, u'description', fatal=False)
  
 -        m = re.search(r'By:.*?(\w+)</a>', webpage)
 -        if m:
 -            uploader = clean_html(m.group(1))
 -        else:
 -            uploader = None
 +        video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
 +            webpage, u'uploader', fatal=False)
  
          info = {
              'id':  video_id,
              'url': video_url,
              'ext': 'mp4',
 -            'title': title,
 -            'description': desc,
 -            'uploader': uploader
 +            'title': video_title,
 +            'description': video_description,
 +            'uploader': video_uploader
          }
  
          return [info]
@@@ -4086,23 -4188,23 +4109,23 @@@ class TumblrIE(InfoExtractor)
          re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
          video = re.search(re_video, webpage)
          if video is None:
 -            self.to_screen("No video found")
 -            return []
 +           raise ExtractorError(u'Unable to extract video')
          video_url = video.group('video_url')
          ext = video.group('ext')
  
 -        re_thumb = r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22'  # We pick the first poster
 -        thumb = re.search(re_thumb, webpage).group('thumb').replace('\\', '')
 +        video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
 +            webpage, u'thumbnail', fatal=False)  # We pick the first poster
 +        if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\', '')
  
          # The only place where you can get a title, it's not complete,
          # but searching in other places doesn't work for all videos
 -        re_title = r'<title>(?P<title>.*?)</title>'
 -        title = unescapeHTML(re.search(re_title, webpage, re.DOTALL).group('title'))
 +        video_title = self._html_search_regex(r'<title>(?P<title>.*?)</title>',
 +            webpage, u'title', flags=re.DOTALL)
  
          return [{'id': video_id,
                   'url': video_url,
 -                 'title': title,
 -                 'thumbnail': thumb,
 +                 'title': video_title,
 +                 'thumbnail': video_thumbnail,
                   'ext': ext
                   }]
  
@@@ -4116,7 -4218,7 +4139,7 @@@ class BandcampIE(InfoExtractor)
          # We get the link to the free download page
          m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
          if m_download is None:
 -            raise ExtractorError(u'No free songs founded')
 +            raise ExtractorError(u'No free songs found')
  
          download_link = m_download.group(1)
          id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$', 
  
          track_info = {'id':id,
                        'title' : info[u'title'],
 -                      'ext' : 'mp3',
 -                      'url' : final_url,
 +                      'ext' :   'mp3',
 +                      'url' :   final_url,
                        'thumbnail' : info[u'thumb_url'],
 -                      'uploader' : info[u'artist']
 +                      'uploader' :  info[u'artist']
                        }
  
          return [track_info]
@@@ -4164,14 -4266,17 +4187,14 @@@ class RedTubeIE(InfoExtractor)
          video_id = mobj.group('id')
          video_extension = 'mp4'        
          webpage = self._download_webpage(url, video_id)
 +
          self.report_extraction(video_id)
 -        mobj = re.search(r'<source src="'+'(.+)'+'" type="video/mp4">',webpage)
  
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract media URL')
 +        video_url = self._html_search_regex(r'<source src="(.+?)" type="video/mp4">',
 +            webpage, u'video URL')
  
 -        video_url = mobj.group(1)
 -        mobj = re.search('<h1 class="videoTitle slidePanelMovable">(.+)</h1>',webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract title')
 -        video_title = mobj.group(1)
 +        video_title = self._html_search_regex('<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
 +            webpage, u'title')
  
          return [{
              'id':       video_id,
@@@ -4192,13 -4297,15 +4215,13 @@@ class InaIE(InfoExtractor)
          video_extension = 'mp4'
          webpage = self._download_webpage(mrss_url, video_id)
  
 -        mobj = re.search(r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract media URL')
 -        video_url = mobj.group(1)
 +        self.report_extraction(video_id)
  
 -        mobj = re.search(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract title')
 -        video_title = mobj.group(1)
 +        video_url = self._html_search_regex(r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
 +            webpage, u'video URL')
 +
 +        video_title = self._search_regex(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
 +            webpage, u'title')
  
          return [{
              'id':       video_id,
@@@ -4220,17 -4327,27 +4243,17 @@@ class HowcastIE(InfoExtractor)
  
          self.report_extraction(video_id)
  
 -        mobj = re.search(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)"', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract video URL')
 -        video_url = mobj.group(1)
 +        video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
 +            webpage, u'video URL')
  
 -        mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract title')
 -        video_title = mobj.group(1) or mobj.group(2)
 +        video_title = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'',
 +            webpage, u'title')
  
 -        mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'', webpage)
 -        if mobj is None:
 -            self._downloader.report_warning(u'unable to extract description')
 -            video_description = None
 -        else:
 -            video_description = mobj.group(1) or mobj.group(2)
 +        video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
 +            webpage, u'description', fatal=False)
  
 -        mobj = re.search(r'<meta content=\'(.+?)\' property=\'og:image\'', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract thumbnail')
 -        thumbnail = mobj.group(1)
 +        thumbnail = self._html_search_regex(r'<meta content=\'(.+?)\' property=\'og:image\'',
 +            webpage, u'thumbnail', fatal=False)
  
          return [{
              'id':       video_id,
@@@ -4246,6 -4363,7 +4269,6 @@@ class VineIE(InfoExtractor)
      _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'
  
      def _real_extract(self, url):
 -
          mobj = re.match(self._VALID_URL, url)
  
          video_id = mobj.group('id')
  
          self.report_extraction(video_id)
  
 -        mobj = re.search(r'<meta property="twitter:player:stream" content="(.+?)"', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract video URL')
 -        video_url = mobj.group(1)
 +        video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
 +            webpage, u'video URL')
  
 -        mobj = re.search(r'<meta property="og:title" content="(.+?)"', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract title')
 -        video_title = mobj.group(1)
 +        video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
 +            webpage, u'title')
  
 -        mobj = re.search(r'<meta property="og:image" content="(.+?)(\?.*?)?"', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract thumbnail')
 -        thumbnail = mobj.group(1)
 +        thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)(\?.*?)?"',
 +            webpage, u'thumbnail', fatal=False)
  
 -        mobj = re.search(r'<div class="user">.*?<h2>(.+?)</h2>', webpage, re.DOTALL)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract uploader')
 -        uploader = mobj.group(1)
 +        uploader = self._html_search_regex(r'<div class="user">.*?<h2>(.+?)</h2>',
 +            webpage, u'uploader', fatal=False, flags=re.DOTALL)
  
          return [{
              'id':        video_id,
@@@ -4287,13 -4413,18 +4310,13 @@@ class FlickrIE(InfoExtractor)
          webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
          webpage = self._download_webpage(webpage_url, video_id)
  
 -        mobj = re.search(r"photo_secret: '(\w+)'", webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract video secret')
 -        secret = mobj.group(1)
 +        secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret')
  
          first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
          first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
  
 -        mobj = re.search(r'<Item id="id">(\d+-\d+)</Item>', first_xml)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract node_id')
 -        node_id = mobj.group(1)
 +        node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
 +            first_xml, u'node_id')
  
          second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
          second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
              raise ExtractorError(u'Unable to extract video url')
          video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
  
 -        mobj = re.search(r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract title')
 -        video_title = mobj.group(1) or mobj.group(2)
 +        video_title = self._html_search_regex(r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')',
 +            webpage, u'video title')
  
 -        mobj = re.search(r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
 -        if mobj is None:
 -            self._downloader.report_warning(u'unable to extract description')
 -            video_description = None
 -        else:
 -            video_description = mobj.group(1) or mobj.group(2)
 +        video_description = self._html_search_regex(r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')',
 +            webpage, u'description', fatal=False)
  
 -        mobj = re.search(r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract thumbnail')
 -        thumbnail = mobj.group(1) or mobj.group(2)
 +        thumbnail = self._html_search_regex(r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')',
 +            webpage, u'thumbnail', fatal=False)
  
          return [{
              'id':          video_id,
@@@ -4334,25 -4473,32 +4357,25 @@@ class TeamcocoIE(InfoExtractor)
          url_title = mobj.group('url_title')
          webpage = self._download_webpage(url, url_title)
  
 -        mobj = re.search(r'<article class="video" data-id="(\d+?)"', webpage)
 -        video_id = mobj.group(1)
 +        video_id = self._html_search_regex(r'<article class="video" data-id="(\d+?)"',
 +            webpage, u'video id')
  
          self.report_extraction(video_id)
  
 -        mobj = re.search(r'<meta property="og:title" content="(.+?)"', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract title')
 -        video_title = mobj.group(1)
 +        video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
 +            webpage, u'title')
  
 -        mobj = re.search(r'<meta property="og:image" content="(.+?)"', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract thumbnail')
 -        thumbnail = mobj.group(1)
 +        thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)"',
 +            webpage, u'thumbnail', fatal=False)
  
 -        mobj = re.search(r'<meta property="og:description" content="(.*?)"', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract description')
 -        description = mobj.group(1)
 +        video_description = self._html_search_regex(r'<meta property="og:description" content="(.*?)"',
 +            webpage, u'description', fatal=False)
  
          data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
          data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
 -        mobj = re.search(r'<file type="high".*?>(.*?)</file>', data)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract video url')
 -        video_url = mobj.group(1)
 +
 +        video_url = self._html_search_regex(r'<file type="high".*?>(.*?)</file>',
 +            data, u'video URL')
  
          return [{
              'id':          video_id,
              'ext':         'mp4',
              'title':       video_title,
              'thumbnail':   thumbnail,
 -            'description': description,
 +            'description': video_description,
          }]
 -        
 +
  class XHamsterIE(InfoExtractor):
      """Information Extractor for xHamster"""
      _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
          mobj = re.match(self._VALID_URL, url)
  
          video_id = mobj.group('id')
 -        mrss_url='http://xhamster.com/movies/%s/.html' % video_id
 +        mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
          webpage = self._download_webpage(mrss_url, video_id)
 +
          mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
          if mobj is None:
              raise ExtractorError(u'Unable to extract media URL')
              video_url = mobj.group('server')+'/key='+mobj.group('file')
          video_extension = video_url.split('.')[-1]
  
 -        mobj = re.search(r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract title')
 -        video_title = unescapeHTML(mobj.group('title'))
 +        video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
 +            webpage, u'title')
  
 -        mobj = re.search(r'<span>Description: </span>(?P<description>[^<]+)', webpage)
 -        if mobj is None:
 -            video_description = u''
 -        else:
 -            video_description = unescapeHTML(mobj.group('description'))
 +        # Can't see the description anywhere in the UI
 +        # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
 +        #     webpage, u'description', fatal=False)
 +        # if video_description: video_description = unescapeHTML(video_description)
  
          mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract upload date')
 -        video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
 -
 -        mobj = re.search(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^>]+)', webpage)
 -        if mobj is None:
 -            video_uploader_id = u'anonymous'
 +        if mobj:
 +            video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
          else:
 -            video_uploader_id = mobj.group('uploader_id')
 +            video_upload_date = None
 +            self._downloader.report_warning(u'Unable to extract upload date')
  
 -        mobj = re.search(r'\'image\':\'(?P<thumbnail>[^\']+)\'', webpage)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extract thumbnail URL')
 -        video_thumbnail = mobj.group('thumbnail')
 +        video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
 +            webpage, u'uploader id', default=u'anonymous')
 +
 +        video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',
 +            webpage, u'thumbnail', fatal=False)
  
          return [{
              'id':       video_id,
              'url':      video_url,
              'ext':      video_extension,
              'title':    video_title,
 -            'description': video_description,
 +            'description': video_description,
              'upload_date': video_upload_date,
              'uploader_id': video_uploader_id,
              'thumbnail': video_thumbnail
@@@ -4433,9 -4584,10 +4456,9 @@@ class HypemIE(InfoExtractor)
          cookie = urlh.headers.get('Set-Cookie', '')
  
          self.report_extraction(track_id)
 -        mobj = re.search(r'<script type="application/json" id="displayList-data">(.*?)</script>', response, flags=re.MULTILINE|re.DOTALL)
 -        if mobj is None:
 -            raise ExtractorError(u'Unable to extrack tracks')
 -        html_tracks = mobj.group(1).strip()
 +
 +        html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
 +            response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
          try:
              track_list = json.loads(html_tracks)
              track = track_list[u'tracks'][0]
@@@ -4476,12 -4628,11 +4499,12 @@@ class Vbox7IE(InfoExtractor)
          video_id = mobj.group(1)
  
          redirect_page, urlh = self._download_webpage_handle(url, video_id)
 -        redirect_url = urlh.geturl() + re.search(r'window\.location = \'(.*)\';', redirect_page).group(1)
 +        new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
 +        redirect_url = urlh.geturl() + new_location
          webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')
  
 -        title = re.search(r'<title>(.*)</title>', webpage)
 -        title = (title.group(1)).split('/')[0].strip()
 +        title = self._html_search_regex(r'<title>(.*)</title>',
 +            webpage, u'title').split('/')[0].strip()
  
          ext = "flv"
          info_url = "http://vbox7.com/play/magare.do"
              'thumbnail': thumbnail_url,
          }]
  
 +class GametrailersIE(InfoExtractor):
 +    _VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
 +
 +    def _real_extract(self, url):
 +        mobj = re.match(self._VALID_URL, url)
 +        if mobj is None:
 +            raise ExtractorError(u'Invalid URL: %s' % url)
 +        video_id = mobj.group('id')
 +        video_type = mobj.group('type')
 +        webpage = self._download_webpage(url, video_id)
 +        if video_type == 'full-episodes':
 +            mgid_re = r'data-video="(?P<mgid>mgid:.*?)"'
 +        else:
 +            mgid_re = r'data-contentId=\'(?P<mgid>mgid:.*?)\''
 +        mgid = self._search_regex(mgid_re, webpage, u'mgid')
 +        data = compat_urllib_parse.urlencode({'uri': mgid, 'acceptMethods': 'fms'})
 +
 +        info_page = self._download_webpage('http://www.gametrailers.com/feeds/mrss?' + data,
 +                                           video_id, u'Downloading video info')
 +        links_webpage = self._download_webpage('http://www.gametrailers.com/feeds/mediagen/?' + data,
 +                                               video_id, u'Downloading video urls info')
 +
 +        self.report_extraction(video_id)
 +        info_re = r'''<title><!\[CDATA\[(?P<title>.*?)\]\]></title>.*
 +                      <description><!\[CDATA\[(?P<description>.*?)\]\]></description>.*
 +                      <image>.*
 +                        <url>(?P<thumb>.*?)</url>.*
 +                      </image>'''
 +
 +        m_info = re.search(info_re, info_page, re.VERBOSE|re.DOTALL)
 +        if m_info is None:
 +            raise ExtractorError(u'Unable to extract video info')
 +        video_title = m_info.group('title')
 +        video_description = m_info.group('description')
 +        video_thumb = m_info.group('thumb')
 +
 +        m_urls = list(re.finditer(r'<src>(?P<url>.*)</src>', links_webpage))
 +        if m_urls is None or len(m_urls) == 0:
 +            raise ExtractError(u'Unable to extrat video url')
 +        # They are sorted from worst to best quality
 +        video_url = m_urls[-1].group('url')
 +
 +        return {'url':         video_url,
 +                'id':          video_id,
 +                'title':       video_title,
 +                # Videos are actually flv not mp4
 +                'ext':         'flv',
 +                'thumbnail':   video_thumb,
 +                'description': video_description,
 +                }
 +
 +class StatigramIE(InfoExtractor):
 +    _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
 +
 +    def _real_extract(self, url):
 +        mobj = re.match(self._VALID_URL, url)
 +
 +        video_id = mobj.group(1)
 +        webpage = self._download_webpage(url, video_id)
 +        video_url = self._html_search_regex(
 +            r'<meta property="og:video:secure_url" content="(.+?)">',
 +            webpage, u'video URL')
 +        thumbnail_url = self._html_search_regex(
 +            r'<meta property="og:image" content="(.+?)" />',
 +            webpage, u'thumbnail URL', fatal=False)
 +        html_title = self._html_search_regex(
 +            r'<title>(.+?)</title>',
 +            webpage, u'title')
 +        title = html_title.rpartition(u' | Statigram')[0]
 +        uploader = self._html_search_regex(
 +            r'@(.+) \(Videos\)', title, u'uploader name', fatal=False)
 +        ext = 'mp4'
 +
 +        return [{
 +            'id':        video_id,
 +            'url':       video_url,
 +            'ext':       ext,
 +            'title':     title,
 +            'thumbnail': thumbnail_url,
 +            'uploader' : uploader
 +        }]
 +
  def gen_extractors():
      """ Return a list of an instance of every supported extractor.
      The order does matter; the first extractor matched is the one handling the URL.
          XHamsterIE(),
          HypemIE(),
          Vbox7IE(),
 +        GametrailersIE(),
 +        StatigramIE(),
          GenericIE()
      ]
  
diff --combined youtube_dl/__init__.py
index 32141439f6c26a1445462680713aa621f6bf8268,18be9f156b8732b4db637ae732b0e97680f4e8a5..6f9ffba1e1d7756f57c55cf8fce5a17b9f60da4d
@@@ -200,7 -200,7 +200,7 @@@ def parseOpts(overrideArguments=None)
              action='store_true', dest='listsubtitles',
              help='lists all available subtitles for the video (currently youtube only)', default=False)
      video_format.add_option('--sub-format',
 -            action='store', dest='subtitlesformat', metavar='LANG',
 +            action='store', dest='subtitlesformat', metavar='FORMAT',
              help='subtitle format [srt/sbv] (default=srt) (currently youtube only)', default='srt')
      video_format.add_option('--sub-lang', '--srt-lang',
              action='store', dest='subtitleslang', metavar='LANG',
@@@ -423,7 -423,7 +423,7 @@@ def _real_main(argv=None)
      if opts.usenetrc and (opts.username is not None or opts.password is not None):
          parser.error(u'using .netrc conflicts with giving username/password')
      if opts.password is not None and opts.username is None:
-         parser.error(u'account username missing')
+         print(u'WARNING: account username missing')
      if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
          parser.error(u'using output template conflicts with using title, video ID or auto number')
      if opts.usetitle and opts.useid: