added StatigrIE

[youtube-dl.git] / youtube_dl / InfoExtractors.py
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

index 17e0f83232856270d4e90be6bf18148a4db13d18..4aec8c6879e79ccc13cf5e9bcfbf9abc17c4d1d1 100755 (executable)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -420,7 +420,7 @@ class YoutubeIE(InfoExtractor):
      def _request_automatic_caption(self, video_id, webpage):
          """We need the webpage for getting the captions url, pass it as an
             argument to speed up the process."""
-        sub_lang = self._downloader.params.get('subtitleslang')
+        sub_lang = self._downloader.params.get('subtitleslang') or 'en'
          sub_format = self._downloader.params.get('subtitlesformat')
          self.to_screen(u'%s: Looking for automatic captions' % video_id)
          mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
@@ -699,14 +699,14 @@ class YoutubeIE(InfoExtractor):
                          pass
                      else:
                          # We report the original error
-                        self._downloader.report_error(sub_error)
+                        self._downloader.report_warning(sub_error)
  
          if self._downloader.params.get('allsubtitles', False):
              video_subtitles = self._extract_all_subtitles(video_id)
              for video_subtitle in video_subtitles:
                  (sub_error, sub_lang, sub) = video_subtitle
                  if sub_error:
-                    self._downloader.report_error(sub_error)
+                    self._downloader.report_warning(sub_error)
  
          if self._downloader.params.get('listsubtitles', False):
              sub_lang_list = self._list_available_subtitles(video_id)
@@ -732,8 +732,11 @@ class YoutubeIE(InfoExtractor):
              for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
                  url_data = compat_parse_qs(url_data_str)
                  if 'itag' in url_data and 'url' in url_data:
-                    url = url_data['url'][0] + '&signature=' + url_data['sig'][0]
-                    if not 'ratebypass' in url: url += '&ratebypass=yes'
+                    url = url_data['url'][0]
+                    if 'sig' in url_data:
+                        url += '&signature=' + url_data['sig'][0]
+                    if 'ratebypass' not in url:
+                        url += '&ratebypass=yes'
                      url_map[url_data['itag'][0]] = url
  
              format_limit = self._downloader.params.get('format_limit', None)
@@ -1406,6 +1409,13 @@ class GenericIE(InfoExtractor):
          if mobj is None:
              # Try to find twitter cards info
              mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
+        if mobj is None:
+            # We look for Open Graph info:
+            # We have to match any number of spaces between elements, since some sites try to align them (e.g. statigr.am)
+            m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
+            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
+            if m_video_type is not None:
+                mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
          if mobj is None:
              raise ExtractorError(u'Invalid URL: %s' % url)
  
@@ -1606,9 +1616,10 @@ class YoutubePlaylistIE(InfoExtractor):
                  # Number of videos is a multiple of self._MAX_RESULTS
                  break
  
-            videos += [ (entry['yt$position']['$t'], entry['content']['src'])
-                        for entry in response['feed']['entry']
-                        if 'content' in entry ]
+            for entry in response['feed']['entry']:
+                index = entry['yt$position']['$t']
+                if 'media$group' in entry and 'media$player' in entry['media$group']:
+                    videos.append((index, entry['media$group']['media$player']['url']))
  
              if len(response['feed']['entry']) < self._MAX_RESULTS:
                  break
@@ -4541,6 +4552,29 @@ class GametrailersIE(InfoExtractor):
                  'description': video_description,
                  }
  
+class StatigrIE(InfoExtractor):
+    """Information extractor for statigr.am video pages (statigr.am/p/<id>)."""
+    _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
+
+    def _real_extract(self, url):
+        """Return a one-element list with the info dict for the video at url."""
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)
+        video_id = mobj.group(1)
+        webpage = self._download_webpage(url, video_id)
+        video_url = re.search(r'<meta property="og:video:secure_url" content="(.+?)">', webpage).group(1)
+        thumbnail_url = re.search(r'<meta property="og:image" content="(.+?)" />', webpage).group(1)
+        html_title = re.search(r'<title>(.+?)</title>', webpage).group(1)
+        # Cut the literal site suffix. str.strip() takes a *set* of characters, so
+        # strip("| Statigram") also ate matching letters from both ends of the title.
+        title = re.sub(r'\s*\|\s*Statigram\s*$', '', html_title).strip()
+        uploader = re.search(r'@(.+) \(Videos\)', title).group(1)
+        return [{
+            'id': video_id, 'url': video_url, 'ext': 'mp4',
+            'title': title, 'thumbnail': thumbnail_url, 'uploader': uploader,
+        }]
+
  def gen_extractors():
      """ Return a list of an instance of every supported extractor.
      The order does matter; the first extractor matched is the one handling the URL.
@@ -4607,6 +4641,7 @@ def gen_extractors():
          HypemIE(),
          Vbox7IE(),
          GametrailersIE(),
+        StatigrIE(),
          GenericIE()
      ]