]> gitweb @ CieloNegro.org - youtube-dl.git/blobdiff - youtube_dl/InfoExtractors.py
added StatigrIE
[youtube-dl.git] / youtube_dl / InfoExtractors.py
index 17e0f83232856270d4e90be6bf18148a4db13d18..4aec8c6879e79ccc13cf5e9bcfbf9abc17c4d1d1 100755 (executable)
@@ -420,7 +420,7 @@ class YoutubeIE(InfoExtractor):
     def _request_automatic_caption(self, video_id, webpage):
         """We need the webpage for getting the captions url, pass it as an
            argument to speed up the process."""
-        sub_lang = self._downloader.params.get('subtitleslang')
+        sub_lang = self._downloader.params.get('subtitleslang') or 'en'
         sub_format = self._downloader.params.get('subtitlesformat')
         self.to_screen(u'%s: Looking for automatic captions' % video_id)
         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
@@ -699,14 +699,14 @@ class YoutubeIE(InfoExtractor):
                         pass
                     else:
                         # We report the original error
-                        self._downloader.report_error(sub_error)
+                        self._downloader.report_warning(sub_error)
 
         if self._downloader.params.get('allsubtitles', False):
             video_subtitles = self._extract_all_subtitles(video_id)
             for video_subtitle in video_subtitles:
                 (sub_error, sub_lang, sub) = video_subtitle
                 if sub_error:
-                    self._downloader.report_error(sub_error)
+                    self._downloader.report_warning(sub_error)
 
         if self._downloader.params.get('listsubtitles', False):
             sub_lang_list = self._list_available_subtitles(video_id)
@@ -732,8 +732,11 @@ class YoutubeIE(InfoExtractor):
             for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
                 url_data = compat_parse_qs(url_data_str)
                 if 'itag' in url_data and 'url' in url_data:
-                    url = url_data['url'][0] + '&signature=' + url_data['sig'][0]
-                    if not 'ratebypass' in url: url += '&ratebypass=yes'
+                    url = url_data['url'][0]
+                    if 'sig' in url_data:
+                        url += '&signature=' + url_data['sig'][0]
+                    if 'ratebypass' not in url:
+                        url += '&ratebypass=yes'
                     url_map[url_data['itag'][0]] = url
 
             format_limit = self._downloader.params.get('format_limit', None)
@@ -1406,6 +1409,13 @@ class GenericIE(InfoExtractor):
         if mobj is None:
             # Try to find twitter cards info
             mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
+        if mobj is None:
+            # We look for Open Graph info:
+            # We have to match any number of spaces between elements, some sites try to align them (e.g. statigr.am)
+            m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
+            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
+            if m_video_type is not None:
+                mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
         if mobj is None:
             raise ExtractorError(u'Invalid URL: %s' % url)
 
@@ -1606,9 +1616,10 @@ class YoutubePlaylistIE(InfoExtractor):
                 # Number of videos is a multiple of self._MAX_RESULTS
                 break
 
-            videos += [ (entry['yt$position']['$t'], entry['content']['src'])
-                        for entry in response['feed']['entry']
-                        if 'content' in entry ]
+            for entry in response['feed']['entry']:
+                index = entry['yt$position']['$t']
+                if 'media$group' in entry and 'media$player' in entry['media$group']:
+                    videos.append((index, entry['media$group']['media$player']['url']))
 
             if len(response['feed']['entry']) < self._MAX_RESULTS:
                 break
@@ -4541,6 +4552,29 @@ class GametrailersIE(InfoExtractor):
                 'description': video_description,
                 }
 
+class StatigrIE(InfoExtractor):
+    """Information extractor for statigr.am video pages.
+
+    The video URL and thumbnail are read from the page's Open Graph
+    <meta> tags; the title comes from the <title> element.
+    """
+    _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
+    # Trailing text removed from the page title before it is returned.
+    _TITLE_SUFFIX = u'| Statigram'
+
+    def _search_required(self, pattern, webpage, name):
+        """Return group 1 of `pattern` in `webpage`.
+
+        Raises ExtractorError (instead of an AttributeError on None) when
+        the pattern is not found, so a layout change is reported clearly.
+        """
+        mobj = re.search(pattern, webpage)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract %s' % name)
+        return mobj.group(1)
+
+    def _real_extract(self, url):
+        """Download the page at `url` and return a one-element list holding
+        the info dictionary (id, url, ext, title, thumbnail, uploader).
+
+        Raises ExtractorError if the URL is invalid or a required field
+        cannot be found in the page.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)
+        video_id = mobj.group(1)
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._search_required(
+            r'<meta property="og:video:secure_url" content="(.+?)">',
+            webpage, u'video URL')
+        thumbnail_url = self._search_required(
+            r'<meta property="og:image" content="(.+?)" />',
+            webpage, u'thumbnail')
+        title = self._search_required(
+            r'<title>(.+?)</title>', webpage, u'title').strip()
+
+        # str.strip(chars) removes a *set of characters* from both ends, so
+        # it would also eat leading/trailing letters of the real title.
+        # Remove the suffix explicitly instead.
+        if title.endswith(self._TITLE_SUFFIX):
+            title = title[:-len(self._TITLE_SUFFIX)].rstrip()
+
+        # The uploader is optional: do not abort when the title does not
+        # follow the "@user (Videos)" layout.
+        m_uploader = re.search(r'@(.+) \(Videos\)', title)
+        uploader = m_uploader.group(1) if m_uploader is not None else None
+
+        return [{
+            'id':        video_id,
+            'url':       video_url,
+            'ext':       'mp4',
+            'title':     title,
+            'thumbnail': thumbnail_url,
+            'uploader':  uploader,
+        }]
+
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
     The order does matter; the first extractor matched is the one handling the URL.
@@ -4607,6 +4641,7 @@ def gen_extractors():
         HypemIE(),
         Vbox7IE(),
         GametrailersIE(),
+        StatigrIE(),
         GenericIE()
     ]