added StatigrIE

[youtube-dl.git] / youtube_dl / InfoExtractors.py
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

index 619ddeba1c8ff06587d4c4945aff0c679053555f..4aec8c6879e79ccc13cf5e9bcfbf9abc17c4d1d1 100755 (executable)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -1409,6 +1409,13 @@ class GenericIE(InfoExtractor):
          if mobj is None:
              # Try to find twitter cards info
              mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
+        if mobj is None:
+            # We look for Open Graph info:
+            # We have to match any number of spaces between elements, since some sites try to align them (e.g. statigr.am)
+            m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
+            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
+            if m_video_type is not None:
+                mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
          if mobj is None:
              raise ExtractorError(u'Invalid URL: %s' % url)
  
@@ -4545,6 +4552,29 @@ class GametrailersIE(InfoExtractor):
                  'description': video_description,
                  }
  
+class StatigrIE(InfoExtractor):
+    """Extractor for statigr.am video pages (statigr.am/p/<id>); raises ExtractorError if a required field is missing."""
+    _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)
+        info = {'id': mobj.group(1), 'ext': 'mp4'}
+        webpage = self._download_webpage(url, info['id'])
+        for key, regex in (('url', r'<meta property="og:video:secure_url" content="(.+?)">'),
+                           ('thumbnail', r'<meta property="og:image" content="(.+?)" />'),
+                           ('title', r'<title>(.+?)</title>')):
+            found = re.search(regex, webpage)
+            if found is None:  # report a clean ExtractorError instead of AttributeError on None
+                raise ExtractorError(u'Unable to extract %s' % key)
+            info[key] = found.group(1)
+        # Drop only the site suffix; str.strip(chars) would eat a *set of characters* from both ends
+        info['title'] = re.sub(r'\s*\|\s*Statigram\s*$', u'', info['title']).strip()
+        uploader = re.search(r'@(.+) \(Videos\)', info['title'])
+        info['uploader'] = uploader.group(1) if uploader else None  # optional field
+        return [info]
+
  def gen_extractors():
      """ Return a list of an instance of every supported extractor.
      The order does matter; the first extractor matched is the one handling the URL.
@@ -4611,6 +4641,7 @@ def gen_extractors():
          HypemIE(),
          Vbox7IE(),
          GametrailersIE(),
+        StatigrIE(),
          GenericIE()
      ]