gitweb @ CieloNegro.org - youtube-dl.git/blobdiff - youtube_dl/extractor/generic.py
[extractor/generic] Improve instagram embeds (Closes #9213)
[youtube-dl.git] / youtube_dl / extractor / generic.py
index f3de738f765819da7cbda1332e9b6b6dcada5052..95d23325900e8ed0d61bcec5d09ff3ba3d5e7a82 100644 (file)
@@ -60,6 +60,7 @@ from .googledrive import GoogleDriveIE
 from .jwplatform import JWPlatformIE
 from .digiteka import DigitekaIE
 from .instagram import InstagramIE
+from .liveleak import LiveLeakIE
 
 
 class GenericIE(InfoExtractor):
@@ -104,7 +105,8 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,  # infinite live stream
             },
             'expected_warnings': [
-                r'501.*Not Implemented'
+                r'501.*Not Implemented',
+                r'400.*Bad Request',
             ],
         },
         # Direct link with incorrect MIME type
@@ -406,19 +408,6 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             },
         },
-        # multiple ooyala embeds on SBN network websites
-        {
-            'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
-            'info_dict': {
-                'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
-                'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
-            },
-            'playlist_mincount': 3,
-            'params': {
-                'skip_download': True,
-            },
-            'add_ie': ['Ooyala'],
-        },
         # embed.ly video
         {
             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
@@ -1141,6 +1130,30 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        # Another form of arte.tv embed
+        {
+            'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
+            'md5': '850bfe45417ddf221288c88a0cffe2e2',
+            'info_dict': {
+                'id': '030273-562_PLUS7-F',
+                'ext': 'mp4',
+                'title': 'ARTE Reportage - Nulle part, en France',
+                'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
+                'upload_date': '20160409',
+            },
+        },
+        # LiveLeak embed
+        {
+            'url': 'http://www.wykop.pl/link/3088787/',
+            'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
+            'info_dict': {
+                'id': '874_1459135191',
+                'ext': 'mp4',
+                'title': 'Man shows poor quality of new apartment building',
+                'description': 'The wall is like a sand pile.',
+                'uploader': 'Lake8737',
+            }
+        },
     ]
 
     def report_following_redirect(self, new_url):
@@ -1715,7 +1728,7 @@ class GenericIE(InfoExtractor):
 
         # Look for embedded arte.tv player
         mobj = re.search(
-            r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
+            r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
             webpage)
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
@@ -1943,7 +1956,13 @@ class GenericIE(InfoExtractor):
         # Look for Instagram embeds
         instagram_embed_url = InstagramIE._extract_embed_url(webpage)
         if instagram_embed_url is not None:
-            return self.url_result(instagram_embed_url, InstagramIE.ie_key())
+            return self.url_result(
+                self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
+
+        # Look for LiveLeak embeds
+        liveleak_url = LiveLeakIE._extract_url(webpage)
+        if liveleak_url:
+            return self.url_result(liveleak_url, 'LiveLeak')
 
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):