gitweb @ CieloNegro.org - youtube-dl.git/blobdiff - youtube_dl/extractor/francetv.py
[francetv] Fix f4m extraction
[youtube-dl.git] / youtube_dl / extractor / francetv.py
index b2c984bf272783d8fa71ebe0679066b3d98d25f5..624895ff97bd44b8ed85f9975c1dddb19135cf55 100644 (file)
@@ -14,7 +14,6 @@ from ..utils import (
     clean_html,
     ExtractorError,
     int_or_none,
-    float_or_none,
     parse_duration,
     determine_ext,
 )
@@ -59,9 +58,8 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
                     # See https://github.com/rg3/youtube-dl/issues/3963
                     # m3u8 urls work fine
                     continue
-                video_url_parsed = compat_urllib_parse_urlparse(video_url)
                 f4m_url = self._download_webpage(
-                    'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url_parsed.path,
+                    'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url,
                     video_id, 'Downloading f4m manifest token', fatal=False)
                 if f4m_url:
                     formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id))
@@ -87,7 +85,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
             'title': info['titre'],
             'description': clean_html(info['synopsis']),
             'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
-            'duration': float_or_none(info.get('real_duration'), 1000) or parse_duration(info['duree']),
+            'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
             'timestamp': int_or_none(info['diffusion']['timestamp']),
             'formats': formats,
         }
@@ -160,11 +158,19 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
 class FranceTVIE(FranceTVBaseInfoExtractor):
     IE_NAME = 'francetv'
     IE_DESC = 'France 2, 3, 4, 5 and Ô'
-    _VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
-        (?:
-            emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
-        |   (emissions?|jt)/(?P<key>[^/?]+)
-        )'''
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:www\.)?france[2345o]\.fr/
+                                (?:
+                                    emissions/[^/]+/(?:videos|diffusions)?|
+                                    videos
+                                )
+                            /|
+                            embed\.francetv\.fr/\?ue=
+                        )
+                        (?P<id>[^/?]+)
+                    '''
 
     _TESTS = [
         # france2
@@ -232,13 +238,34 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
                 'timestamp': 1410822000,
             },
         },
+        {
+            # francetv embed
+            'url': 'http://embed.francetv.fr/?ue=8d7d3da1e3047c42ade5a5d7dfd3fc87',
+            'info_dict': {
+                'id': 'EV_30231',
+                'ext': 'flv',
+                'title': 'Alcaline, le concert avec Calogero',
+                'description': 'md5:',
+                'upload_date': '20150226',
+                'timestamp': 1424989860,
+                'duration': 5400,
+            },
+        },
+        {
+            'url': 'http://www.france4.fr/emission/highlander/diffusion-du-17-07-2015-04h05',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://www.franceo.fr/videos/125377617',
+            'only_matching': True,
+        }
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        webpage = self._download_webpage(url, mobj.group('key') or mobj.group('id'))
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
         video_id, catalogue = self._html_search_regex(
-            r'href="http://videos\.francetv\.fr/video/([^@]+@[^"]+)"',
+            r'href="http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
             webpage, 'video ID').split('@')
         return self._extract_video(video_id, catalogue)