gitweb @ CieloNegro.org - youtube-dl.git/blobdiff - youtube_dl/extractor/ard.py
[ard] beta mediathek: make regexp for JSON more robust
[youtube-dl.git] / youtube_dl / extractor / ard.py
index 9c6be2dd920fc9747a640a3971df1b1bada82a74..dcb347849a10618d43d6fa14941a95154bda3e76 100644 (file)
@@ -307,7 +307,7 @@ class ARDBetaMediathekIE(InfoExtractor):
         display_id = mobj.group('display_id')
 
         webpage = self._download_webpage(url, display_id)
-        data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);', webpage, 'json')
+        data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json')
         data = self._parse_json(data_json, display_id)
 
         res = {
@@ -335,10 +335,24 @@ class ARDBetaMediathekIE(InfoExtractor):
                     'url': widget['_subtitleUrl'],
                 }]}
             if '_quality' in widget:
-                formats.append({
-                    'format_id': widget['_quality'],
-                    'url': widget['_stream']['json'][0],
-                })
+                format_url = widget['_stream']['json'][0]
+
+                if format_url.endswith('.f4m'):
+                    # f4m URLs just return a 403 - keep them, but with a very low preference
+                    formats.append({
+                        'format_id': 'f4m-' + widget['_quality'],
+                        'url': format_url,
+                        'preference': -1001,
+                    })
+                elif format_url.endswith('m3u8'):
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+                else:
+                    formats.append({
+                        'format_id': 'http-' + widget['_quality'],
+                        'url': format_url,
+                        'preference': 10,  # Plain HTTP, that's nice
+                    })
 
         self._sort_formats(formats)
         res['formats'] = formats