]> gitweb @ CieloNegro.org - youtube-dl.git/blobdiff - youtube_dl/extractor/common.py
[options] Include custom conf in final argv (closes #11741)
[youtube-dl.git] / youtube_dl / extractor / common.py
index 58da2702526be72dc9c9415919d97e34375f064a..dce8c7d0d5ad389aa84bf84f25731a2e680e91e3 100644 (file)
@@ -189,9 +189,10 @@ class InfoExtractor(object):
     uploader_url:   Full URL to a personal webpage of the video uploader.
     location:       Physical location where the video was filmed.
     subtitles:      The available subtitles as a dictionary in the format
-                    {language: subformats}. "subformats" is a list sorted from
-                    lower to higher preference, each element is a dictionary
-                    with the "ext" entry and one of:
+                    {tag: subformats}. "tag" is usually a language code, and
+                    "subformats" is a list sorted from lower to higher
+                    preference, each element is a dictionary with the "ext"
+                    entry and one of:
                         * "data": The subtitles file contents
                         * "url": A URL pointing to the subtitles file
                     "ext" will be calculated from URL if missing
@@ -1225,7 +1226,7 @@ class InfoExtractor(object):
                 'protocol': entry_protocol,
                 'preference': preference,
             }]
-        audio_groups = set()
+        audio_in_video_stream = {}
         last_info = {}
         last_media = {}
         for line in m3u8_doc.splitlines():
@@ -1235,10 +1236,11 @@ class InfoExtractor(object):
                 media = parse_m3u8_attributes(line)
                 media_type = media.get('TYPE')
                 if media_type in ('VIDEO', 'AUDIO'):
+                    group_id = media.get('GROUP-ID')
                     media_url = media.get('URI')
                     if media_url:
                         format_id = []
-                        for v in (media.get('GROUP-ID'), media.get('NAME')):
+                        for v in (group_id, media.get('NAME')):
                             if v:
                                 format_id.append(v)
                         f = {
@@ -1251,12 +1253,15 @@ class InfoExtractor(object):
                         }
                         if media_type == 'AUDIO':
                             f['vcodec'] = 'none'
-                            audio_groups.add(media['GROUP-ID'])
+                            if group_id and not audio_in_video_stream.get(group_id):
+                                audio_in_video_stream[group_id] = False
                         formats.append(f)
                     else:
                         # When there is no URI in EXT-X-MEDIA let this tag's
                         # data be used by regular URI lines below
                         last_media = media
+                        if media_type == 'AUDIO' and group_id:
+                            audio_in_video_stream[group_id] = True
             elif line.startswith('#') or not line.strip():
                 continue
             else:
@@ -1300,7 +1305,7 @@ class InfoExtractor(object):
                         'abr': abr,
                     })
                 f.update(parse_codecs(last_info.get('CODECS')))
-                if last_info.get('AUDIO') in audio_groups:
+                if audio_in_video_stream.get(last_info.get('AUDIO')) is False:
                     # TODO: update acodec for for audio only formats with the same GROUP-ID
                     f['acodec'] = 'none'
                 formats.append(f)
@@ -1962,10 +1967,13 @@ class InfoExtractor(object):
                 entries.append(media_info)
         return entries
 
-    def _extract_akamai_formats(self, manifest_url, video_id):
+    def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
         formats = []
         hdcore_sign = 'hdcore=3.7.0'
-        f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
+        f4m_url = re.sub(r'(https?://[^/+])/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
+        hds_host = hosts.get('hds')
+        if hds_host:
+            f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url)
         if 'hdcore=' not in f4m_url:
             f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign
         f4m_formats = self._extract_f4m_formats(
@@ -1973,7 +1981,10 @@ class InfoExtractor(object):
         for entry in f4m_formats:
             entry.update({'extra_param_to_segment_url': hdcore_sign})
         formats.extend(f4m_formats)
-        m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
+        m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
+        hls_host = hosts.get('hls')
+        if hls_host:
+            m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
         formats.extend(self._extract_m3u8_formats(
             m3u8_url, video_id, 'mp4', 'm3u8_native',
             m3u8_id='hls', fatal=False))