parse_m3u8_attributes,
extract_attributes,
parse_codecs,
+ urljoin,
)
uploader_url: Full URL to a personal webpage of the video uploader.
location: Physical location where the video was filmed.
subtitles: The available subtitles as a dictionary in the format
- {language: subformats}. "subformats" is a list sorted from
- lower to higher preference, each element is a dictionary
- with the "ext" entry and one of:
+ {tag: subformats}. "tag" is usually a language code, and
+ "subformats" is a list sorted from lower to higher
+ preference, each element is a dictionary with the "ext"
+ entry and one of:
* "data": The subtitles file contents
* "url": A URL pointing to the subtitles file
"ext" will be calculated from URL if missing
'protocol': entry_protocol,
'preference': preference,
}]
- audio_groups = set()
+ audio_in_video_stream = {}
last_info = {}
last_media = {}
for line in m3u8_doc.splitlines():
media = parse_m3u8_attributes(line)
media_type = media.get('TYPE')
if media_type in ('VIDEO', 'AUDIO'):
+ group_id = media.get('GROUP-ID')
media_url = media.get('URI')
if media_url:
format_id = []
- for v in (media.get('GROUP-ID'), media.get('NAME')):
+ for v in (group_id, media.get('NAME')):
if v:
format_id.append(v)
f = {
}
if media_type == 'AUDIO':
f['vcodec'] = 'none'
- audio_groups.add(media['GROUP-ID'])
+ if group_id and not audio_in_video_stream.get(group_id):
+ audio_in_video_stream[group_id] = False
formats.append(f)
else:
# When there is no URI in EXT-X-MEDIA let this tag's
# data be used by regular URI lines below
last_media = media
+ if media_type == 'AUDIO' and group_id:
+ audio_in_video_stream[group_id] = True
elif line.startswith('#') or not line.strip():
continue
else:
'abr': abr,
})
f.update(parse_codecs(last_info.get('CODECS')))
- if last_info.get('AUDIO') in audio_groups:
+ if audio_in_video_stream.get(last_info.get('AUDIO')) is False:
# TODO: update acodec for audio only formats with the same GROUP-ID
f['acodec'] = 'none'
formats.append(f)
extract_Initialization(segment_template)
return ms_info
- def combine_url(base_url, target_url):
- if re.match(r'^https?://', target_url):
- return target_url
- return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
-
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
formats = []
for period in mpd_doc.findall(_add_ns('Period')):
'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
'fps': int_or_none(representation_attrib.get('frameRate')),
- 'vcodec': 'none' if content_type == 'audio' else representation_attrib.get('codecs'),
- 'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'),
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
'format_note': 'DASH %s' % content_type,
'filesize': filesize,
}
+ f.update(parse_codecs(representation_attrib.get('codecs')))
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
f['fragments'].append({'url': initialization_url})
f['fragments'].extend(representation_ms_info['fragments'])
for fragment in f['fragments']:
- fragment['url'] = combine_url(base_url, fragment['url'])
+ fragment['url'] = urljoin(base_url, fragment['url'])
try:
existing_format = next(
fo for fo in formats
})
return formats
- def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'):
+ def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None):
def absolute_url(video_url):
return compat_urlparse.urljoin(base_url, video_url)
def _media_formats(src, cur_media_type):
full_url = absolute_url(src)
- if determine_ext(full_url) == 'm3u8':
+ ext = determine_ext(full_url)
+ if ext == 'm3u8':
is_plain_url = False
formats = self._extract_m3u8_formats(
full_url, video_id, ext='mp4',
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id)
+ elif ext == 'mpd':
+ is_plain_url = False
+ formats = self._extract_mpd_formats(
+ full_url, video_id, mpd_id=mpd_id)
else:
is_plain_url = True
formats = [{
entries.append(media_info)
return entries
- def _extract_akamai_formats(self, manifest_url, video_id):
+ def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
formats = []
hdcore_sign = 'hdcore=3.7.0'
- f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
+ f4m_url = re.sub(r'(https?://[^/+])/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
+ hds_host = hosts.get('hds')
+ if hds_host:
+ f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url)
if 'hdcore=' not in f4m_url:
f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign
f4m_formats = self._extract_f4m_formats(
for entry in f4m_formats:
entry.update({'extra_param_to_segment_url': hdcore_sign})
formats.extend(f4m_formats)
- m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
+ m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
+ hls_host = hosts.get('hls')
+ if hls_host:
+ m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))