X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=5eeeda08d3b2cda957106d2ee512eeee3ed55e02;hb=b58a22b9632a226fc453bb52f195e7577476f35e;hp=9b4775e0a4bf902f8e77e050ba6679a24b0c6534;hpb=e0b9d78fab76e2c2819c8a9a7512ad4533319b72;p=youtube-dl.git
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 9b4775e0a..5eeeda08d 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -15,6 +15,7 @@ import xml.etree.ElementTree
from ..compat import (
compat_cookiejar,
compat_cookies,
+ compat_getpass,
compat_HTTPError,
compat_http_client,
compat_urllib_error,
@@ -509,6 +510,12 @@ class InfoExtractor(object):
"""Report attempt to log in."""
self.to_screen('Logging in')
+ @staticmethod
+ def raise_login_required(msg='This video is only available for registered users'):
+ raise ExtractorError(
+ '%s. Use --username and --password or --netrc to provide account credentials.' % msg,
+ expected=True)
+
# Methods for following #608
@staticmethod
def url_result(url, ie=None, video_id=None, video_title=None):
@@ -610,7 +617,7 @@ class InfoExtractor(object):
return (username, password)
- def _get_tfa_info(self):
+ def _get_tfa_info(self, note='two-factor verification code'):
"""
Get the two-factor authentication info
TODO - asking the user will be required for sms/phone verify
@@ -624,7 +631,7 @@ class InfoExtractor(object):
if downloader_params.get('twofactor', None) is not None:
return downloader_params['twofactor']
- return None
+ return compat_getpass('Type %s and press [Return]: ' % note)
# Helper functions for extracting OpenGraph info
@staticmethod
@@ -640,7 +647,7 @@ class InfoExtractor(object):
@staticmethod
def _meta_regex(prop):
return r'''(?isx)]+(?:itemprop|name|property|id)=(["\']?)%s\1)
+ (?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?)%s\1)
[^>]+?content=(["\'])(?P.*?)\2''' % re.escape(prop)
def _og_search_property(self, prop, html, name=None, **kargs):
@@ -724,16 +731,18 @@ class InfoExtractor(object):
@staticmethod
def _hidden_inputs(html):
- return dict([
- (input.group('name'), input.group('value')) for input in re.finditer(
- r'''(?x)
- ["\'])hidden(?P=q_hidden)\s+
- name=(?P["\'])(?P.+?)(?P=q_name)\s+
- (?:id=(?P["\']).+?(?P=q_id)\s+)?
- value=(?P["\'])(?P.*?)(?P=q_value)
- ''', html)
- ])
+ hidden_inputs = {}
+ for input in re.findall(r']+)>', html):
+ if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
+ continue
+ name = re.search(r'name=(["\'])(?P.+?)\1', input)
+ if not name:
+ continue
+ value = re.search(r'value=(["\'])(?P.*?)\1', input)
+ if not value:
+ continue
+ hidden_inputs[name.group('value')] = value.group('value')
+ return hidden_inputs
def _form_hidden_inputs(self, form_id, html):
form = self._search_regex(
@@ -1049,7 +1058,7 @@ class InfoExtractor(object):
return self._search_regex(
r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
- def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None):
+ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
base = smil_url
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
b = meta.get('base') or meta.get('httpBase')
@@ -1088,6 +1097,12 @@ class InfoExtractor(object):
'width': width,
'height': height,
})
+ if transform_rtmp_url:
+ streamer, src = transform_rtmp_url(streamer, src)
+ formats[-1].update({
+ 'url': streamer,
+ 'play_path': src,
+ })
continue
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
@@ -1126,7 +1141,7 @@ class InfoExtractor(object):
return formats
- def _parse_smil_subtitles(self, smil, namespace=None):
+ def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
subtitles = {}
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
src = textstream.get('src')
@@ -1135,20 +1150,29 @@ class InfoExtractor(object):
ext = textstream.get('ext') or determine_ext(src)
if not ext:
type_ = textstream.get('type')
- if type_ == 'text/srt':
- ext = 'srt'
- lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName')
+ SUBTITLES_TYPES = {
+ 'text/vtt': 'vtt',
+ 'text/srt': 'srt',
+ 'application/smptett+xml': 'tt',
+ }
+ if type_ in SUBTITLES_TYPES:
+ ext = SUBTITLES_TYPES[type_]
+ lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
subtitles.setdefault(lang, []).append({
'url': src,
'ext': ext,
})
return subtitles
- def _extract_xspf_playlist(self, playlist_url, playlist_id):
- playlist = self._download_xml(
+ def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True):
+ xspf = self._download_xml(
playlist_url, playlist_id, 'Downloading xpsf playlist',
- 'Unable to download xspf manifest')
+ 'Unable to download xspf manifest', fatal=fatal)
+ if xspf is False:
+ return []
+ return self._parse_xspf(xspf, playlist_id)
+ def _parse_xspf(self, playlist, playlist_id):
NS_MAP = {
'xspf': 'http://xspf.org/ns/0/',
's1': 'http://static.streamone.nl/player/ns/0',
@@ -1157,7 +1181,7 @@ class InfoExtractor(object):
entries = []
for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
title = xpath_text(
- track, xpath_with_ns('./xspf:title', NS_MAP), 'title')
+ track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
description = xpath_text(
track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
thumbnail = xpath_text(
@@ -1261,6 +1285,23 @@ class InfoExtractor(object):
def _get_subtitles(self, *args, **kwargs):
raise NotImplementedError("This method must be implemented by subclasses")
+ @staticmethod
+ def _merge_subtitle_items(subtitle_list1, subtitle_list2):
+ """ Merge subtitle items for one language. Items with duplicated URLs
+ will be dropped. """
+ list1_urls = set([item['url'] for item in subtitle_list1])
+ ret = list(subtitle_list1)
+ ret.extend([item for item in subtitle_list2 if item['url'] not in list1_urls])
+ return ret
+
+ @classmethod
+ def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
+ """ Merge two subtitle dictionaries, language by language. """
+ ret = dict(subtitle_dict1)
+ for lang in subtitle_dict2:
+ ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
+ return ret
+
def extract_automatic_captions(self, *args, **kwargs):
if (self._downloader.params.get('writeautomaticsub', False) or
self._downloader.params.get('listsubtitles')):