X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=69d5f687cbcfc913c1ee8ae3d8bc0a530b7202f0;hb=bcc069a937ca51b85e57fee61eff2f45f44816ac;hp=3213abacfc95f9e9dced77c2b8d9d5eac8e9c335;hpb=9732d77ed273406afcf9ed3ccb4d109824c9c69d;p=youtube-dl.git diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 3213abacf..69d5f687c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -18,6 +18,7 @@ from ..utils import ( clean_html, compiled_regex_type, ExtractorError, + int_or_none, RegexNotFoundError, sanitize_filename, unescapeHTML, @@ -83,6 +84,12 @@ class InfoExtractor(object): format, irrespective of the file format. -1 for default (order by other properties), -2 or smaller for less than default. + * http_referer HTTP Referer header value to set. + * http_method HTTP method to use for the download. + * http_headers A dictionary of additional HTTP headers + to add to the request. + * http_post_data Additional data to send with a POST + request. url: Final video URL. ext: Video filename extension. format: The video format, defaults to ext (used for --get-format) @@ -301,8 +308,12 @@ class InfoExtractor(object): def _download_json(self, url_or_request, video_id, note=u'Downloading JSON metadata', errnote=u'Unable to download JSON metadata', - transform_source=None): - json_string = self._download_webpage(url_or_request, video_id, note, errnote) + transform_source=None, + fatal=True): + json_string = self._download_webpage( + url_or_request, video_id, note, errnote, fatal=fatal) + if (not fatal) and json_string is False: + return None if transform_source: json_string = transform_source(json_string) try: @@ -369,7 +380,8 @@ class InfoExtractor(object): else: for p in pattern: mobj = re.search(p, string, flags) - if mobj: break + if mobj: + break if os.name != 'nt' and sys.stderr.isatty(): _name = u'\033[0;34m%s\033[0m' % name @@ -428,6 +440,22 @@ class InfoExtractor(object): return (username, password) + def _get_tfa_info(self): + """ + Get the two-factor authentication info + TODO - asking the user will be required for sms/phone verify + currently just uses the command line option + If there's no info available, return None + """ + if self._downloader is None: + return None + downloader_params = self._downloader.params + + if downloader_params.get('twofactor', None) is not None: + return downloader_params['twofactor'] + + return None + # Helper functions for extracting OpenGraph info @staticmethod def _og_regexes(prop): @@ -457,8 +485,9 @@ class InfoExtractor(object): return self._og_search_property('title', html, **kargs) def _og_search_video_url(self, html, name='video url', secure=True, **kargs): - regexes = self._og_regexes('video') - if secure: regexes = self._og_regexes('video:secure_url') + regexes + regexes = self._og_regexes('video') + self._og_regexes('video:url') + if secure: + regexes = self._og_regexes('video:secure_url') + regexes return self._html_search_regex(regexes, html, name, **kargs) def _og_search_url(self, html, **kargs): @@ -469,7 +498,7 @@ class InfoExtractor(object): display_name = name return self._html_search_regex( r'''(?ix)]+(?:itemprop|name|property)=["\']%s["\']) + (?=[^>]+(?:itemprop|name|property)=["\']?%s["\']?) [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), html, display_name, fatal=fatal, **kwargs) @@ -585,6 +614,28 @@ class InfoExtractor(object): self.to_screen(msg) time.sleep(timeout) + def _extract_f4m_formats(self, manifest_url, video_id): + manifest = self._download_xml( + manifest_url, video_id, 'Downloading f4m manifest', + 'Unable to download f4m manifest') + + formats = [] + media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media') + for i, media_el in enumerate(media_nodes): + tbr = int_or_none(media_el.attrib.get('bitrate')) + format_id = 'f4m-%d' % (i if tbr is None else tbr) + formats.append({ + 'format_id': format_id, + 'url': manifest_url, + 'ext': 'flv', + 'tbr': tbr, + 'width': int_or_none(media_el.attrib.get('width')), + 'height': int_or_none(media_el.attrib.get('height')), + }) + self._sort_formats(formats) + + return formats + class SearchInfoExtractor(InfoExtractor): """