import traceback
import random
+from string import ascii_letters
+
from .compat import (
compat_basestring,
compat_cookiejar,
+ compat_numeric_types,
)
from .utils import (
format_bytes,
formatSeconds,
GeoRestrictedError,
+ int_or_none,
ISO3166Utils,
locked_file,
make_HTTPS_handler,
write_string,
YoutubeDLCookieProcessor,
YoutubeDLHandler,
+ PhantomJSwrapper,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
postprocessor.
"""
+ _NUMERIC_FIELDS = set((
+ 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
+ 'timestamp', 'upload_year', 'upload_month', 'upload_day',
+ 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
+ 'average_rating', 'comment_count', 'age_limit',
+ 'start_time', 'end_time',
+ 'chapter_number', 'season_number', 'episode_number',
+ 'track_number', 'disc_number', 'release_year',
+ 'playlist_index',
+ ))
+
params = None
_ies = []
_pps = []
else:
raise
- if (sys.version_info >= (3,) and sys.platform != 'win32' and
+ if (sys.platform != 'win32' and
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
not params.get('restrictfilenames', False)):
- # On Python 3, the Unicode filesystem API will throw errors (#1474)
+ # Unicode filesystem API will throw errors (#1474, #13027)
self.report_warning(
'Assuming --restrict-filenames since file system encoding '
'cannot encode all characters. '
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
return
- if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
- # c_wchar_p() might not be necessary if `message` is
- # already of type unicode()
- ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
+ if compat_os_name == 'nt':
+ if ctypes.windll.kernel32.GetConsoleWindow():
+ # c_wchar_p() might not be necessary if `message` is
+ # already of type unicode()
+ ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ:
self._write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
if not self.params.get('consoletitle', False):
return
- if 'TERM' in os.environ:
+ if compat_os_name != 'nt' and 'TERM' in os.environ:
# Save the title on stack
self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
if not self.params.get('consoletitle', False):
return
- if 'TERM' in os.environ:
+ if compat_os_name != 'nt' and 'TERM' in os.environ:
# Restore the title from stack
self._write_string('\033[23;0t', self._screen_file)
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
outtmpl)
- NUMERIC_FIELDS = set((
- 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
- 'timestamp', 'upload_year', 'upload_month', 'upload_day',
- 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
- 'average_rating', 'comment_count', 'age_limit',
- 'start_time', 'end_time',
- 'chapter_number', 'season_number', 'episode_number',
- 'track_number', 'disc_number', 'release_year',
- 'playlist_index',
- ))
-
# Missing numeric fields used together with integer presentation types
# in format specification will break the argument substitution since
# string 'NA' is returned for missing fields. We will patch output
# template for missing fields to meet string presentation type.
- for numeric_field in NUMERIC_FIELDS:
+ for numeric_field in self._NUMERIC_FIELDS:
if numeric_field not in template_dict:
# As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
FORMAT_RE.format(numeric_field),
r'%({0})s'.format(numeric_field), outtmpl)
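+ # e.g. with 'playlist_index' missing from template_dict,
+ # '%(playlist_index)05d' in the template is rewritten to
+ # '%(playlist_index)s' so that the string 'NA' can be substituted.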
- filename = expand_path(outtmpl % template_dict)
+ # expand_path translates '%%' into '%' and '$$' into '$'. That is not
+ # what we want, since we need to keep '%%' intact for the template dict
+ # substitution step. Work around this with a boundary-like separator
+ # hack.
+ sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
+ outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
+
+ # outtmpl should be expand_path'ed before template dict substitution
+ # because meta fields may contain env variables we don't want to
+ # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
+ # title "Hello $PATH", we don't want `$PATH` to be expanded.
+ filename = expand_path(outtmpl).replace(sep, '') % template_dict
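+ # Illustration of the hack above: for outtmpl '%(title)s-100%%.%(ext)s'
+ # and title 'Hello $PATH', the literal '%%' survives expand_path (thanks
+ # to the sep markers) and '$PATH' inside the substituted title is never
+ # seen by expand_path at all.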
+
# Temporary fix for #4787
# 'Treat' all problem characters by passing filename through preferredencoding
# to work around encoding issues with subprocess on Python 2 on Windows
force_properties = dict(
(k, v) for k, v in ie_result.items() if v is not None)
- for f in ('_type', 'url', 'ie_key'):
+ for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
if f in force_properties:
del force_properties[f]
new_result = info.copy()
return op(actual_value, comparison_value)
return _filter
+ def _default_format_spec(self, info_dict, download=True):
+ req_format_list = []
+
+ def can_have_partial_formats():
+ if self.params.get('simulate', False):
+ return True
+ if not download:
+ return True
+ if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
+ return False
+ if info_dict.get('is_live'):
+ return False
+ merger = FFmpegMergerPP(self)
+ return merger.available and merger.can_merge()
+ if can_have_partial_formats():
+ req_format_list.append('bestvideo+bestaudio')
+ req_format_list.append('best')
+ return '/'.join(req_format_list)
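+ # For reference: with a working ffmpeg merger and a regular file target
+ # this returns 'bestvideo+bestaudio/best'; when downloading to stdout
+ # (outtmpl '-') or for live streams it degrades to plain 'best'.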
+
def build_format_selector(self, format_spec):
def syntax_error(note, start):
message = (
if 'title' not in info_dict:
raise ExtractorError('Missing "title" field in extractor result')
- if not isinstance(info_dict['id'], compat_str):
- self.report_warning('"id" field is not a string - forcing string conversion')
- info_dict['id'] = compat_str(info_dict['id'])
+ def report_force_conversion(field, field_not, conversion):
+ self.report_warning(
+ '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
+ % (field, field_not, conversion))
+
+ def sanitize_string_field(info, string_field):
+ field = info.get(string_field)
+ if field is None or isinstance(field, compat_str):
+ return
+ report_force_conversion(string_field, 'a string', 'string')
+ info[string_field] = compat_str(field)
+
+ def sanitize_numeric_fields(info):
+ for numeric_field in self._NUMERIC_FIELDS:
+ field = info.get(numeric_field)
+ if field is None or isinstance(field, compat_numeric_types):
+ continue
+ report_force_conversion(numeric_field, 'numeric', 'int')
+ info[numeric_field] = int_or_none(field)
+
+ sanitize_string_field(info_dict, 'id')
+ sanitize_numeric_fields(info_dict)
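+ # e.g. an extractor result {'id': 1234, 'duration': '120', ...} is
+ # coerced to id '1234' (str) and duration 120 (int), each with a
+ # warning, instead of breaking format selection or output templates
+ # further down the line.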
if 'playlist' not in info_dict:
# It isn't part of a playlist
if not formats:
raise ExtractorError('No video formats found!')
+ def is_wellformed(f):
+ url = f.get('url')
+ if not url:
+ self.report_warning(
+ '"url" field is missing or empty - skipping format, '
+ 'there is an error in extractor')
+ return False
+ if isinstance(url, bytes):
+ sanitize_string_field(f, 'url')
+ return True
+
+ # Filter out malformed formats for better extraction robustness
+ formats = list(filter(is_wellformed, formats))
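+ # e.g. a format dict whose 'url' is None or '' is dropped with a warning
+ # here rather than failing the whole extraction later on.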
+
formats_dict = {}
# We check that all the formats have the format and format_id fields
for i, format in enumerate(formats):
- if 'url' not in format:
- raise ExtractorError('Missing "url" key in result (index %d)' % i)
-
+ sanitize_string_field(format, 'format_id')
+ sanitize_numeric_fields(format)
format['url'] = sanitize_url(format['url'])
-
- if format.get('format_id') is None:
+ if not format.get('format_id'):
format['format_id'] = compat_str(i)
else:
# Sanitize format_id from characters used in format selector expression
req_format = self.params.get('format')
if req_format is None:
- req_format_list = []
- if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
- not info_dict.get('is_live')):
- merger = FFmpegMergerPP(self)
- if merger.available and merger.can_merge():
- req_format_list.append('bestvideo+bestaudio')
- req_format_list.append('best')
- req_format = '/'.join(req_format_list)
+ req_format = self._default_format_spec(info_dict, download=download)
+ if self.params.get('verbose'):
+ self.to_stdout('[debug] Default format spec: %s' % req_format)
+
format_selector = self.build_format_selector(req_format)
# While in format selection we may need to have an access to the original
if filename is None:
return
- try:
- dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
- if dn and not os.path.exists(dn):
- os.makedirs(dn)
- except (OSError, IOError) as err:
- self.report_error('unable to create directory ' + error_to_compat_str(err))
+ def ensure_dir_exists(path):
+ try:
+ dn = os.path.dirname(path)
+ if dn and not os.path.exists(dn):
+ os.makedirs(dn)
+ return True
+ except (OSError, IOError) as err:
+ self.report_error('unable to create directory ' + error_to_compat_str(err))
+ return False
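+ # ensure_dir_exists is reused below for the per-format filenames
+ # produced during merged (bestvideo+bestaudio) downloads, where each
+ # intermediate file may land in a directory that does not exist yet.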
+
+ if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
return
if self.params.get('writedescription', False):
ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
- if sub_info.get('data') is not None:
- sub_data = sub_info['data']
+ sub_filename = subtitles_filename(filename, sub_lang, sub_format)
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
+ self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
else:
- try:
- sub_data = ie._download_webpage(
- sub_info['url'], info_dict['id'], note=False)
- except ExtractorError as err:
- self.report_warning('Unable to download subtitle for "%s": %s' %
- (sub_lang, error_to_compat_str(err.cause)))
- continue
- try:
- sub_filename = subtitles_filename(filename, sub_lang, sub_format)
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
- self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
+ self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
+ if sub_info.get('data') is not None:
+ try:
+ # Use newline='' to prevent conversion of newline characters
+ # See https://github.com/rg3/youtube-dl/issues/10268
+ with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
+ subfile.write(sub_info['data'])
+ except (OSError, IOError):
+ self.report_error('Cannot write subtitles file ' + sub_filename)
+ return
else:
- self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
- # Use newline='' to prevent conversion of newline characters
- # See https://github.com/rg3/youtube-dl/issues/10268
- with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
- subfile.write(sub_data)
- except (OSError, IOError):
- self.report_error('Cannot write subtitles file ' + sub_filename)
- return
+ try:
+ sub_data = ie._request_webpage(
+ sub_info['url'], info_dict['id'], note=False).read()
+ with io.open(encodeFilename(sub_filename), 'wb') as subfile:
+ subfile.write(sub_data)
+ except (ExtractorError, IOError, OSError, ValueError) as err:
+ self.report_warning('Unable to download subtitle for "%s": %s' %
+ (sub_lang, error_to_compat_str(err)))
+ continue
if self.params.get('writeinfojson', False):
infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
for f in requested_formats:
new_info = dict(info_dict)
new_info.update(f)
- fname = self.prepare_filename(new_info)
- fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
+ fname = prepend_extension(
+ self.prepare_filename(new_info),
+ 'f%s' % f['format_id'], new_info['ext'])
+ if not ensure_dir_exists(fname):
+ return
downloaded.append(fname)
partial_success = dl(fname, new_info)
success = success and partial_success
info_dict.get('protocol') == 'm3u8' and
self.params.get('hls_prefer_native')):
if fixup_policy == 'warn':
- self.report_warning('%s: malformated aac bitstream.' % (
+ self.report_warning('%s: malformed AAC bitstream detected.' % (
info_dict['id']))
elif fixup_policy == 'detect_or_warn':
fixup_pp = FFmpegFixupM3u8PP(self)
info_dict['__postprocessors'].append(fixup_pp)
else:
self.report_warning(
- '%s: malformated aac bitstream. %s'
+ '%s: malformed AAC bitstream detected. %s'
% (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
else:
assert fixup_policy in ('ignore', 'never')
exe_versions = FFmpegPostProcessor.get_versions(self)
exe_versions['rtmpdump'] = rtmpdump_version()
+ exe_versions['phantomjs'] = PhantomJSwrapper._version()
exe_str = ', '.join(
'%s %s' % (exe, v)
for exe, v in sorted(exe_versions.items())
compat_urllib_parse_urlencode,
compat_urllib_request,
compat_urlparse,
+ compat_xml_parse_error,
)
from ..downloader.f4m import remove_encrypted_media
from ..utils import (
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
m = cls._VALID_URL_RE.match(url)
assert m
- return m.group('id')
+ return compat_str(m.group('id'))
@classmethod
def working(cls):
if country_code:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
if self._downloader.params.get('verbose', False):
- self._downloader.to_stdout(
+ self._downloader.to_screen(
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
% (self._x_forwarded_for_ip, country_code.upper()))
def _download_xml(self, url_or_request, video_id,
note='Downloading XML', errnote='Unable to download XML',
- transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}):
+ transform_source=None, fatal=True, encoding=None,
+ data=None, headers={}, query={}):
"""Return the xml as an xml.etree.ElementTree.Element"""
xml_string = self._download_webpage(
- url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query)
+ url_or_request, video_id, note, errnote, fatal=fatal,
+ encoding=encoding, data=data, headers=headers, query=query)
if xml_string is False:
return xml_string
+ return self._parse_xml(
+ xml_string, video_id, transform_source=transform_source,
+ fatal=fatal)
+
+ def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
if transform_source:
xml_string = transform_source(xml_string)
- return compat_etree_fromstring(xml_string.encode('utf-8'))
+ try:
+ return compat_etree_fromstring(xml_string.encode('utf-8'))
+ except compat_xml_parse_error as ve:
+ errmsg = '%s: Failed to parse XML ' % video_id
+ if fatal:
+ raise ExtractorError(errmsg, cause=ve)
+ else:
+ self.report_warning(errmsg + str(ve))
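+ # Usage sketch: extractors can now feed already-downloaded XML (e.g.
+ # embedded in a webpage) to self._parse_xml(xml_string, video_id,
+ # fatal=False) and get None plus a warning on malformed input instead of
+ # an uncaught compat_xml_parse_error.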
def _download_json(self, url_or_request, video_id,
note='Downloading JSON metadata',
video_info['title'] = video_title
return video_info
- def playlist_from_matches(self, matches, video_id, video_title, getter=None, ie=None):
- urlrs = orderedSet(
+ def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None):
+ urls = orderedSet(
self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
for m in matches)
return self.playlist_result(
- urlrs, playlist_id=video_id, playlist_title=video_title)
+ urls, playlist_id=playlist_id, playlist_title=playlist_title)
@staticmethod
def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
def _family_friendly_search(self, html):
# See http://schema.org/VideoObject
- family_friendly = self._html_search_meta('isFamilyFriendly', html)
+ family_friendly = self._html_search_meta(
+ 'isFamilyFriendly', html, default=None)
if not family_friendly:
return None
item_type = e.get('@type')
if expected_type is not None and expected_type != item_type:
return info
- if item_type == 'TVEpisode':
+ if item_type in ('TVEpisode', 'Episode'):
info.update({
'episode': unescapeHTML(e.get('name')),
'episode_number': int_or_none(e.get('episodeNumber')),
'description': unescapeHTML(e.get('description')),
})
part_of_season = e.get('partOfSeason')
- if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
+ if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
- if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
+ if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
info['series'] = unescapeHTML(part_of_series.get('name'))
elif item_type == 'Article':
info.update({
})
elif item_type == 'VideoObject':
extract_video_object(e)
- elif item_type == 'WebPage':
- video = e.get('video')
- if isinstance(video, dict) and video.get('@type') == 'VideoObject':
- extract_video_object(video)
+ continue
+ video = e.get('video')
+ if isinstance(video, dict) and video.get('@type') == 'VideoObject':
+ extract_video_object(video)
break
return dict((k, v) for k, v in info.items() if v is not None)
ms_info['timescale'] = int(timescale)
segment_duration = source.get('duration')
if segment_duration:
- ms_info['segment_duration'] = int(segment_duration)
+ ms_info['segment_duration'] = float(segment_duration)
def extract_Initialization(source):
initialization = source.find(_add_ns('Initialization'))
'Bandwidth': bandwidth,
}
+ def location_key(location):
+ return 'url' if re.match(r'^https?://', location) else 'path'
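+ # e.g. location_key('https://cdn.example.com/seg-1.m4s') -> 'url' while
+ # location_key('seg-1.m4s') -> 'path'; relative 'path' fragments are
+ # resolved against fragment_base_url later by the downloader.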
+
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
+ media_location_key = location_key(media_template)
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
# can't be used at the same time
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
representation_ms_info['fragments'] = [{
- 'url': media_template % {
+ media_location_key: media_template % {
'Number': segment_number,
'Bandwidth': bandwidth,
},
'Number': segment_number,
}
representation_ms_info['fragments'].append({
- 'url': segment_url,
+ media_location_key: segment_url,
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
})
for s in representation_ms_info['s']:
duration = float_or_none(s['d'], timescale)
for r in range(s.get('r', 0) + 1):
+ segment_uri = representation_ms_info['segment_urls'][segment_index]
fragments.append({
- 'url': representation_ms_info['segment_urls'][segment_index],
+ location_key(segment_uri): segment_uri,
'duration': duration,
})
segment_index += 1
# No fragments key is present in this case.
if 'fragments' in representation_ms_info:
f.update({
+ 'fragment_base_url': base_url,
'fragments': [],
'protocol': 'http_dash_segments',
})
initialization_url = representation_ms_info['initialization_url']
if not f.get('url'):
f['url'] = initialization_url
- f['fragments'].append({'url': initialization_url})
+ f['fragments'].append({location_key(initialization_url): initialization_url})
f['fragments'].extend(representation_ms_info['fragments'])
- for fragment in f['fragments']:
- fragment['url'] = urljoin(base_url, fragment['url'])
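+ # Instead of eagerly resolving every fragment URL with urljoin, relative
+ # locations are now kept as 'path' entries and fragment_base_url lets
+ # the DASH downloader resolve them lazily.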
try:
existing_format = next(
fo for fo in formats
compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
+ """
+ Parse formats from ISM manifest.
+ References:
+ 1. [MS-SSTR]: Smooth Streaming Protocol,
+ https://msdn.microsoft.com/en-us/library/ff469518.aspx
+ """
if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
return []
self.report_warning('%s is not a supported codec' % fourcc)
continue
tbr = int(track.attrib['Bitrate']) // 1000
- width = int_or_none(track.get('MaxWidth'))
- height = int_or_none(track.get('MaxHeight'))
+ # [1] does not mention Width and Height attributes. However,
+ # they're often present while MaxWidth and MaxHeight are
+ # missing, so should be used as fallbacks
+ width = int_or_none(track.get('MaxWidth') or track.get('Width'))
+ height = int_or_none(track.get('MaxHeight') or track.get('Height'))
sampling_rate = int_or_none(track.get('SamplingRate'))
track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
return f
return {}
- def _media_formats(src, cur_media_type):
+ def _media_formats(src, cur_media_type, type_info={}):
full_url = absolute_url(src)
- ext = determine_ext(full_url)
+ ext = type_info.get('ext') or determine_ext(full_url)
if ext == 'm3u8':
is_plain_url = False
formats = self._extract_m3u8_formats(
full_url, video_id, ext='mp4',
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
- preference=preference)
+ preference=preference, fatal=False)
elif ext == 'mpd':
is_plain_url = False
formats = self._extract_mpd_formats(
- full_url, video_id, mpd_id=mpd_id)
+ full_url, video_id, mpd_id=mpd_id, fatal=False)
else:
is_plain_url = True
formats = [{
return is_plain_url, formats
entries = []
+ # amp-video and amp-audio are very similar to their HTML5 counterparts
+ # so we will include them right here (see
+ # https://www.ampproject.org/docs/reference/components/amp-video)
media_tags = [(media_tag, media_type, '')
for media_tag, media_type
- in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
+ in re.findall(r'(?s)(<(?:amp-)?(video|audio)[^>]*/>)', webpage)]
media_tags.extend(re.findall(
# We only allow video|audio followed by a whitespace or '>'.
# Allowing more characters may end up in significant slow down (see
# https://github.com/rg3/youtube-dl/issues/11979, example URL:
# http://www.porntrex.com/maps/videositemap.xml).
- r'(?s)(<(?P<tag>video|audio)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
+ r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
for media_tag, media_type, media_content in media_tags:
media_info = {
'formats': [],
src = source_attributes.get('src')
if not src:
continue
- is_plain_url, formats = _media_formats(src, media_type)
+ f = parse_content_type(source_attributes.get('type'))
+ is_plain_url, formats = _media_formats(src, media_type, f)
if is_plain_url:
- f = parse_content_type(source_attributes.get('type'))
+ # res attribute is not standard but seen several times
+ # in the wild
+ f.update({
+ 'height': int_or_none(source_attributes.get('res')),
+ 'format_id': source_attributes.get('label'),
+ })
f.update(formats[0])
media_info['formats'].append(f)
else:
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
formats = []
hdcore_sign = 'hdcore=3.7.0'
- f4m_url = re.sub(r'(https?://[^/+])/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
+ f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
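+ # e.g. 'https://host/i/foo/master.m3u8' now becomes
+ # 'https://host/z/foo/manifest.f4m' (the old '[^/+]' character class
+ # matched a single character and failed for virtually every real host).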
hds_host = hosts.get('hds')
if hds_host:
f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url)
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
- url_base = self._search_regex(r'(?:https?|rtmp|rtsp)(://[^?]+)', url, 'format url')
- http_base_url = 'http' + url_base
+ url_base = self._search_regex(
+ r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
+ http_base_url = '%s:%s' % ('http', url_base)
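+ # e.g. a protocol-relative '//host/app/stream' now yields url_base
+ # '//host/app/stream' and http_base_url 'http://host/app/stream',
+ # where the old regex required an explicit scheme and raised instead.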
formats = []
if 'm3u8' not in skip_protocols:
formats.extend(self._extract_m3u8_formats(
for protocol in ('rtmp', 'rtsp'):
if protocol not in skip_protocols:
formats.append({
- 'url': protocol + url_base,
+ 'url': '%s:%s' % (protocol, url_base),
'format_id': protocol,
'protocol': protocol,
})
tracks = video_data.get('tracks')
if tracks and isinstance(tracks, list):
for track in tracks:
+ if not isinstance(track, dict):
+ continue
if track.get('kind') != 'captions':
continue
track_url = urljoin(base_url, track.get('file'))
urls = []
formats = []
for source in jwplayer_sources_data:
+ if not isinstance(source, dict):
+ continue
source_url = self._proto_relative_url(source.get('file'))
if not source_url:
continue
self._downloader.report_warning(msg)
return res
- def _set_cookie(self, domain, name, value, expire_time=None):
+ def _set_cookie(self, domain, name, value, expire_time=None, port=None,
+ path='/', secure=False, discard=False, rest={}, **kwargs):
cookie = compat_cookiejar.Cookie(
- 0, name, value, None, None, domain, None,
- None, '/', True, False, expire_time, '', None, None, None)
+ 0, name, value, port, not port is None, domain, True,
+ domain.startswith('.'), path, True, secure, expire_time,
+ discard, None, None, rest)
self._downloader.cookiejar.set_cookie(cookie)
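+ # Backwards compatible: existing callers like
+ # self._set_cookie('example.com', 'sid', 'abc') keep working, while
+ # PhantomJSwrapper._load_cookies can pass the richer
+ # port/path/secure/discard/rest fields (with **kwargs absorbing extras
+ # such as 'httponly').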
def _get_cookies(self, url):
import math
import operator
import os
-import pipes
import platform
import random
import re
import zlib
from .compat import (
+ compat_HTMLParseError,
compat_HTMLParser,
compat_basestring,
compat_chr,
retlist = []
for m in re.finditer(r'''(?xs)
<([a-zA-Z0-9:._-]+)
- (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
\s+%s=['"]?%s['"]?
- (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
\s*>
(?P<content>.*?)
</\1>
but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
"""
parser = HTMLAttributeParser()
- parser.feed(html_element)
- parser.close()
+ try:
+ parser.feed(html_element)
+ parser.close()
+ # Older Python may throw HTMLParseError in case of malformed HTML
+ except compat_HTMLParseError:
+ pass
return parser.attrs
assert type(s) == compat_str
return re.sub(
- r'&([^;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
+ r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
def get_subprocess_encoding():
except zlib.error:
return zlib.decompress(data)
- @staticmethod
- def addinfourl_wrapper(stream, headers, url, code):
- if hasattr(compat_urllib_request.addinfourl, 'getcode'):
- return compat_urllib_request.addinfourl(stream, headers, url, code)
- ret = compat_urllib_request.addinfourl(stream, headers, url)
- ret.code = code
- return ret
-
def http_request(self, req):
# According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is not
# always respected by websites, some tend to give out URLs with non percent-encoded
break
else:
raise original_ioerror
- resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
+ resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
# deflate
if resp.headers.get('Content-encoding', '') == 'deflate':
gz = io.BytesIO(self.deflate(resp.read()))
- resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
+ resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
if date_str is None:
return None
- date_str = date_str.replace(',', ' ')
+ date_str = re.sub(r'[,|]', '', date_str)
pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
timezone, date_str = extract_timezone(date_str)
if isinstance(a, bytes):
# We may get a filename encoded with 'encodeFilename'
a = a.decode(encoding)
- quoted_args.append(pipes.quote(a))
+ quoted_args.append(compat_shlex_quote(a))
return ' '.join(quoted_args)
return default
+def bool_or_none(v, default=None):
+ return v if isinstance(v, bool) else default
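+ # e.g. bool_or_none(True) -> True but bool_or_none('true') -> None;
+ # unlike bool(), this never coerces truthy strings from JSON metadata.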
+
+
def strip_or_none(v):
return None if v is None else v.strip()
return new_req
-def try_multipart_encode(data, boundary):
+def _multipart_encode_impl(data, boundary):
content_type = 'multipart/form-data; boundary=%s' % boundary
out = b''
v = v.encode('utf-8')
# RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
# suggests sending UTF-8 directly. Firefox sends UTF-8, too
- content = b'Content-Disposition: form-data; name="%s"\r\n\r\n' % k + v + b'\r\n'
+ content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
if boundary.encode('ascii') in content:
raise ValueError('Boundary overlaps with data')
out += content
boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
try:
- out, content_type = try_multipart_encode(data, boundary)
+ out, content_type = _multipart_encode_impl(data, boundary)
break
except ValueError:
if has_specified_boundary:
def strip_jsonp(code):
return re.sub(
- r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
+ r'''(?sx)^
+ (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+)
+ (?:\s*&&\s*(?P=func_name))?
+ \s*\(\s*(?P<callback_data>.*)\);?
+ \s*?(?://[^\n]*)*$''',
+ r'\g<callback_data>', code)
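+ # e.g. both 'cb({"id": 1});' and 'window.cb && cb({"id": 1});' are now
+ # reduced to '{"id": 1}'.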
def js_to_json(code):
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'):
if not vcodec:
vcodec = full_codec
- elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3'):
+ elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
if not acodec:
acodec = full_codec
else:
- write_string('WARNING: Unknown codec %s' % full_codec, sys.stderr)
+ write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
if not vcodec and not acodec:
if len(splited_codecs) == 2:
return {
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
param = params.get(param)
+ if param is None:
+ return []
assert isinstance(param, bool)
if separator:
return [command_option + separator + (true_value if param else false_value)]
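+ # e.g. cli_bool_option({}, '--no-check-certificate', 'nocheckcertificate')
+ # now returns [] instead of tripping the assert when the param is unset.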
"or the 'xattr' binary.")
+ def cookie_to_dict(cookie):
+ cookie_dict = {
+ 'name': cookie.name,
+ 'value': cookie.value,
+ }
+ if cookie.port_specified:
+ cookie_dict['port'] = cookie.port
+ if cookie.domain_specified:
+ cookie_dict['domain'] = cookie.domain
+ if cookie.path_specified:
+ cookie_dict['path'] = cookie.path
+ if cookie.expires is not None:
+ cookie_dict['expires'] = cookie.expires
+ if cookie.secure is not None:
+ cookie_dict['secure'] = cookie.secure
+ if cookie.discard is not None:
+ cookie_dict['discard'] = cookie.discard
+ try:
+ if (cookie.has_nonstandard_attr('httpOnly') or
+ cookie.has_nonstandard_attr('httponly') or
+ cookie.has_nonstandard_attr('HttpOnly')):
+ cookie_dict['httponly'] = True
+ except TypeError:
+ pass
+ return cookie_dict
+
+
+ def cookie_jar_to_list(cookie_jar):
+ return [cookie_to_dict(cookie) for cookie in cookie_jar]
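+ # Round-trip sketch: cookie_jar_to_list(ydl.cookiejar) yields JSON-ready
+ # dicts such as {'name': 'sid', 'value': 'abc', 'domain': '.example.com',
+ # 'path': '/'}, which PhantomJSwrapper below serializes for
+ # phantom.addCookie().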
+
+
+ class PhantomJSwrapper(object):
+ """PhantomJS wrapper class"""
+
+ _TEMPLATE = r'''
+ phantom.onError = function(msg, trace) {{
+ var msgStack = ['PHANTOM ERROR: ' + msg];
+ if(trace && trace.length) {{
+ msgStack.push('TRACE:');
+ trace.forEach(function(t) {{
+ msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line
+ + (t.function ? ' (in function ' + t.function +')' : ''));
+ }});
+ }}
+ console.error(msgStack.join('\n'));
+ phantom.exit(1);
+ }};
+ var page = require('webpage').create();
+ var fs = require('fs');
+ var read = {{ mode: 'r', charset: 'utf-8' }};
+ var write = {{ mode: 'w', charset: 'utf-8' }};
+ JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{
+ phantom.addCookie(x);
+ }});
+ page.settings.resourceTimeout = {timeout};
+ page.settings.userAgent = "{ua}";
+ page.onLoadStarted = function() {{
+ page.evaluate(function() {{
+ delete window._phantom;
+ delete window.callPhantom;
+ }});
+ }};
+ var saveAndExit = function() {{
+ fs.write("{html}", page.content, write);
+ fs.write("{cookies}", JSON.stringify(phantom.cookies), write);
+ phantom.exit();
+ }};
+ page.onLoadFinished = function(status) {{
+ if(page.url === "") {{
+ page.setContent(fs.read("{html}", read), "{url}");
+ }}
+ else {{
+ {jscode}
+ }}
+ }};
+ page.open("");
+ '''
+
+ _TMP_FILE_NAMES = ['script', 'html', 'cookies']
+
+ @staticmethod
+ def _version():
+ return get_exe_version('phantomjs', version_re=r'([0-9.]+)')
+
+ def __init__(self, extractor, required_version=None, timeout=10000):
+ self.exe = check_executable('phantomjs', ['-v'])
+ if not self.exe:
+ raise ExtractorError('PhantomJS executable not found in PATH, '
+ 'download it from http://phantomjs.org',
+ expected=True)
+
+ self.extractor = extractor
+
+ if required_version:
+ version = self._version()
+ if is_outdated_version(version, required_version):
+ self.extractor._downloader.report_warning(
+ 'Your copy of PhantomJS is outdated, update it to version '
+ '%s or newer if you encounter any errors.' % required_version)
+
+ self.options = {
+ 'timeout': timeout,
+ }
+ self._TMP_FILES = {}
+ for name in self._TMP_FILE_NAMES:
+ tmp = tempfile.NamedTemporaryFile(delete=False)
+ tmp.close()
+ self._TMP_FILES[name] = tmp
+
+ def __del__(self):
+ for name in self._TMP_FILE_NAMES:
+ try:
+ os.remove(self._TMP_FILES[name].name)
+ except (IOError, OSError):
+ pass
+
+ def _save_cookies(self, url):
+ cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar)
+ for cookie in cookies:
+ if 'path' not in cookie:
+ cookie['path'] = '/'
+ if 'domain' not in cookie:
+ cookie['domain'] = compat_urlparse.urlparse(url).netloc
+ with open(self._TMP_FILES['cookies'].name, 'wb') as f:
+ f.write(json.dumps(cookies).encode('utf-8'))
+
+ def _load_cookies(self):
+ with open(self._TMP_FILES['cookies'].name, 'rb') as f:
+ cookies = json.loads(f.read().decode('utf-8'))
+ for cookie in cookies:
+ if cookie.get('httponly') is True:
+ cookie['rest'] = {'httpOnly': None}
+ if 'expiry' in cookie:
+ cookie['expire_time'] = cookie['expiry']
+ self.extractor._set_cookie(**cookie)
+
+ def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
+ """
+ Downloads webpage (if needed) and executes JS
+
+ Params:
+ url: website url
+ html: optional, html code of website
+ video_id: video id
+ note: optional, displayed when downloading webpage
+ note2: optional, displayed when executing JS
+ headers: custom http headers
+ jscode: code to be executed when page is loaded
+
+ Returns tuple with:
+ * downloaded website (after JS execution)
+ * anything you print with `console.log` (but not inside `page.evaluate`!)
+
+ In most cases you don't need to add any `jscode`.
+ It is executed in `page.onLoadFinished`.
+ `saveAndExit();` is mandatory, use it instead of `phantom.exit()`
+ It is possible to wait for some element on the webpage, for example:
+ var check = function() {
+ var elementFound = page.evaluate(function() {
+ return document.querySelector('#b.done') !== null;
+ });
+ if(elementFound)
+ saveAndExit();
+ else
+ window.setTimeout(check, 500);
+ }
+
+ page.evaluate(function(){
+ document.querySelector('#a').click();
+ });
+ check();
+ """
+ if 'saveAndExit();' not in jscode:
+ raise ExtractorError('`saveAndExit();` not found in `jscode`')
+ if not html:
+ html = self.extractor._download_webpage(url, video_id, note=note, headers=headers)
+ with open(self._TMP_FILES['html'].name, 'wb') as f:
+ f.write(html.encode('utf-8'))
+
+ self._save_cookies(url)
+
+ replaces = self.options
+ replaces['url'] = url
+ user_agent = headers.get('User-Agent') or std_headers['User-Agent']
+ replaces['ua'] = user_agent.replace('"', '\\"')
+ replaces['jscode'] = jscode
+
+ for x in self._TMP_FILE_NAMES:
+ replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
+
+ with open(self._TMP_FILES['script'].name, 'wb') as f:
+ f.write(self._TEMPLATE.format(**replaces).encode('utf-8'))
+
+ if video_id is None:
+ self.extractor.to_screen('%s' % (note2,))
+ else:
+ self.extractor.to_screen('%s: %s' % (video_id, note2))
+
+ p = subprocess.Popen([self.exe, '--ssl-protocol=any',
+ self._TMP_FILES['script'].name], stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ out, err = p.communicate()
+ if p.returncode != 0:
+ raise ExtractorError('Executing JS failed:\n'
+ + encodeArgument(err))
+ with open(self._TMP_FILES['html'].name, 'rb') as f:
+ html = f.read().decode('utf-8')
+
+ self._load_cookies()
+
+ return (html, encodeArgument(out))
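+ # Hypothetical usage from an extractor:
+ #   phantom = PhantomJSwrapper(self, required_version='2.0')
+ #   webpage, logs = phantom.get(url, video_id=video_id)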
+
+
def random_birthday(year_field, month_field, day_field):
return {
year_field: str(random.randint(1950, 1995)),