X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2FYoutubeDL.py;h=dda222feef201eefd642cb5a6fd9f4d79b983648;hb=f7a211dcc8d86fe5a4d25f4d209ffb3efc05bdfc;hp=0e73dc8ffe4a5fe8d0c890f0f4c1ccf28a688d2b;hpb=8940b8608e567dba09b3ea146b89b297190ec6d6;p=youtube-dl.git diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 0e73dc8ff..dda222fee 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -25,6 +25,7 @@ if os.name == 'nt': import ctypes from .compat import ( + compat_basestring, compat_cookiejar, compat_expanduser, compat_http_client, @@ -137,6 +138,7 @@ class YoutubeDL(object): nooverwrites: Prevent overwriting files. playliststart: Playlist item to start at. playlistend: Playlist item to end at. + playlist_items: Specific indices of playlist to download. playlistreverse: Download playlist items in reverse order. matchtitle: Download only matching titles. rejecttitle: Reject downloads for matching titles. @@ -197,11 +199,12 @@ class YoutubeDL(object): postprocessor. progress_hooks: A list of functions that get called on download progress, with a dictionary with the entries - * filename: The final filename - * status: One of "downloading" and "finished" - - The dict may also have some of the following entries: + * status: One of "downloading" and "finished". + Check this first and ignore unknown values. + If status is one of "downloading" or "finished", the + following properties may also be present: + * filename: The final filename (always present) * downloaded_bytes: Bytes on disk * total_bytes: Size of the whole file, None if unknown * tmpfilename: The filename we're currently writing to @@ -225,12 +228,18 @@ class YoutubeDL(object): external_downloader: Executable of the external downloader to call. listformats: Print an overview of available video formats and exit. list_thumbnails: Print a table of all thumbnails and exit. + match_filter: A function that gets called with the info_dict of + every video. + If it returns a message, the video is ignored. + If it returns None, the video is downloaded. + match_filter_func in utils.py is one example for this. The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, - noresizebuffer, retries, continuedl, noprogress, consoletitle + noresizebuffer, retries, continuedl, noprogress, consoletitle, + xattr_set_filesize. The following options are used by the post processors: prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, @@ -540,6 +549,11 @@ class YoutubeDL(object): outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) tmpl = compat_expanduser(outtmpl) filename = tmpl % template_dict + # Temporary fix for #4787 + # 'Treat' all problem characters by passing filename through preferredencoding + # to workaround encoding issues with subprocess on python2 @ Windows + if sys.version_info < (3, 0) and sys.platform == 'win32': + filename = encodeFilename(filename, True).decode(preferredencoding()) return filename except ValueError as err: self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')') @@ -574,9 +588,16 @@ class YoutubeDL(object): if max_views is not None and view_count > max_views: return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): - return 'Skipping "%s" because it is age restricted' % title + return 'Skipping "%s" because it is age restricted' % video_title if self.in_download_archive(info_dict): return '%s has already been recorded in archive' % video_title + + match_filter = self.params.get('match_filter') + if match_filter is not None: + ret = match_filter(info_dict) + if ret is not None: + return ret + return None @staticmethod @@ -703,24 +724,51 @@ class YoutubeDL(object): if playlistend == -1: playlistend = None + playlistitems_str = self.params.get('playlist_items', None) + playlistitems = None + if playlistitems_str is not None: + def iter_playlistitems(format): + for string_segment in format.split(','): + if '-' in string_segment: + start, end = string_segment.split('-') + for item in range(int(start), int(end) + 1): + yield int(item) + else: + yield int(string_segment) + playlistitems = iter_playlistitems(playlistitems_str) + ie_entries = ie_result['entries'] if isinstance(ie_entries, list): n_all_entries = len(ie_entries) - entries = ie_entries[playliststart:playlistend] + if playlistitems: + entries = [ie_entries[i - 1] for i in playlistitems] + else: + entries = ie_entries[playliststart:playlistend] n_entries = len(entries) self.to_screen( "[%s] playlist %s: Collected %d video ids (downloading %d of them)" % (ie_result['extractor'], playlist, n_all_entries, n_entries)) elif isinstance(ie_entries, PagedList): - entries = ie_entries.getslice( - playliststart, playlistend) + if playlistitems: + entries = [] + for item in playlistitems: + entries.extend(ie_entries.getslice( + item - 1, item + )) + else: + entries = ie_entries.getslice( + playliststart, playlistend) n_entries = len(entries) self.to_screen( "[%s] playlist %s: Downloading %d videos" % (ie_result['extractor'], playlist, n_entries)) else: # iterable - entries = list(itertools.islice( - ie_entries, playliststart, playlistend)) + if playlistitems: + entry_list = list(ie_entries) + entries = [entry_list[i - 1] for i in playlistitems] + else: + entries = list(itertools.islice( + ie_entries, playliststart, playlistend)) n_entries = len(entries) self.to_screen( "[%s] playlist %s: Downloading %d videos" % @@ -790,26 +838,43 @@ class YoutubeDL(object): '!=': operator.ne, } operator_rex = re.compile(r'''(?x)\s*\[ - (?Pwidth|height|tbr|abr|vbr|filesize) + (?Pwidth|height|tbr|abr|vbr|asr|filesize|fps) \s*(?P%s)(?P\s*\?)?\s* (?P[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?) \]$ ''' % '|'.join(map(re.escape, OPERATORS.keys()))) m = operator_rex.search(format_spec) + if m: + try: + comparison_value = int(m.group('value')) + except ValueError: + comparison_value = parse_filesize(m.group('value')) + if comparison_value is None: + comparison_value = parse_filesize(m.group('value') + 'B') + if comparison_value is None: + raise ValueError( + 'Invalid value %r in format specification %r' % ( + m.group('value'), format_spec)) + op = OPERATORS[m.group('op')] + if not m: - raise ValueError('Invalid format specification %r' % format_spec) + STR_OPERATORS = { + '=': operator.eq, + '!=': operator.ne, + } + str_operator_rex = re.compile(r'''(?x)\s*\[ + \s*(?Pext|acodec|vcodec|container|protocol) + \s*(?P%s)(?P\s*\?)? + \s*(?P[a-zA-Z0-9_-]+) + \s*\]$ + ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) + m = str_operator_rex.search(format_spec) + if m: + comparison_value = m.group('value') + op = STR_OPERATORS[m.group('op')] - try: - comparison_value = int(m.group('value')) - except ValueError: - comparison_value = parse_filesize(m.group('value')) - if comparison_value is None: - comparison_value = parse_filesize(m.group('value') + 'B') - if comparison_value is None: - raise ValueError( - 'Invalid value %r in format specification %r' % ( - m.group('value'), format_spec)) - op = OPERATORS[m.group('op')] + if not m: + raise ValueError('Invalid format specification %r' % format_spec) def _filter(f): actual_value = f.get(m.group('key')) @@ -902,6 +967,9 @@ class YoutubeDL(object): def has_header(self, h): return h in self.headers + def get_header(self, h, default=None): + return self.headers.get(h, default) + pr = _PseudoRequest(info_dict['url']) self.cookiejar.add_cookie_header(pr) return pr.headers.get('Cookie') @@ -923,14 +991,16 @@ class YoutubeDL(object): if thumbnails is None: thumbnail = info_dict.get('thumbnail') if thumbnail: - thumbnails = [{'url': thumbnail}] + info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] if thumbnails: thumbnails.sort(key=lambda t: ( t.get('preference'), t.get('width'), t.get('height'), t.get('id'), t.get('url'))) - for t in thumbnails: + for i, t in enumerate(thumbnails): if 'width' in t and 'height' in t: t['resolution'] = '%dx%d' % (t['width'], t['height']) + if t.get('id') is None: + t['id'] = '%d' % i if thumbnails and 'thumbnail' not in info_dict: info_dict['thumbnail'] = thumbnails[-1]['url'] @@ -1038,8 +1108,10 @@ class YoutubeDL(object): else self.params['merge_output_format']) selected_format = { 'requested_formats': formats_info, - 'format': rf, - 'ext': formats_info[0]['ext'], + 'format': '%s+%s' % (formats_info[0].get('format'), + formats_info[1].get('format')), + 'format_id': '%s+%s' % (formats_info[0].get('format_id'), + formats_info[1].get('format_id')), 'width': formats_info[0].get('width'), 'height': formats_info[0].get('height'), 'resolution': formats_info[0].get('resolution'), @@ -1100,7 +1172,7 @@ class YoutubeDL(object): self._num_downloads += 1 - filename = self.prepare_filename(info_dict) + info_dict['_filename'] = filename = self.prepare_filename(info_dict) # Forced printings if self.params.get('forcetitle', False): @@ -1125,10 +1197,7 @@ class YoutubeDL(object): if self.params.get('forceformat', False): self.to_stdout(info_dict['format']) if self.params.get('forcejson', False): - info_dict['_filename'] = filename self.to_stdout(json.dumps(info_dict)) - if self.params.get('dump_single_json', False): - info_dict['_filename'] = filename # Do nothing else if in simulate mode if self.params.get('simulate', False): @@ -1222,6 +1291,7 @@ class YoutubeDL(object): if self.params.get('verbose'): self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) return fd.download(name, info) + if info_dict.get('requested_formats') is not None: downloaded = [] success = True @@ -1488,7 +1558,6 @@ class YoutubeDL(object): line(f, idlen) for f in formats if f.get('preference') is None or f['preference'] >= -1000] if len(formats) > 1: - formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)' formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)' header_line = line({ @@ -1524,7 +1593,7 @@ class YoutubeDL(object): # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) # To work around aforementioned issue we will replace request's original URL with # percent-encoded one - req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str) + req_is_string = isinstance(req, compat_basestring) url = req if req_is_string else req.get_full_url() url_escaped = escape_url(url)