2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
29 compat_urllib_request,
51 UnavailableVideoError,
58 from .extractor import get_info_extractor, gen_extractors
59 from .downloader import get_suitable_downloader
60 from .postprocessor import FFmpegMergerPP
61 from .version import __version__
64 class YoutubeDL(object):
67     YoutubeDL objects are the ones responsible for downloading the
68     actual video file and writing it to disk if the user has requested
69     it, among some other tasks. In most cases there should be one per
70     program. Since, given a video URL, the downloader doesn't know how to
71     extract all the needed information (that is the task of the
72     InfoExtractors), it has to pass the URL to one of them.
74     For this, YoutubeDL objects have a method that allows
75     InfoExtractors to be registered in a given order. When it is passed
76     a URL, the YoutubeDL object hands it to the first InfoExtractor it
77     finds that reports being able to handle it. The InfoExtractor extracts
78     all the information about the video or videos the URL refers to, and
79     YoutubeDL processes the extracted information, possibly using a File
80     Downloader to download the video.
82     YoutubeDL objects accept a lot of parameters. In order not to saturate
83     the object constructor with arguments, it receives a dictionary of
84     options instead. These options are available through the params
85     attribute for the InfoExtractors to use. The YoutubeDL also
86     registers itself as the downloader in charge of the InfoExtractors
87     that are added to it, so this is a "mutual registration" (see the usage sketch below).
91 username: Username for authentication purposes.
92 password: Password for authentication purposes.
93     videopassword:     Password for accessing a video.
94 usenetrc: Use netrc for authentication instead.
95 verbose: Print additional info to stdout.
96 quiet: Do not print messages to stdout.
97 forceurl: Force printing final URL.
98 forcetitle: Force printing title.
99 forceid: Force printing ID.
100 forcethumbnail: Force printing thumbnail URL.
101 forcedescription: Force printing description.
102 forcefilename: Force printing final filename.
103 forceduration: Force printing duration.
104 forcejson: Force printing info_dict as JSON.
105 simulate: Do not download the video files.
106 format: Video format code.
107 format_limit: Highest quality format to try.
108 outtmpl: Template for output names.
109 restrictfilenames: Do not allow "&" and spaces in file names
110 ignoreerrors: Do not stop on download errors.
111 nooverwrites: Prevent overwriting files.
112 playliststart: Playlist item to start at.
113 playlistend: Playlist item to end at.
114 matchtitle: Download only matching titles.
115 rejecttitle: Reject downloads for matching titles.
116 logger: Log messages to a logging.Logger instance.
117 logtostderr: Log messages to stderr instead of stdout.
118 writedescription: Write the video description to a .description file
119 writeinfojson: Write the video description to a .info.json file
120 writeannotations: Write the video annotations to a .annotations.xml file
121 writethumbnail: Write the thumbnail image to a file
122 writesubtitles: Write the video subtitles to a file
123 writeautomaticsub: Write the automatic subtitles to a file
124 allsubtitles: Downloads all the subtitles of the video
125 (requires writesubtitles or writeautomaticsub)
126 listsubtitles: Lists all available subtitles for the video
127 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
128 subtitleslangs: List of languages of the subtitles to download
129 keepvideo: Keep the video file after post-processing
130 daterange: A DateRange object, download only if the upload_date is in the range.
131 skip_download: Skip the actual download of the video file
132 cachedir: Location of the cache files in the filesystem.
133 None to disable filesystem cache.
134 noplaylist: Download single video instead of a playlist if in doubt.
135 age_limit: An integer representing the user's age in years.
136 Unsuitable videos for the given age are skipped.
137 min_views: An integer representing the minimum view count the video
138 must have in order to not be skipped.
139 Videos without view count information are always
140 downloaded. None for no limit.
141 max_views: An integer representing the maximum view count.
142 Videos that are more popular than that are not
144 Videos without view count information are always
145 downloaded. None for no limit.
146 download_archive: File name of a file where all downloads are recorded.
147 Videos already present in the file are not downloaded
149 cookiefile: File name where cookies should be read from and dumped to.
150 nocheckcertificate:Do not verify SSL certificates
151 proxy: URL of the proxy server to use
152 socket_timeout: Time to wait for unresponsive hosts, in seconds
153 bidi_workaround: Work around buggy terminals without bidirectional text
154                        support, using fribidi
155 debug_printtraffic:Print out sent and received HTTP traffic
156 include_ads: Download ads as well
157     default_search:    Prepend this string if an input URL is not valid.
158 'auto' for elaborate guessing
160     The following parameters are not used by YoutubeDL itself; they are used by
162 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
163 noresizebuffer, retries, continuedl, noprogress, consoletitle
165 The following options are used by the post processors:
166 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
167 otherwise prefer avconv.
173 _download_retcode = None
174 _num_downloads = None
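    # A minimal usage sketch (illustrative only; the URL and option values are
    # hypothetical, and error handling is omitted):
    #
    #     from youtube_dl import YoutubeDL
    #
    #     ydl = YoutubeDL({'outtmpl': '%(title)s-%(id)s.%(ext)s', 'quiet': True})
    #     ydl.add_default_info_extractors()
    #     retcode = ydl.download(['https://www.youtube.com/watch?v=EXAMPLE'])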
177 def __init__(self, params=None):
178         """Create a YoutubeDL object with the given options."""
182 self._ies_instances = {}
184 self._progress_hooks = []
185 self._download_retcode = 0
186 self._num_downloads = 0
187 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
188 self._err_file = sys.stderr
191 if params.get('bidi_workaround', False):
194 master, slave = pty.openpty()
195 width = get_term_width()
199 width_args = ['-w', str(width)]
201 stdin=subprocess.PIPE,
203 stderr=self._err_file)
205 self._output_process = subprocess.Popen(
206 ['bidiv'] + width_args, **sp_kwargs
209 self._output_process = subprocess.Popen(
210 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
211 self._output_channel = os.fdopen(master, 'rb')
212 except OSError as ose:
214                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')
218 if (sys.version_info >= (3,) and sys.platform != 'win32' and
219 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
220 and not params['restrictfilenames']):
221 # On Python 3, the Unicode filesystem API will throw errors (#1474)
223 'Assuming --restrict-filenames since file system encoding '
224                 'cannot encode all characters. '
225 'Set the LC_ALL environment variable to fix this.')
226 self.params['restrictfilenames'] = True
228 if '%(stitle)s' in self.params.get('outtmpl', ''):
229             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag (which also secures %(uploader)s et al.) instead.')
233 def add_info_extractor(self, ie):
234 """Add an InfoExtractor object to the end of the list."""
236 self._ies_instances[ie.ie_key()] = ie
237 ie.set_downloader(self)
239 def get_info_extractor(self, ie_key):
241         Get an instance of an IE with name ie_key. It will try to get one from
242         the _ies list; if there is no instance, it will create a new one and add
243         it to the extractor list.
245 ie = self._ies_instances.get(ie_key)
247 ie = get_info_extractor(ie_key)()
248 self.add_info_extractor(ie)
251 def add_default_info_extractors(self):
253 Add the InfoExtractors returned by gen_extractors to the end of the list
255 for ie in gen_extractors():
256 self.add_info_extractor(ie)
258 def add_post_processor(self, pp):
259 """Add a PostProcessor object to the end of the chain."""
261 pp.set_downloader(self)
263 def add_progress_hook(self, ph):
264 """Add the progress hook (currently only for the file downloader)"""
265 self._progress_hooks.append(ph)
267 def _bidi_workaround(self, message):
268 if not hasattr(self, '_output_channel'):
271 assert hasattr(self, '_output_process')
272 assert type(message) == type('')
273 line_count = message.count('\n') + 1
274 self._output_process.stdin.write((message + '\n').encode('utf-8'))
275 self._output_process.stdin.flush()
276 res = ''.join(self._output_channel.readline().decode('utf-8')
277 for _ in range(line_count))
278 return res[:-len('\n')]
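        # Roughly, each line of output is piped through the external fribidi
        # tool (or bidiv), which reorders right-to-left text for terminals
        # without bidi support. A rough shell equivalent of the pipeline set up
        # in __init__ (flags taken from the code above, sample text arbitrary):
        #
        #     printf 'שלום עולם\n' | fribidi -c UTF-8 -w 80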
280 def to_screen(self, message, skip_eol=False):
281 """Print message to stdout if not in quiet mode."""
282 return self.to_stdout(message, skip_eol, check_quiet=True)
284 def to_stdout(self, message, skip_eol=False, check_quiet=False):
285 """Print message to stdout if not in quiet mode."""
286 if self.params.get('logger'):
287 self.params['logger'].debug(message)
288 elif not check_quiet or not self.params.get('quiet', False):
289 message = self._bidi_workaround(message)
290 terminator = ['\n', ''][skip_eol]
291 output = message + terminator
293 write_string(output, self._screen_file)
295 def to_stderr(self, message):
296 """Print message to stderr."""
297 assert type(message) == type('')
298 if self.params.get('logger'):
299 self.params['logger'].error(message)
301 message = self._bidi_workaround(message)
302 output = message + '\n'
303 write_string(output, self._err_file)
305 def to_console_title(self, message):
306 if not self.params.get('consoletitle', False):
308 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
309 # c_wchar_p() might not be necessary if `message` is
310 # already of type unicode()
311 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
312 elif 'TERM' in os.environ:
313 write_string('\033]0;%s\007' % message, self._screen_file)
315 def save_console_title(self):
316 if not self.params.get('consoletitle', False):
318 if 'TERM' in os.environ:
319 # Save the title on stack
320 write_string('\033[22;0t', self._screen_file)
322 def restore_console_title(self):
323 if not self.params.get('consoletitle', False):
325 if 'TERM' in os.environ:
326 # Restore the title from stack
327 write_string('\033[23;0t', self._screen_file)
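        # The escape sequences above are xterm window controls: '\033]0;TITLE\007'
        # sets the window title, while '\033[22;0t' and '\033[23;0t' push and pop
        # the title on the terminal's title stack. Sketch of the calls made around
        # a session (the message text is hypothetical):
        #
        #     ydl.save_console_title()
        #     ydl.to_console_title('youtube-dl: downloading ...')
        #     ydl.restore_console_title()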
330 self.save_console_title()
333 def __exit__(self, *args):
334 self.restore_console_title()
336 if self.params.get('cookiefile') is not None:
337 self.cookiejar.save()
339 def trouble(self, message=None, tb=None):
340 """Determine action to take when a download problem appears.
342         Depending on whether the downloader has been configured to ignore
343         download errors or not, this method may raise an exception when
344         errors are found, after printing the message.
346 tb, if given, is additional traceback information.
348 if message is not None:
349 self.to_stderr(message)
350 if self.params.get('verbose'):
352 if sys.exc_info()[0]: # if .trouble has been called from an except block
354 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
355 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
356 tb += compat_str(traceback.format_exc())
358 tb_data = traceback.format_list(traceback.extract_stack())
359 tb = ''.join(tb_data)
361 if not self.params.get('ignoreerrors', False):
362 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
363 exc_info = sys.exc_info()[1].exc_info
365 exc_info = sys.exc_info()
366 raise DownloadError(message, exc_info)
367 self._download_retcode = 1
369 def report_warning(self, message):
371         Print the message to stderr; it will be prefixed with 'WARNING:'.
372         If stderr is a tty file, the 'WARNING:' will be colored.
374 if self.params.get('logger') is not None:
375 self.params['logger'].warning(message)
377 if self._err_file.isatty() and os.name != 'nt':
378 _msg_header = '\033[0;33mWARNING:\033[0m'
380 _msg_header = 'WARNING:'
381 warning_message = '%s %s' % (_msg_header, message)
382 self.to_stderr(warning_message)
384 def report_error(self, message, tb=None):
386         Do the same as trouble, but prefix the message with 'ERROR:', colored
387         in red if stderr is a tty file.
389 if self._err_file.isatty() and os.name != 'nt':
390 _msg_header = '\033[0;31mERROR:\033[0m'
392 _msg_header = 'ERROR:'
393 error_message = '%s %s' % (_msg_header, message)
394 self.trouble(error_message, tb)
396 def report_file_already_downloaded(self, file_name):
397 """Report file has already been fully downloaded."""
399 self.to_screen('[download] %s has already been downloaded' % file_name)
400 except UnicodeEncodeError:
401 self.to_screen('[download] The file has already been downloaded')
403 def prepare_filename(self, info_dict):
404 """Generate the output filename."""
406 template_dict = dict(info_dict)
408 template_dict['epoch'] = int(time.time())
409 autonumber_size = self.params.get('autonumber_size')
410 if autonumber_size is None:
412 autonumber_templ = '%0' + str(autonumber_size) + 'd'
413 template_dict['autonumber'] = autonumber_templ % self._num_downloads
414 if template_dict.get('playlist_index') is not None:
415 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
416 if template_dict.get('resolution') is None:
417 if template_dict.get('width') and template_dict.get('height'):
418 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
419 elif template_dict.get('height'):
420 template_dict['resolution'] = '%sp' % template_dict['height']
421 elif template_dict.get('width'):
422 template_dict['resolution'] = '?x%d' % template_dict['width']
424 sanitize = lambda k, v: sanitize_filename(
426 restricted=self.params.get('restrictfilenames'),
428 template_dict = dict((k, sanitize(k, v))
429 for k, v in template_dict.items()
431 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
433 tmpl = os.path.expanduser(self.params['outtmpl'])
434 filename = tmpl % template_dict
436 except ValueError as err:
437 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
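        # How the expansion works: every known field is placed in a defaultdict
        # that falls back to 'NA', and outtmpl is expanded with ordinary
        # %-formatting. A sketch with hypothetical values:
        #
        #     template_dict = collections.defaultdict(lambda: 'NA', {
        #         'title': 'Some video', 'id': 'abc123', 'ext': 'mp4'})
        #     '%(title)s-%(id)s.%(ext)s' % template_dict  # -> 'Some video-abc123.mp4'
        #     '%(uploader)s.%(ext)s' % template_dict      # -> 'NA.mp4'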
440 def _match_entry(self, info_dict):
441 """ Returns None iff the file should be downloaded """
443 video_title = info_dict.get('title', info_dict.get('id', 'video'))
444 if 'title' in info_dict:
445 # This can happen when we're just evaluating the playlist
446 title = info_dict['title']
447 matchtitle = self.params.get('matchtitle', False)
449 if not re.search(matchtitle, title, re.IGNORECASE):
450 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
451 rejecttitle = self.params.get('rejecttitle', False)
453 if re.search(rejecttitle, title, re.IGNORECASE):
454 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
455 date = info_dict.get('upload_date', None)
457 dateRange = self.params.get('daterange', DateRange())
458 if date not in dateRange:
459 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
460 view_count = info_dict.get('view_count', None)
461 if view_count is not None:
462 min_views = self.params.get('min_views')
463 if min_views is not None and view_count < min_views:
464 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
465 max_views = self.params.get('max_views')
466 if max_views is not None and view_count > max_views:
467 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
468 age_limit = self.params.get('age_limit')
469 if age_limit is not None:
470 if age_limit < info_dict.get('age_limit', 0):
471                 return 'Skipping "' + video_title + '" because it is age restricted'
472 if self.in_download_archive(info_dict):
473 return '%s has already been recorded in archive' % video_title
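        # The checks above are driven entirely by params. A sketch of the options
        # they consume (values are hypothetical):
        #
        #     {
        #         'matchtitle': r'keynote',    # regex the title must match
        #         'rejecttitle': r'trailer',   # regex that rejects a title
        #         'daterange': DateRange('20140101', '20141231'),
        #         'min_views': 1000,
        #         'max_views': None,           # None means no upper limit
        #         'age_limit': 18,
        #     }
        #
        # A non-None return value is the human-readable reason for skipping;
        # None means the video should be downloaded.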
477 def add_extra_info(info_dict, extra_info):
478 '''Set the keys from extra_info in info dict if they are missing'''
479 for key, value in extra_info.items():
480 info_dict.setdefault(key, value)
482 def extract_info(self, url, download=True, ie_key=None, extra_info={},
485 Returns a list with a dictionary for each video we find.
486 If 'download', also downloads the videos.
487 extra_info is a dict containing the extra values to add to each result
491 ies = [self.get_info_extractor(ie_key)]
496 if not ie.suitable(url):
500 self.report_warning('The program functionality for this site has been marked as broken, '
501 'and will probably not work.')
504 ie_result = ie.extract(url)
505 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
507 if isinstance(ie_result, list):
508 # Backwards compatibility: old IE result format
510 '_type': 'compat_list',
511 'entries': ie_result,
513 self.add_extra_info(ie_result,
515 'extractor': ie.IE_NAME,
517 'webpage_url_basename': url_basename(url),
518 'extractor_key': ie.ie_key(),
521 return self.process_ie_result(ie_result, download, extra_info)
524 except ExtractorError as de: # An error we somewhat expected
525 self.report_error(compat_str(de), de.format_traceback())
527 except MaxDownloadsReached:
529 except Exception as e:
530 if self.params.get('ignoreerrors', False):
531 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
536 self.report_error('no suitable InfoExtractor: %s' % url)
538 def process_ie_result(self, ie_result, download=True, extra_info={}):
540         Take the result of the ie (may be modified) and resolve all unresolved
541 references (URLs, playlist items).
543 It will also download the videos if 'download'.
544 Returns the resolved ie_result.
547         result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, to support the old default behaviour
548 if result_type == 'video':
549 self.add_extra_info(ie_result, extra_info)
550 return self.process_video_result(ie_result, download=download)
551 elif result_type == 'url':
552 # We have to add extra_info to the results because it may be
553 # contained in a playlist
554 return self.extract_info(ie_result['url'],
556 ie_key=ie_result.get('ie_key'),
557 extra_info=extra_info)
558 elif result_type == 'url_transparent':
559 # Use the information from the embedding page
560 info = self.extract_info(
561 ie_result['url'], ie_key=ie_result.get('ie_key'),
562 extra_info=extra_info, download=False, process=False)
564 def make_result(embedded_info):
565 new_result = ie_result.copy()
566 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
567 'entries', 'ie_key', 'duration',
568 'subtitles', 'annotations', 'format',
569 'thumbnail', 'thumbnails'):
572 if f in embedded_info:
573 new_result[f] = embedded_info[f]
575 new_result = make_result(info)
577 assert new_result.get('_type') != 'url_transparent'
578 if new_result.get('_type') == 'compat_list':
579 new_result['entries'] = [
580 make_result(e) for e in new_result['entries']]
582 return self.process_ie_result(
583 new_result, download=download, extra_info=extra_info)
584 elif result_type == 'playlist':
585 # We process each entry in the playlist
586 playlist = ie_result.get('title', None) or ie_result.get('id', None)
587 self.to_screen('[download] Downloading playlist: %s' % playlist)
589 playlist_results = []
591 playliststart = self.params.get('playliststart', 1) - 1
592 playlistend = self.params.get('playlistend', None)
593 # For backwards compatibility, interpret -1 as whole list
594 if playlistend == -1:
597 if isinstance(ie_result['entries'], list):
598 n_all_entries = len(ie_result['entries'])
599 entries = ie_result['entries'][playliststart:playlistend]
600 n_entries = len(entries)
602 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
603 (ie_result['extractor'], playlist, n_all_entries, n_entries))
605 assert isinstance(ie_result['entries'], PagedList)
606 entries = ie_result['entries'].getslice(
607 playliststart, playlistend)
608 n_entries = len(entries)
610 "[%s] playlist %s: Downloading %d videos" %
611 (ie_result['extractor'], playlist, n_entries))
613 for i, entry in enumerate(entries, 1):
614 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
616 'playlist': playlist,
617 'playlist_index': i + playliststart,
618 'extractor': ie_result['extractor'],
619 'webpage_url': ie_result['webpage_url'],
620 'webpage_url_basename': url_basename(ie_result['webpage_url']),
621 'extractor_key': ie_result['extractor_key'],
624 reason = self._match_entry(entry)
625 if reason is not None:
626 self.to_screen('[download] ' + reason)
629 entry_result = self.process_ie_result(entry,
632 playlist_results.append(entry_result)
633 ie_result['entries'] = playlist_results
635 elif result_type == 'compat_list':
637 self.add_extra_info(r,
639 'extractor': ie_result['extractor'],
640 'webpage_url': ie_result['webpage_url'],
641 'webpage_url_basename': url_basename(ie_result['webpage_url']),
642 'extractor_key': ie_result['extractor_key'],
645 ie_result['entries'] = [
646 self.process_ie_result(_fixup(r), download, extra_info)
647 for r in ie_result['entries']
651 raise Exception('Invalid result type: %s' % result_type)
653 def select_format(self, format_spec, available_formats):
654 if format_spec == 'best' or format_spec is None:
655 return available_formats[-1]
656 elif format_spec == 'worst':
657 return available_formats[0]
658 elif format_spec == 'bestaudio':
660 f for f in available_formats
661 if f.get('vcodec') == 'none']
663 return audio_formats[-1]
664 elif format_spec == 'worstaudio':
666 f for f in available_formats
667 if f.get('vcodec') == 'none']
669 return audio_formats[0]
671 extensions = ['mp4', 'flv', 'webm', '3gp']
672 if format_spec in extensions:
673 filter_f = lambda f: f['ext'] == format_spec
675 filter_f = lambda f: f['format_id'] == format_spec
676 matches = list(filter(filter_f, available_formats))
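        # format_spec values handled above (the format list is assumed to be
        # sorted worst-to-best): None or 'best', 'worst', 'bestaudio',
        # 'worstaudio', a known extension such as 'mp4' or 'webm', or a literal
        # format_id. For example (with a hypothetical format list):
        #
        #     self.select_format('bestaudio', formats)  # best audio-only format
        #     self.select_format('webm', formats)       # best format with ext == 'webm'
        #     self.select_format('137', formats)        # format with format_id == '137'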
681 def process_video_result(self, info_dict, download=True):
682 assert info_dict.get('_type', 'video') == 'video'
684 if 'playlist' not in info_dict:
685 # It isn't part of a playlist
686 info_dict['playlist'] = None
687 info_dict['playlist_index'] = None
689 if 'display_id' not in info_dict and 'id' in info_dict:
690 info_dict['display_id'] = info_dict['id']
692 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
693 upload_date = datetime.datetime.utcfromtimestamp(
694 info_dict['timestamp'])
695 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
697         # These extractors handle format selection themselves
698 if info_dict['extractor'] in ['Youku']:
700 self.process_info(info_dict)
703 # We now pick which formats have to be downloaded
704 if info_dict.get('formats') is None:
705 # There's only one format available
706 formats = [info_dict]
708 formats = info_dict['formats']
711 raise ExtractorError('No video formats found!')
713 # We check that all the formats have the format and format_id fields
714 for i, format in enumerate(formats):
715 if format.get('format_id') is None:
716 format['format_id'] = compat_str(i)
717 if format.get('format') is None:
718 format['format'] = '{id} - {res}{note}'.format(
719 id=format['format_id'],
720 res=self.format_resolution(format),
721 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
723 # Automatically determine file extension if missing
724 if 'ext' not in format:
725 format['ext'] = determine_ext(format['url'])
727 format_limit = self.params.get('format_limit', None)
729 formats = list(takewhile_inclusive(
730 lambda f: f['format_id'] != format_limit, formats
733 # TODO Central sorting goes here
735 if formats[0] is not info_dict:
736             # only set the 'formats' field if the original info_dict lists them;
737             # otherwise we end up with a circular reference, since the first (and only)
738             # element in the 'formats' field in info_dict would be info_dict itself,
739             # which can't be exported to json
740 info_dict['formats'] = formats
741 if self.params.get('listformats', None):
742 self.list_formats(info_dict)
745 req_format = self.params.get('format')
746 if req_format is None:
748 formats_to_download = []
749 # The -1 is for supporting YoutubeIE
750 if req_format in ('-1', 'all'):
751 formats_to_download = formats
753             # We can accept formats requested in the form 34/5/best; we pick
754             # the first one that is available, starting from the left
755 req_formats = req_format.split('/')
756 for rf in req_formats:
757 if re.match(r'.+?\+.+?', rf) is not None:
758 # Two formats have been requested like '137+139'
759 format_1, format_2 = rf.split('+')
760 formats_info = (self.select_format(format_1, formats),
761 self.select_format(format_2, formats))
762 if all(formats_info):
764 'requested_formats': formats_info,
766 'ext': formats_info[0]['ext'],
769 selected_format = None
771 selected_format = self.select_format(rf, formats)
772 if selected_format is not None:
773 formats_to_download = [selected_format]
775 if not formats_to_download:
776 raise ExtractorError('requested format not available',
780 if len(formats_to_download) > 1:
781 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
782 for format in formats_to_download:
783 new_info = dict(info_dict)
784 new_info.update(format)
785 self.process_info(new_info)
786 # We update the info dict with the best quality format (backwards compatibility)
787 info_dict.update(formats_to_download[-1])
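        # The 'format' option parsed above is a string such as (examples are
        # hypothetical):
        #
        #     '22/18/best'  # try format_id 22, then 18, then the best available
        #     '137+139'     # download both formats and merge them (needs ffmpeg/avconv)
        #     'all'         # download every available format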
790 def process_info(self, info_dict):
791 """Process a single resolved IE result."""
793 assert info_dict.get('_type', 'video') == 'video'
795 max_downloads = self.params.get('max_downloads')
796 if max_downloads is not None:
797 if self._num_downloads >= int(max_downloads):
798 raise MaxDownloadsReached()
800 info_dict['fulltitle'] = info_dict['title']
801 if len(info_dict['title']) > 200:
802 info_dict['title'] = info_dict['title'][:197] + '...'
804 # Keep for backwards compatibility
805 info_dict['stitle'] = info_dict['title']
807 if not 'format' in info_dict:
808 info_dict['format'] = info_dict['ext']
810 reason = self._match_entry(info_dict)
811 if reason is not None:
812 self.to_screen('[download] ' + reason)
815 self._num_downloads += 1
817 filename = self.prepare_filename(info_dict)
820 if self.params.get('forcetitle', False):
821 self.to_stdout(info_dict['fulltitle'])
822 if self.params.get('forceid', False):
823 self.to_stdout(info_dict['id'])
824 if self.params.get('forceurl', False):
825 # For RTMP URLs, also include the playpath
826 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
827 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
828 self.to_stdout(info_dict['thumbnail'])
829 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
830 self.to_stdout(info_dict['description'])
831 if self.params.get('forcefilename', False) and filename is not None:
832 self.to_stdout(filename)
833 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
834 self.to_stdout(formatSeconds(info_dict['duration']))
835 if self.params.get('forceformat', False):
836 self.to_stdout(info_dict['format'])
837 if self.params.get('forcejson', False):
838 info_dict['_filename'] = filename
839 self.to_stdout(json.dumps(info_dict))
841 # Do nothing else if in simulate mode
842 if self.params.get('simulate', False):
849 dn = os.path.dirname(encodeFilename(filename))
850 if dn != '' and not os.path.exists(dn):
852 except (OSError, IOError) as err:
853 self.report_error('unable to create directory ' + compat_str(err))
856 if self.params.get('writedescription', False):
857 descfn = filename + '.description'
858 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
859 self.to_screen('[info] Video description is already present')
862 self.to_screen('[info] Writing video description to: ' + descfn)
863 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
864 descfile.write(info_dict['description'])
865 except (KeyError, TypeError):
866 self.report_warning('There\'s no description to write.')
867 except (OSError, IOError):
868 self.report_error('Cannot write description file ' + descfn)
871 if self.params.get('writeannotations', False):
872 annofn = filename + '.annotations.xml'
873 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
874 self.to_screen('[info] Video annotations are already present')
877 self.to_screen('[info] Writing video annotations to: ' + annofn)
878 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
879 annofile.write(info_dict['annotations'])
880 except (KeyError, TypeError):
881 self.report_warning('There are no annotations to write.')
882 except (OSError, IOError):
883 self.report_error('Cannot write annotations file: ' + annofn)
886 subtitles_are_requested = any([self.params.get('writesubtitles', False),
887 self.params.get('writeautomaticsub')])
889 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
890             # Subtitle download errors are already managed as troubles in the relevant IE;
891             # that way it will silently go on when used with an unsupporting IE
892 subtitles = info_dict['subtitles']
893 sub_format = self.params.get('subtitlesformat', 'srt')
894 for sub_lang in subtitles.keys():
895 sub = subtitles[sub_lang]
899 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
900 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
901                         self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
903 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
904 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
906 except (OSError, IOError):
907                     self.report_error('Cannot write subtitles file ' + sub_filename)
910 if self.params.get('writeinfojson', False):
911 infofn = os.path.splitext(filename)[0] + '.info.json'
912 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
913 self.to_screen('[info] Video description metadata is already present')
915 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
917 write_json_file(info_dict, encodeFilename(infofn))
918 except (OSError, IOError):
919 self.report_error('Cannot write metadata to JSON file ' + infofn)
922 if self.params.get('writethumbnail', False):
923 if info_dict.get('thumbnail') is not None:
924 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
925 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
926 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
927 self.to_screen('[%s] %s: Thumbnail is already present' %
928 (info_dict['extractor'], info_dict['id']))
930 self.to_screen('[%s] %s: Downloading thumbnail ...' %
931 (info_dict['extractor'], info_dict['id']))
933 uf = self.urlopen(info_dict['thumbnail'])
934 with open(thumb_filename, 'wb') as thumbf:
935 shutil.copyfileobj(uf, thumbf)
936 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
937 (info_dict['extractor'], info_dict['id'], thumb_filename))
938 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
939 self.report_warning('Unable to download thumbnail "%s": %s' %
940 (info_dict['thumbnail'], compat_str(err)))
942 if not self.params.get('skip_download', False):
943 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
948 fd = get_suitable_downloader(info)(self, self.params)
949 for ph in self._progress_hooks:
950 fd.add_progress_hook(ph)
951 return fd.download(name, info)
952 if info_dict.get('requested_formats') is not None:
955 merger = FFmpegMergerPP(self)
956 if not merger._get_executable():
958 self.report_warning('You have requested multiple '
959 'formats but ffmpeg or avconv are not installed.'
960 ' The formats won\'t be merged')
962 postprocessors = [merger]
963 for f in info_dict['requested_formats']:
964 new_info = dict(info_dict)
966 fname = self.prepare_filename(new_info)
967 fname = prepend_extension(fname, 'f%s' % f['format_id'])
968 downloaded.append(fname)
969 partial_success = dl(fname, new_info)
970 success = success and partial_success
971 info_dict['__postprocessors'] = postprocessors
972 info_dict['__files_to_merge'] = downloaded
975 success = dl(filename, info_dict)
976 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
977 self.report_error('unable to download video data: %s' % str(err))
979 except (OSError, IOError) as err:
980 raise UnavailableVideoError(err)
981 except (ContentTooShortError, ) as err:
982 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
987 self.post_process(filename, info_dict)
988 except (PostProcessingError) as err:
989 self.report_error('postprocessing: %s' % str(err))
992 self.record_download_archive(info_dict)
994 def download(self, url_list):
995 """Download a given list of URLs."""
996 if (len(url_list) > 1 and
997 '%' not in self.params['outtmpl']
998 and self.params.get('max_downloads') != 1):
999 raise SameFileError(self.params['outtmpl'])
1001 for url in url_list:
1003                 # It also downloads the videos
1004 self.extract_info(url)
1005 except UnavailableVideoError:
1006 self.report_error('unable to download video')
1007 except MaxDownloadsReached:
1008 self.to_screen('[info] Maximum number of downloaded files reached.')
1011 return self._download_retcode
1013 def download_with_info_file(self, info_filename):
1014 with io.open(info_filename, 'r', encoding='utf-8') as f:
1017 self.process_ie_result(info, download=True)
1018 except DownloadError:
1019 webpage_url = info.get('webpage_url')
1020 if webpage_url is not None:
1021 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1022 return self.download([webpage_url])
1025 return self._download_retcode
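        # Typical round trip (file names are hypothetical): a previous run with
        # the 'writeinfojson' option left a .info.json file next to the video;
        # feeding it back in avoids re-extracting the page:
        #
        #     ydl = YoutubeDL({'writeinfojson': True})
        #     ydl.download(['https://www.youtube.com/watch?v=EXAMPLE'])
        #     ydl.download_with_info_file('Some video-abc123.info.json')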
1027 def post_process(self, filename, ie_info):
1028 """Run all the postprocessors on the given file."""
1029 info = dict(ie_info)
1030 info['filepath'] = filename
1033 if ie_info.get('__postprocessors') is not None:
1034 pps_chain.extend(ie_info['__postprocessors'])
1035 pps_chain.extend(self._pps)
1036 for pp in pps_chain:
1038 keep_video_wish, new_info = pp.run(info)
1039 if keep_video_wish is not None:
1041 keep_video = keep_video_wish
1042 elif keep_video is None:
1043 # No clear decision yet, let IE decide
1044 keep_video = keep_video_wish
1045 except PostProcessingError as e:
1046 self.report_error(e.msg)
1047 if keep_video is False and not self.params.get('keepvideo', False):
1049 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1050 os.remove(encodeFilename(filename))
1051 except (IOError, OSError):
1052 self.report_warning('Unable to remove downloaded video file')
1054 def _make_archive_id(self, info_dict):
1055         # Future-proof against any change in case
1056         # and for backwards compatibility with prior versions
1057 extractor = info_dict.get('extractor_key')
1058 if extractor is None:
1059 if 'id' in info_dict:
1060 extractor = info_dict.get('ie_key') # key in a playlist
1061 if extractor is None:
1062 return None # Incomplete video information
1063 return extractor.lower() + ' ' + info_dict['id']
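        # Archive entries are plain text lines of the form '<extractor> <id>',
        # one per downloaded video, e.g. (IDs are hypothetical):
        #
        #     youtube dQw4w9WgXcQ
        #     vimeo 12345678
        #
        # in_download_archive() and record_download_archive() below read and
        # append such lines in the file named by the 'download_archive' option.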
1065 def in_download_archive(self, info_dict):
1066 fn = self.params.get('download_archive')
1070 vid_id = self._make_archive_id(info_dict)
1072 return False # Incomplete video information
1075 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1076 for line in archive_file:
1077 if line.strip() == vid_id:
1079 except IOError as ioe:
1080 if ioe.errno != errno.ENOENT:
1084 def record_download_archive(self, info_dict):
1085 fn = self.params.get('download_archive')
1088 vid_id = self._make_archive_id(info_dict)
1090 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1091 archive_file.write(vid_id + '\n')
1094 def format_resolution(format, default='unknown'):
1095 if format.get('vcodec') == 'none':
1097 if format.get('resolution') is not None:
1098 return format['resolution']
1099 if format.get('height') is not None:
1100 if format.get('width') is not None:
1101 res = '%sx%s' % (format['width'], format['height'])
1103 res = '%sp' % format['height']
1104 elif format.get('width') is not None:
1105 res = '?x%d' % format['width']
1110 def list_formats(self, info_dict):
1111 def format_note(fdict):
1113 if fdict.get('ext') in ['f4f', 'f4m']:
1114 res += '(unsupported) '
1115 if fdict.get('format_note') is not None:
1116 res += fdict['format_note'] + ' '
1117 if fdict.get('tbr') is not None:
1118 res += '%4dk ' % fdict['tbr']
1119 if fdict.get('container') is not None:
1122 res += '%s container' % fdict['container']
1123 if (fdict.get('vcodec') is not None and
1124 fdict.get('vcodec') != 'none'):
1127 res += fdict['vcodec']
1128 if fdict.get('vbr') is not None:
1130 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1132 if fdict.get('vbr') is not None:
1133 res += '%4dk' % fdict['vbr']
1134 if fdict.get('acodec') is not None:
1137 if fdict['acodec'] == 'none':
1140 res += '%-5s' % fdict['acodec']
1141 elif fdict.get('abr') is not None:
1145 if fdict.get('abr') is not None:
1146 res += '@%3dk' % fdict['abr']
1147 if fdict.get('asr') is not None:
1148 res += ' (%5dHz)' % fdict['asr']
1149 if fdict.get('filesize') is not None:
1152 res += format_bytes(fdict['filesize'])
1155 def line(format, idlen=20):
1156 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1157 format['format_id'],
1159 self.format_resolution(format),
1160 format_note(format),
1163 formats = info_dict.get('formats', [info_dict])
1164 idlen = max(len('format code'),
1165 max(len(f['format_id']) for f in formats))
1166 formats_s = [line(f, idlen) for f in formats]
1167 if len(formats) > 1:
1168 formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
1169 formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
1171 header_line = line({
1172 'format_id': 'format code', 'ext': 'extension',
1173 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1174 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1175 (info_dict['id'], header_line, '\n'.join(formats_s)))
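        # The resulting table looks roughly like this (rows are hypothetical and
        # column widths depend on the longest format_id):
        #
        #     [info] Available formats for abc123:
        #     format code extension resolution note
        #     140         m4a       audio only DASH audio  128k (worst)
        #     22          mp4       1280x720   (best)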
1177 def urlopen(self, req):
1178 """ Start an HTTP download """
1179 return self._opener.open(req, timeout=self._socket_timeout)
1181 def print_debug_header(self):
1182 if not self.params.get('verbose'):
1184 write_string('[debug] youtube-dl version ' + __version__ + '\n')
1186 sp = subprocess.Popen(
1187 ['git', 'rev-parse', '--short', 'HEAD'],
1188 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1189 cwd=os.path.dirname(os.path.abspath(__file__)))
1190 out, err = sp.communicate()
1191 out = out.decode().strip()
1192 if re.match('[0-9a-f]+', out):
1193 write_string('[debug] Git HEAD: ' + out + '\n')
1199 write_string('[debug] Python version %s - %s' %
1200 (platform.python_version(), platform_name()) + '\n')
1203 for handler in self._opener.handlers:
1204 if hasattr(handler, 'proxies'):
1205 proxy_map.update(handler.proxies)
1206 write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1208 def _setup_opener(self):
1209 timeout_val = self.params.get('socket_timeout')
1210 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1212 opts_cookiefile = self.params.get('cookiefile')
1213 opts_proxy = self.params.get('proxy')
1215 if opts_cookiefile is None:
1216 self.cookiejar = compat_cookiejar.CookieJar()
1218 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1220 if os.access(opts_cookiefile, os.R_OK):
1221 self.cookiejar.load()
1223 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1225 if opts_proxy is not None:
1226 if opts_proxy == '':
1229 proxies = {'http': opts_proxy, 'https': opts_proxy}
1231 proxies = compat_urllib_request.getproxies()
1232 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1233 if 'http' in proxies and 'https' not in proxies:
1234 proxies['https'] = proxies['http']
1235 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1237 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1238 https_handler = make_HTTPS_handler(
1239 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1240 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1241 opener = compat_urllib_request.build_opener(
1242 https_handler, proxy_handler, cookie_processor, ydlh)
1243 # Delete the default user-agent header, which would otherwise apply in
1244 # cases where our custom HTTP handler doesn't come into play
1245 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1246 opener.addheaders = []
1247 self._opener = opener
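        # Options consumed here (all optional; the values are examples):
        #
        #     {
        #         'socket_timeout': 20,              # default is 600 seconds
        #         'cookiefile': '/tmp/cookies.txt',  # Mozilla cookie jar, read and written back
        #         'proxy': 'http://127.0.0.1:3128',  # used for both http and https
        #         'nocheckcertificate': True,
        #         'debug_printtraffic': False,
        #     }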