2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
28 compat_urllib_request,
49 UnavailableVideoError,
56 from .extractor import get_info_extractor, gen_extractors
57 from .downloader import get_suitable_downloader
58 from .postprocessor import FFmpegMergerPP
59 from .version import __version__
62 class YoutubeDL(object):
65 YoutubeDL objects are the ones responsible of downloading the
66 actual video file and writing it to disk if the user has requested
67 it, among some other tasks. In most cases there should be one per
68 program. As, given a video URL, the downloader doesn't know how to
69 extract all the needed information, task that InfoExtractors do, it
70 has to pass the URL to one of them.
72 For this, YoutubeDL objects have a method that allows
73 InfoExtractors to be registered in a given order. When it is passed
74    a URL, the YoutubeDL object hands it to the first InfoExtractor it
75 finds that reports being able to handle it. The InfoExtractor extracts
76 all the information about the video or videos the URL refers to, and
77 YoutubeDL process the extracted information, possibly using a File
78 Downloader to download the video.
80 YoutubeDL objects accept a lot of parameters. In order not to saturate
81 the object constructor with arguments, it receives a dictionary of
82 options instead. These options are available through the params
83 attribute for the InfoExtractors to use. The YoutubeDL also
84 registers itself as the downloader in charge for the InfoExtractors
85 that are added to it, so this is a "mutual registration".
89 username: Username for authentication purposes.
90 password: Password for authentication purposes.
91    videopassword:     Password for accessing a video.
92 usenetrc: Use netrc for authentication instead.
93 verbose: Print additional info to stdout.
94 quiet: Do not print messages to stdout.
95 forceurl: Force printing final URL.
96 forcetitle: Force printing title.
97 forceid: Force printing ID.
98 forcethumbnail: Force printing thumbnail URL.
99 forcedescription: Force printing description.
100 forcefilename: Force printing final filename.
101 forceduration: Force printing duration.
102 forcejson: Force printing info_dict as JSON.
103 simulate: Do not download the video files.
104 format: Video format code.
105 format_limit: Highest quality format to try.
106 outtmpl: Template for output names.
107 restrictfilenames: Do not allow "&" and spaces in file names
108 ignoreerrors: Do not stop on download errors.
109 nooverwrites: Prevent overwriting files.
110 playliststart: Playlist item to start at.
111 playlistend: Playlist item to end at.
112 matchtitle: Download only matching titles.
113 rejecttitle: Reject downloads for matching titles.
114 logger: Log messages to a logging.Logger instance.
115 logtostderr: Log messages to stderr instead of stdout.
116 writedescription: Write the video description to a .description file
117 writeinfojson: Write the video description to a .info.json file
118 writeannotations: Write the video annotations to a .annotations.xml file
119 writethumbnail: Write the thumbnail image to a file
120 writesubtitles: Write the video subtitles to a file
121 writeautomaticsub: Write the automatic subtitles to a file
122 allsubtitles: Downloads all the subtitles of the video
123 (requires writesubtitles or writeautomaticsub)
124 listsubtitles: Lists all available subtitles for the video
125 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
126 subtitleslangs: List of languages of the subtitles to download
127 keepvideo: Keep the video file after post-processing
128 daterange: A DateRange object, download only if the upload_date is in the range.
129 skip_download: Skip the actual download of the video file
130 cachedir: Location of the cache files in the filesystem.
131 None to disable filesystem cache.
132 noplaylist: Download single video instead of a playlist if in doubt.
133 age_limit: An integer representing the user's age in years.
134 Unsuitable videos for the given age are skipped.
135 min_views: An integer representing the minimum view count the video
136 must have in order to not be skipped.
137 Videos without view count information are always
138 downloaded. None for no limit.
139 max_views: An integer representing the maximum view count.
140 Videos that are more popular than that are not
142 Videos without view count information are always
143 downloaded. None for no limit.
144 download_archive: File name of a file where all downloads are recorded.
145 Videos already present in the file are not downloaded
147 cookiefile: File name where cookies should be read from and dumped to.
148 nocheckcertificate:Do not verify SSL certificates
149 proxy: URL of the proxy server to use
150 socket_timeout: Time to wait for unresponsive hosts, in seconds
151 bidi_workaround: Work around buggy terminals without bidirectional text
152                        support, using fribidi
153 debug_printtraffic:Print out sent and received HTTP traffic
154 include_ads: Download ads as well
156 The following parameters are not used by YoutubeDL itself, they are used by
158 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
159 noresizebuffer, retries, continuedl, noprogress, consoletitle
161 The following options are used by the post processors:
162 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
163 otherwise prefer avconv.
169 _download_retcode = None
170 _num_downloads = None
173 def __init__(self, params=None):
174 """Create a FileDownloader object with the given options."""
178 self._ies_instances = {}
180 self._progress_hooks = []
181 self._download_retcode = 0
182 self._num_downloads = 0
183 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
184 self._err_file = sys.stderr
187 if params.get('bidi_workaround', False):
190 master, slave = pty.openpty()
191 width = get_term_width()
195 width_args = ['-w', str(width)]
197 stdin=subprocess.PIPE,
199 stderr=self._err_file)
201 self._output_process = subprocess.Popen(
202 ['bidiv'] + width_args, **sp_kwargs
205 self._output_process = subprocess.Popen(
206 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
207 self._output_channel = os.fdopen(master, 'rb')
208 except OSError as ose:
210 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
214 if (sys.version_info >= (3,) and sys.platform != 'win32' and
215 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
216 and not params['restrictfilenames']):
217 # On Python 3, the Unicode filesystem API will throw errors (#1474)
219 'Assuming --restrict-filenames since file system encoding '
220 'cannot encode all charactes. '
221 'Set the LC_ALL environment variable to fix this.')
222 self.params['restrictfilenames'] = True
224 if '%(stitle)s' in self.params.get('outtmpl', ''):
225 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
229 def add_info_extractor(self, ie):
230 """Add an InfoExtractor object to the end of the list."""
232 self._ies_instances[ie.ie_key()] = ie
233 ie.set_downloader(self)
235 def get_info_extractor(self, ie_key):
237 Get an instance of an IE with name ie_key, it will try to get one from
238 the _ies list, if there's no instance it will create a new one and add
239 it to the extractor list.
241 ie = self._ies_instances.get(ie_key)
243 ie = get_info_extractor(ie_key)()
244 self.add_info_extractor(ie)
247 def add_default_info_extractors(self):
249 Add the InfoExtractors returned by gen_extractors to the end of the list
251 for ie in gen_extractors():
252 self.add_info_extractor(ie)
254 def add_post_processor(self, pp):
255 """Add a PostProcessor object to the end of the chain."""
257 pp.set_downloader(self)
259 def add_progress_hook(self, ph):
260 """Add the progress hook (currently only for the file downloader)"""
261 self._progress_hooks.append(ph)
263 def _bidi_workaround(self, message):
264 if not hasattr(self, '_output_channel'):
267 assert hasattr(self, '_output_process')
268 assert type(message) == type('')
269 line_count = message.count('\n') + 1
270 self._output_process.stdin.write((message + '\n').encode('utf-8'))
271 self._output_process.stdin.flush()
272 res = ''.join(self._output_channel.readline().decode('utf-8')
273 for _ in range(line_count))
274 return res[:-len('\n')]
276 def to_screen(self, message, skip_eol=False):
277 """Print message to stdout if not in quiet mode."""
278 return self.to_stdout(message, skip_eol, check_quiet=True)
280 def to_stdout(self, message, skip_eol=False, check_quiet=False):
281 """Print message to stdout if not in quiet mode."""
282 if self.params.get('logger'):
283 self.params['logger'].debug(message)
284 elif not check_quiet or not self.params.get('quiet', False):
285 message = self._bidi_workaround(message)
286 terminator = ['\n', ''][skip_eol]
287 output = message + terminator
289 write_string(output, self._screen_file)
291 def to_stderr(self, message):
292 """Print message to stderr."""
293 assert type(message) == type('')
294 if self.params.get('logger'):
295 self.params['logger'].error(message)
297 message = self._bidi_workaround(message)
298 output = message + '\n'
299 write_string(output, self._err_file)
301 def to_console_title(self, message):
302 if not self.params.get('consoletitle', False):
304 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
305 # c_wchar_p() might not be necessary if `message` is
306 # already of type unicode()
307 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
308 elif 'TERM' in os.environ:
309 write_string('\033]0;%s\007' % message, self._screen_file)
311 def save_console_title(self):
312 if not self.params.get('consoletitle', False):
314 if 'TERM' in os.environ:
315 # Save the title on stack
316 write_string('\033[22;0t', self._screen_file)
318 def restore_console_title(self):
319 if not self.params.get('consoletitle', False):
321 if 'TERM' in os.environ:
322 # Restore the title from stack
323 write_string('\033[23;0t', self._screen_file)
326 self.save_console_title()
329 def __exit__(self, *args):
330 self.restore_console_title()
332 if self.params.get('cookiefile') is not None:
333 self.cookiejar.save()
335 def trouble(self, message=None, tb=None):
336 """Determine action to take when a download problem appears.
338 Depending on if the downloader has been configured to ignore
339 download errors or not, this method may throw an exception or
340 not when errors are found, after printing the message.
342 tb, if given, is additional traceback information.
344 if message is not None:
345 self.to_stderr(message)
346 if self.params.get('verbose'):
348 if sys.exc_info()[0]: # if .trouble has been called from an except block
350 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
351 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
352 tb += compat_str(traceback.format_exc())
354 tb_data = traceback.format_list(traceback.extract_stack())
355 tb = ''.join(tb_data)
357 if not self.params.get('ignoreerrors', False):
358 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
359 exc_info = sys.exc_info()[1].exc_info
361 exc_info = sys.exc_info()
362 raise DownloadError(message, exc_info)
363 self._download_retcode = 1
365 def report_warning(self, message):
367 Print the message to stderr, it will be prefixed with 'WARNING:'
368 If stderr is a tty file the 'WARNING:' will be colored
370 if self._err_file.isatty() and os.name != 'nt':
371 _msg_header = '\033[0;33mWARNING:\033[0m'
373 _msg_header = 'WARNING:'
374 warning_message = '%s %s' % (_msg_header, message)
375 self.to_stderr(warning_message)
377 def report_error(self, message, tb=None):
379 Do the same as trouble, but prefixes the message with 'ERROR:', colored
380 in red if stderr is a tty file.
382 if self._err_file.isatty() and os.name != 'nt':
383 _msg_header = '\033[0;31mERROR:\033[0m'
385 _msg_header = 'ERROR:'
386 error_message = '%s %s' % (_msg_header, message)
387 self.trouble(error_message, tb)
389 def report_file_already_downloaded(self, file_name):
390 """Report file has already been fully downloaded."""
392 self.to_screen('[download] %s has already been downloaded' % file_name)
393 except UnicodeEncodeError:
394 self.to_screen('[download] The file has already been downloaded')
396 def increment_downloads(self):
397 """Increment the ordinal that assigns a number to each file."""
398 self._num_downloads += 1
400 def prepare_filename(self, info_dict):
401 """Generate the output filename."""
403 template_dict = dict(info_dict)
405 template_dict['epoch'] = int(time.time())
406 autonumber_size = self.params.get('autonumber_size')
407 if autonumber_size is None:
409 autonumber_templ = '%0' + str(autonumber_size) + 'd'
410 template_dict['autonumber'] = autonumber_templ % self._num_downloads
411 if template_dict.get('playlist_index') is not None:
412 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
414 sanitize = lambda k, v: sanitize_filename(
416 restricted=self.params.get('restrictfilenames'),
418 template_dict = dict((k, sanitize(k, v))
419 for k, v in template_dict.items()
421 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
423 tmpl = os.path.expanduser(self.params['outtmpl'])
424 filename = tmpl % template_dict
426 except ValueError as err:
427 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
430 def _match_entry(self, info_dict):
431 """ Returns None iff the file should be downloaded """
433 video_title = info_dict.get('title', info_dict.get('id', 'video'))
434 if 'title' in info_dict:
435 # This can happen when we're just evaluating the playlist
436 title = info_dict['title']
437 matchtitle = self.params.get('matchtitle', False)
439 if not re.search(matchtitle, title, re.IGNORECASE):
440 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
441 rejecttitle = self.params.get('rejecttitle', False)
443 if re.search(rejecttitle, title, re.IGNORECASE):
444 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
445 date = info_dict.get('upload_date', None)
447 dateRange = self.params.get('daterange', DateRange())
448 if date not in dateRange:
449 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
450 view_count = info_dict.get('view_count', None)
451 if view_count is not None:
452 min_views = self.params.get('min_views')
453 if min_views is not None and view_count < min_views:
454 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
455 max_views = self.params.get('max_views')
456 if max_views is not None and view_count > max_views:
457 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
458 age_limit = self.params.get('age_limit')
459 if age_limit is not None:
460 if age_limit < info_dict.get('age_limit', 0):
461 return 'Skipping "' + title + '" because it is age restricted'
462 if self.in_download_archive(info_dict):
463 return '%s has already been recorded in archive' % video_title
467 def add_extra_info(info_dict, extra_info):
468 '''Set the keys from extra_info in info dict if they are missing'''
469 for key, value in extra_info.items():
470 info_dict.setdefault(key, value)
472 def extract_info(self, url, download=True, ie_key=None, extra_info={},
475 Returns a list with a dictionary for each video we find.
476 If 'download', also downloads the videos.
477 extra_info is a dict containing the extra values to add to each result
481 ies = [self.get_info_extractor(ie_key)]
486 if not ie.suitable(url):
490 self.report_warning('The program functionality for this site has been marked as broken, '
491 'and will probably not work.')
494 ie_result = ie.extract(url)
495 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
497 if isinstance(ie_result, list):
498 # Backwards compatibility: old IE result format
500 '_type': 'compat_list',
501 'entries': ie_result,
503 self.add_extra_info(ie_result,
505 'extractor': ie.IE_NAME,
507 'webpage_url_basename': url_basename(url),
508 'extractor_key': ie.ie_key(),
511 return self.process_ie_result(ie_result, download, extra_info)
514 except ExtractorError as de: # An error we somewhat expected
515 self.report_error(compat_str(de), de.format_traceback())
517 except Exception as e:
518 if self.params.get('ignoreerrors', False):
519 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
524 self.report_error('no suitable InfoExtractor: %s' % url)
526 def process_ie_result(self, ie_result, download=True, extra_info={}):
528 Take the result of the ie(may be modified) and resolve all unresolved
529 references (URLs, playlist items).
531 It will also download the videos if 'download'.
532 Returns the resolved ie_result.
535 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
536 if result_type == 'video':
537 self.add_extra_info(ie_result, extra_info)
538 return self.process_video_result(ie_result, download=download)
539 elif result_type == 'url':
540 # We have to add extra_info to the results because it may be
541 # contained in a playlist
542 return self.extract_info(ie_result['url'],
544 ie_key=ie_result.get('ie_key'),
545 extra_info=extra_info)
546 elif result_type == 'url_transparent':
547 # Use the information from the embedding page
548 info = self.extract_info(
549 ie_result['url'], ie_key=ie_result.get('ie_key'),
550 extra_info=extra_info, download=False, process=False)
552 def make_result(embedded_info):
553 new_result = ie_result.copy()
554 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
555 'entries', 'ie_key', 'duration',
556 'subtitles', 'annotations', 'format',
557 'thumbnail', 'thumbnails'):
560 if f in embedded_info:
561 new_result[f] = embedded_info[f]
563 new_result = make_result(info)
565 assert new_result.get('_type') != 'url_transparent'
566 if new_result.get('_type') == 'compat_list':
567 new_result['entries'] = [
568 make_result(e) for e in new_result['entries']]
570 return self.process_ie_result(
571 new_result, download=download, extra_info=extra_info)
572 elif result_type == 'playlist':
573 # We process each entry in the playlist
574 playlist = ie_result.get('title', None) or ie_result.get('id', None)
575 self.to_screen('[download] Downloading playlist: %s' % playlist)
577 playlist_results = []
579 n_all_entries = len(ie_result['entries'])
580 playliststart = self.params.get('playliststart', 1) - 1
581 playlistend = self.params.get('playlistend', None)
582 # For backwards compatibility, interpret -1 as whole list
583 if playlistend == -1:
586 entries = ie_result['entries'][playliststart:playlistend]
587 n_entries = len(entries)
590 "[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
591 (ie_result['extractor'], playlist, n_all_entries, n_entries))
593 for i, entry in enumerate(entries, 1):
594 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
596 'playlist': playlist,
597 'playlist_index': i + playliststart,
598 'extractor': ie_result['extractor'],
599 'webpage_url': ie_result['webpage_url'],
600 'webpage_url_basename': url_basename(ie_result['webpage_url']),
601 'extractor_key': ie_result['extractor_key'],
604 reason = self._match_entry(entry)
605 if reason is not None:
606 self.to_screen('[download] ' + reason)
609 entry_result = self.process_ie_result(entry,
612 playlist_results.append(entry_result)
613 ie_result['entries'] = playlist_results
615 elif result_type == 'compat_list':
617 self.add_extra_info(r,
619 'extractor': ie_result['extractor'],
620 'webpage_url': ie_result['webpage_url'],
621 'webpage_url_basename': url_basename(ie_result['webpage_url']),
622 'extractor_key': ie_result['extractor_key'],
625 ie_result['entries'] = [
626 self.process_ie_result(_fixup(r), download, extra_info)
627 for r in ie_result['entries']
631 raise Exception('Invalid result type: %s' % result_type)
633 def select_format(self, format_spec, available_formats):
634 if format_spec == 'best' or format_spec is None:
635 return available_formats[-1]
636 elif format_spec == 'worst':
637 return available_formats[0]
639 extensions = ['mp4', 'flv', 'webm', '3gp']
640 if format_spec in extensions:
641 filter_f = lambda f: f['ext'] == format_spec
643 filter_f = lambda f: f['format_id'] == format_spec
644 matches = list(filter(filter_f, available_formats))
649 def process_video_result(self, info_dict, download=True):
650 assert info_dict.get('_type', 'video') == 'video'
652 if 'playlist' not in info_dict:
653 # It isn't part of a playlist
654 info_dict['playlist'] = None
655 info_dict['playlist_index'] = None
657 # This extractors handle format selection themselves
658 if info_dict['extractor'] in ['Youku']:
660 self.process_info(info_dict)
663 # We now pick which formats have to be downloaded
664 if info_dict.get('formats') is None:
665 # There's only one format available
666 formats = [info_dict]
668 formats = info_dict['formats']
670 # We check that all the formats have the format and format_id fields
671 for (i, format) in enumerate(formats):
672 if format.get('format_id') is None:
673 format['format_id'] = compat_str(i)
674 if format.get('format') is None:
675 format['format'] = '{id} - {res}{note}'.format(
676 id=format['format_id'],
677 res=self.format_resolution(format),
678 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
680 # Automatically determine file extension if missing
681 if 'ext' not in format:
682 format['ext'] = determine_ext(format['url'])
684 format_limit = self.params.get('format_limit', None)
686 formats = list(takewhile_inclusive(
687 lambda f: f['format_id'] != format_limit, formats
690 # TODO Central sorting goes here
692 if formats[0] is not info_dict:
693 # only set the 'formats' fields if the original info_dict list them
694 # otherwise we end up with a circular reference, the first (and unique)
695 # element in the 'formats' field in info_dict is info_dict itself,
696 # wich can't be exported to json
697 info_dict['formats'] = formats
698 if self.params.get('listformats', None):
699 self.list_formats(info_dict)
702 req_format = self.params.get('format', 'best')
703 if req_format is None:
705 formats_to_download = []
706 # The -1 is for supporting YoutubeIE
707 if req_format in ('-1', 'all'):
708 formats_to_download = formats
710 # We can accept formats requested in the format: 34/5/best, we pick
711 # the first that is available, starting from left
712 req_formats = req_format.split('/')
713 for rf in req_formats:
714 if re.match(r'.+?\+.+?', rf) is not None:
715 # Two formats have been requested like '137+139'
716 format_1, format_2 = rf.split('+')
717 formats_info = (self.select_format(format_1, formats),
718 self.select_format(format_2, formats))
719 if all(formats_info):
721 'requested_formats': formats_info,
723 'ext': formats_info[0]['ext'],
726 selected_format = None
728 selected_format = self.select_format(rf, formats)
729 if selected_format is not None:
730 formats_to_download = [selected_format]
732 if not formats_to_download:
733 raise ExtractorError('requested format not available',
737 if len(formats_to_download) > 1:
738 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
739 for format in formats_to_download:
740 new_info = dict(info_dict)
741 new_info.update(format)
742 self.process_info(new_info)
743 # We update the info dict with the best quality format (backwards compatibility)
744 info_dict.update(formats_to_download[-1])
747 def process_info(self, info_dict):
748 """Process a single resolved IE result."""
750 assert info_dict.get('_type', 'video') == 'video'
751 #We increment the download the download count here to match the previous behaviour.
752 self.increment_downloads()
754 info_dict['fulltitle'] = info_dict['title']
755 if len(info_dict['title']) > 200:
756 info_dict['title'] = info_dict['title'][:197] + '...'
758 # Keep for backwards compatibility
759 info_dict['stitle'] = info_dict['title']
761 if not 'format' in info_dict:
762 info_dict['format'] = info_dict['ext']
764 reason = self._match_entry(info_dict)
765 if reason is not None:
766 self.to_screen('[download] ' + reason)
769 max_downloads = self.params.get('max_downloads')
770 if max_downloads is not None:
771 if self._num_downloads > int(max_downloads):
772 raise MaxDownloadsReached()
774 filename = self.prepare_filename(info_dict)
777 if self.params.get('forcetitle', False):
778 self.to_stdout(info_dict['fulltitle'])
779 if self.params.get('forceid', False):
780 self.to_stdout(info_dict['id'])
781 if self.params.get('forceurl', False):
782 # For RTMP URLs, also include the playpath
783 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
784 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
785 self.to_stdout(info_dict['thumbnail'])
786 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
787 self.to_stdout(info_dict['description'])
788 if self.params.get('forcefilename', False) and filename is not None:
789 self.to_stdout(filename)
790 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
791 self.to_stdout(formatSeconds(info_dict['duration']))
792 if self.params.get('forceformat', False):
793 self.to_stdout(info_dict['format'])
794 if self.params.get('forcejson', False):
795 info_dict['_filename'] = filename
796 self.to_stdout(json.dumps(info_dict))
798 # Do nothing else if in simulate mode
799 if self.params.get('simulate', False):
806 dn = os.path.dirname(encodeFilename(filename))
807 if dn != '' and not os.path.exists(dn):
809 except (OSError, IOError) as err:
810 self.report_error('unable to create directory ' + compat_str(err))
813 if self.params.get('writedescription', False):
814 descfn = filename + '.description'
815 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
816 self.to_screen('[info] Video description is already present')
819 self.to_screen('[info] Writing video description to: ' + descfn)
820 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
821 descfile.write(info_dict['description'])
822 except (KeyError, TypeError):
823 self.report_warning('There\'s no description to write.')
824 except (OSError, IOError):
825 self.report_error('Cannot write description file ' + descfn)
828 if self.params.get('writeannotations', False):
829 annofn = filename + '.annotations.xml'
830 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
831 self.to_screen('[info] Video annotations are already present')
834 self.to_screen('[info] Writing video annotations to: ' + annofn)
835 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
836 annofile.write(info_dict['annotations'])
837 except (KeyError, TypeError):
838 self.report_warning('There are no annotations to write.')
839 except (OSError, IOError):
840 self.report_error('Cannot write annotations file: ' + annofn)
843 subtitles_are_requested = any([self.params.get('writesubtitles', False),
844 self.params.get('writeautomaticsub')])
846 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
847 # subtitles download errors are already managed as troubles in relevant IE
848 # that way it will silently go on when used with unsupporting IE
849 subtitles = info_dict['subtitles']
850 sub_format = self.params.get('subtitlesformat', 'srt')
851 for sub_lang in subtitles.keys():
852 sub = subtitles[sub_lang]
856 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
857 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
858 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
860 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
861 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
863 except (OSError, IOError):
864 self.report_error('Cannot write subtitles file ' + descfn)
867 if self.params.get('writeinfojson', False):
868 infofn = os.path.splitext(filename)[0] + '.info.json'
869 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
870 self.to_screen('[info] Video description metadata is already present')
872 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
874 write_json_file(info_dict, encodeFilename(infofn))
875 except (OSError, IOError):
876 self.report_error('Cannot write metadata to JSON file ' + infofn)
879 if self.params.get('writethumbnail', False):
880 if info_dict.get('thumbnail') is not None:
881 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
882 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
883 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
884 self.to_screen('[%s] %s: Thumbnail is already present' %
885 (info_dict['extractor'], info_dict['id']))
887 self.to_screen('[%s] %s: Downloading thumbnail ...' %
888 (info_dict['extractor'], info_dict['id']))
890 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
891 with open(thumb_filename, 'wb') as thumbf:
892 shutil.copyfileobj(uf, thumbf)
893 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
894 (info_dict['extractor'], info_dict['id'], thumb_filename))
895 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
896 self.report_warning('Unable to download thumbnail "%s": %s' %
897 (info_dict['thumbnail'], compat_str(err)))
899 if not self.params.get('skip_download', False):
900 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
905 fd = get_suitable_downloader(info)(self, self.params)
906 for ph in self._progress_hooks:
907 fd.add_progress_hook(ph)
908 return fd.download(name, info)
909 if info_dict.get('requested_formats') is not None:
912 merger = FFmpegMergerPP(self)
913 if not merger._get_executable():
915 self.report_warning('You have requested multiple '
916 'formats but ffmpeg or avconv are not installed.'
917 ' The formats won\'t be merged')
919 postprocessors = [merger]
920 for f in info_dict['requested_formats']:
921 new_info = dict(info_dict)
923 fname = self.prepare_filename(new_info)
924 fname = prepend_extension(fname, 'f%s' % f['format_id'])
925 downloaded.append(fname)
926 partial_success = dl(fname, new_info)
927 success = success and partial_success
928 info_dict['__postprocessors'] = postprocessors
929 info_dict['__files_to_merge'] = downloaded
932 success = dl(filename, info_dict)
933 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
934 self.report_error('unable to download video data: %s' % str(err))
936 except (OSError, IOError) as err:
937 raise UnavailableVideoError(err)
938 except (ContentTooShortError, ) as err:
939 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
944 self.post_process(filename, info_dict)
945 except (PostProcessingError) as err:
946 self.report_error('postprocessing: %s' % str(err))
949 self.record_download_archive(info_dict)
951 def download(self, url_list):
952 """Download a given list of URLs."""
953 if (len(url_list) > 1 and
954 '%' not in self.params['outtmpl']
955 and self.params.get('max_downloads') != 1):
956 raise SameFileError(self.params['outtmpl'])
960 #It also downloads the videos
961 self.extract_info(url)
962 except UnavailableVideoError:
963 self.report_error('unable to download video')
964 except MaxDownloadsReached:
965 self.to_screen('[info] Maximum number of downloaded files reached.')
968 return self._download_retcode
970 def download_with_info_file(self, info_filename):
971 with io.open(info_filename, 'r', encoding='utf-8') as f:
974 self.process_ie_result(info, download=True)
975 except DownloadError:
976 webpage_url = info.get('webpage_url')
977 if webpage_url is not None:
978 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
979 return self.download([webpage_url])
982 return self._download_retcode
984 def post_process(self, filename, ie_info):
985 """Run all the postprocessors on the given file."""
987 info['filepath'] = filename
990 if ie_info.get('__postprocessors') is not None:
991 pps_chain.extend(ie_info['__postprocessors'])
992 pps_chain.extend(self._pps)
995 keep_video_wish, new_info = pp.run(info)
996 if keep_video_wish is not None:
998 keep_video = keep_video_wish
999 elif keep_video is None:
1000 # No clear decision yet, let IE decide
1001 keep_video = keep_video_wish
1002 except PostProcessingError as e:
1003 self.report_error(e.msg)
1004 if keep_video is False and not self.params.get('keepvideo', False):
1006 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1007 os.remove(encodeFilename(filename))
1008 except (IOError, OSError):
1009 self.report_warning('Unable to remove downloaded video file')
1011 def _make_archive_id(self, info_dict):
1012 # Future-proof against any change in case
1013 # and backwards compatibility with prior versions
1014 extractor = info_dict.get('extractor_key')
1015 if extractor is None:
1016 if 'id' in info_dict:
1017 extractor = info_dict.get('ie_key') # key in a playlist
1018 if extractor is None:
1019 return None # Incomplete video information
1020 return extractor.lower() + ' ' + info_dict['id']
1022 def in_download_archive(self, info_dict):
1023 fn = self.params.get('download_archive')
1027 vid_id = self._make_archive_id(info_dict)
1029 return False # Incomplete video information
1032 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1033 for line in archive_file:
1034 if line.strip() == vid_id:
1036 except IOError as ioe:
1037 if ioe.errno != errno.ENOENT:
1041 def record_download_archive(self, info_dict):
1042 fn = self.params.get('download_archive')
1045 vid_id = self._make_archive_id(info_dict)
1047 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1048 archive_file.write(vid_id + '\n')
1051 def format_resolution(format, default='unknown'):
1052 if format.get('vcodec') == 'none':
1054 if format.get('resolution') is not None:
1055 return format['resolution']
1056 if format.get('height') is not None:
1057 if format.get('width') is not None:
1058 res = '%sx%s' % (format['width'], format['height'])
1060 res = '%sp' % format['height']
1061 elif format.get('width') is not None:
1062 res = '?x%d' % format['width']
1067 def list_formats(self, info_dict):
1068 def format_note(fdict):
1070 if fdict.get('ext') in ['f4f', 'f4m']:
1071 res += '(unsupported) '
1072 if fdict.get('format_note') is not None:
1073 res += fdict['format_note'] + ' '
1074 if fdict.get('tbr') is not None:
1075 res += '%4dk ' % fdict['tbr']
1076 if (fdict.get('vcodec') is not None and
1077 fdict.get('vcodec') != 'none'):
1078 res += '%-5s' % fdict['vcodec']
1079 if fdict.get('vbr') is not None:
1081 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1083 if fdict.get('vbr') is not None:
1084 res += '%4dk' % fdict['vbr']
1085 if fdict.get('acodec') is not None:
1088 res += '%-5s' % fdict['acodec']
1089 elif fdict.get('abr') is not None:
1093 if fdict.get('abr') is not None:
1094 res += '@%3dk' % fdict['abr']
1095 if fdict.get('filesize') is not None:
1098 res += format_bytes(fdict['filesize'])
1101 def line(format, idlen=20):
1102 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1103 format['format_id'],
1105 self.format_resolution(format),
1106 format_note(format),
1109 formats = info_dict.get('formats', [info_dict])
1110 idlen = max(len('format code'),
1111 max(len(f['format_id']) for f in formats))
1112 formats_s = [line(f, idlen) for f in formats]
1113 if len(formats) > 1:
1114 formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
1115 formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
1117 header_line = line({
1118 'format_id': 'format code', 'ext': 'extension',
1119 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1120 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1121 (info_dict['id'], header_line, '\n'.join(formats_s)))
1123 def urlopen(self, req):
1124 """ Start an HTTP download """
1125 return self._opener.open(req)
1127 def print_debug_header(self):
1128 if not self.params.get('verbose'):
1130 write_string('[debug] youtube-dl version ' + __version__ + '\n')
1132 sp = subprocess.Popen(
1133 ['git', 'rev-parse', '--short', 'HEAD'],
1134 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1135 cwd=os.path.dirname(os.path.abspath(__file__)))
1136 out, err = sp.communicate()
1137 out = out.decode().strip()
1138 if re.match('[0-9a-f]+', out):
1139 write_string('[debug] Git HEAD: ' + out + '\n')
1145 write_string('[debug] Python version %s - %s' %
1146 (platform.python_version(), platform_name()) + '\n')
1149 for handler in self._opener.handlers:
1150 if hasattr(handler, 'proxies'):
1151 proxy_map.update(handler.proxies)
1152 write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1154 def _setup_opener(self):
1155 timeout_val = self.params.get('socket_timeout')
1156 timeout = 600 if timeout_val is None else float(timeout_val)
1158 opts_cookiefile = self.params.get('cookiefile')
1159 opts_proxy = self.params.get('proxy')
1161 if opts_cookiefile is None:
1162 self.cookiejar = compat_cookiejar.CookieJar()
1164 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1166 if os.access(opts_cookiefile, os.R_OK):
1167 self.cookiejar.load()
1169 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1171 if opts_proxy is not None:
1172 if opts_proxy == '':
1175 proxies = {'http': opts_proxy, 'https': opts_proxy}
1177 proxies = compat_urllib_request.getproxies()
1178 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1179 if 'http' in proxies and 'https' not in proxies:
1180 proxies['https'] = proxies['http']
1181 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1183 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1184 https_handler = make_HTTPS_handler(
1185 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1186 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1187 opener = compat_urllib_request.build_opener(
1188 https_handler, proxy_handler, cookie_processor, ydlh)
1189 # Delete the default user-agent header, which would otherwise apply in
1190 # cases where our custom HTTP handler doesn't come into play
1191 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1192 opener.addheaders = []
1193 self._opener = opener
1195 # TODO remove this global modification
1196 compat_urllib_request.install_opener(opener)
1197 socket.setdefaulttimeout(timeout)