2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
28 compat_urllib_request,
49 UnavailableVideoError,
54 from .extractor import get_info_extractor, gen_extractors
55 from .FileDownloader import FileDownloader
56 from .version import __version__
59 class YoutubeDL(object):
62 YoutubeDL objects are the ones responsible for downloading the
63 actual video file and writing it to disk if the user has requested
64 it, among some other tasks. In most cases there should be one per
65 program. As, given a video URL, the downloader doesn't know how to
66 extract all the needed information, task that InfoExtractors do, it
67 has to pass the URL to one of them.
69 For this, YoutubeDL objects have a method that allows
70 InfoExtractors to be registered in a given order. When it is passed
71 a URL, the YoutubeDL object hands it to the first InfoExtractor it
72 finds that reports being able to handle it. The InfoExtractor extracts
73 all the information about the video or videos the URL refers to, and
74 YoutubeDL processes the extracted information, possibly using a File
75 Downloader to download the video.
77 YoutubeDL objects accept a lot of parameters. In order not to saturate
78 the object constructor with arguments, it receives a dictionary of
79 options instead. These options are available through the params
80 attribute for the InfoExtractors to use. The YoutubeDL also
81 registers itself as the downloader in charge for the InfoExtractors
82 that are added to it, so this is a "mutual registration".
86 username: Username for authentication purposes.
87 password: Password for authentication purposes.
88 videopassword: Password for accessing a video.
89 usenetrc: Use netrc for authentication instead.
90 verbose: Print additional info to stdout.
91 quiet: Do not print messages to stdout.
92 forceurl: Force printing final URL.
93 forcetitle: Force printing title.
94 forceid: Force printing ID.
95 forcethumbnail: Force printing thumbnail URL.
96 forcedescription: Force printing description.
97 forcefilename: Force printing final filename.
98 forceduration: Force printing duration.
99 forcejson: Force printing info_dict as JSON.
100 simulate: Do not download the video files.
101 format: Video format code.
102 format_limit: Highest quality format to try.
103 outtmpl: Template for output names.
104 restrictfilenames: Do not allow "&" and spaces in file names
105 ignoreerrors: Do not stop on download errors.
106 nooverwrites: Prevent overwriting files.
107 playliststart: Playlist item to start at.
108 playlistend: Playlist item to end at.
109 matchtitle: Download only matching titles.
110 rejecttitle: Reject downloads for matching titles.
111 logger: Log messages to a logging.Logger instance.
112 logtostderr: Log messages to stderr instead of stdout.
113 writedescription: Write the video description to a .description file
114 writeinfojson: Write the video metadata to a .info.json file
115 writeannotations: Write the video annotations to a .annotations.xml file
116 writethumbnail: Write the thumbnail image to a file
117 writesubtitles: Write the video subtitles to a file
118 writeautomaticsub: Write the automatic subtitles to a file
119 allsubtitles: Downloads all the subtitles of the video
120 (requires writesubtitles or writeautomaticsub)
121 listsubtitles: Lists all available subtitles for the video
122 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
123 subtitleslangs: List of languages of the subtitles to download
124 keepvideo: Keep the video file after post-processing
125 daterange: A DateRange object, download only if the upload_date is in the range.
126 skip_download: Skip the actual download of the video file
127 cachedir: Location of the cache files in the filesystem.
128 None to disable filesystem cache.
129 noplaylist: Download single video instead of a playlist if in doubt.
130 age_limit: An integer representing the user's age in years.
131 Unsuitable videos for the given age are skipped.
132 min_views: An integer representing the minimum view count the video
133 must have in order to not be skipped.
134 Videos without view count information are always
135 downloaded. None for no limit.
136 max_views: An integer representing the maximum view count.
137 Videos that are more popular than that are not
139 Videos without view count information are always
140 downloaded. None for no limit.
141 download_archive: File name of a file where all downloads are recorded.
142 Videos already present in the file are not downloaded
144 cookiefile: File name where cookies should be read from and dumped to.
145 nocheckcertificate:Do not verify SSL certificates
146 proxy: URL of the proxy server to use
147 socket_timeout: Time to wait for unresponsive hosts, in seconds
148 bidi_workaround: Work around buggy terminals without bidirectional text
149 support, using fribidi
151 The following parameters are not used by YoutubeDL itself, they are used by
153 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
154 noresizebuffer, retries, continuedl, noprogress, consoletitle
160 _download_retcode = None
161 _num_downloads = None
164 def __init__(self, params=None):
165 """Create a YoutubeDL object with the given options."""
167 self._ies_instances = {}
169 self._progress_hooks = []
170 self._download_retcode = 0
171 self._num_downloads = 0
172 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
173 self._err_file = sys.stderr
174 self.params = {} if params is None else params
176 if params.get('bidi_workaround', False):
179 master, slave = pty.openpty()
180 width = get_term_width()
184 width_args = ['-w', str(width)]
185 self._fribidi = subprocess.Popen(
186 ['fribidi', '-c', 'UTF-8'] + width_args,
187 stdin=subprocess.PIPE,
189 stderr=self._err_file)
190 self._fribidi_channel = os.fdopen(master, 'rb')
191 except OSError as ose:
193 self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
197 if (sys.version_info >= (3,) and sys.platform != 'win32' and
198 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
199 and not params['restrictfilenames']):
200 # On Python 3, the Unicode filesystem API will throw errors (#1474)
202 u'Assuming --restrict-filenames since file system encoding '
203 u'cannot encode all charactes. '
204 u'Set the LC_ALL environment variable to fix this.')
205 self.params['restrictfilenames'] = True
207 self.fd = FileDownloader(self, self.params)
209 if '%(stitle)s' in self.params.get('outtmpl', ''):
210 self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
214 def add_info_extractor(self, ie):
215 """Add an InfoExtractor object to the end of the list."""
217 self._ies_instances[ie.ie_key()] = ie
218 ie.set_downloader(self)
220 def get_info_extractor(self, ie_key):
222 Get an instance of an IE with name ie_key, it will try to get one from
223 the _ies list, if there's no instance it will create a new one and add
224 it to the extractor list.
226 ie = self._ies_instances.get(ie_key)
228 ie = get_info_extractor(ie_key)()
229 self.add_info_extractor(ie)
232 def add_default_info_extractors(self):
234 Add the InfoExtractors returned by gen_extractors to the end of the list
236 for ie in gen_extractors():
237 self.add_info_extractor(ie)
239 def add_post_processor(self, pp):
240 """Add a PostProcessor object to the end of the chain."""
242 pp.set_downloader(self)
244 def _bidi_workaround(self, message):
245 if not hasattr(self, '_fribidi_channel'):
248 assert type(message) == type(u'')
249 line_count = message.count(u'\n') + 1
250 self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
251 self._fribidi.stdin.flush()
252 res = u''.join(self._fribidi_channel.readline().decode('utf-8')
253 for _ in range(line_count))
254 return res[:-len(u'\n')]
def to_screen(self, message, skip_eol=False):
    """Show message on the screen stream, honouring the 'quiet' option.

    Thin wrapper around to_stdout() that always enables the quiet check.
    skip_eol is forwarded unchanged and to_stdout()'s result is returned.
    """
    emit = self.to_stdout
    return emit(message, skip_eol, check_quiet=True)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Emit message through the configured logger or the screen stream.

    When a 'logger' option is set, the message goes to its debug() method
    and nothing is written to the screen. Otherwise the message is written
    to self._screen_file, followed by a newline unless skip_eol is true.
    With check_quiet set, the 'quiet' option suppresses the screen output.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return
    # Apply the bidi workaround first, then choose the line ending.
    shaped = self._bidi_workaround(message)
    line_end = [u'\n', u''][skip_eol]
    write_string(shaped + line_end, self._screen_file)
271 def to_stderr(self, message):
272 """Print message to stderr."""
273 assert type(message) == type(u'')
274 if self.params.get('logger'):
275 self.params['logger'].error(message)
277 message = self._bidi_workaround(message)
278 output = message + u'\n'
279 write_string(output, self._err_file)
281 def to_console_title(self, message):
282 if not self.params.get('consoletitle', False):
284 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
285 # c_wchar_p() might not be necessary if `message` is
286 # already of type unicode()
287 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
288 elif 'TERM' in os.environ:
289 write_string(u'\033]0;%s\007' % message, self._screen_file)
291 def save_console_title(self):
292 if not self.params.get('consoletitle', False):
294 if 'TERM' in os.environ:
295 # Save the title on stack
296 write_string(u'\033[22;0t', self._screen_file)
298 def restore_console_title(self):
299 if not self.params.get('consoletitle', False):
301 if 'TERM' in os.environ:
302 # Restore the title from stack
303 write_string(u'\033[23;0t', self._screen_file)
306 self.save_console_title()
def __exit__(self, *args):
    """Leave the context: restore the console title and save cookies.

    The cookie jar's save() is called only when a 'cookiefile' option is
    set. Nothing is returned, so an exception raised inside the
    with-block is not suppressed.
    """
    self.restore_console_title()
    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save()
315 def trouble(self, message=None, tb=None):
316 """Determine action to take when a download problem appears.
318 Depending on if the downloader has been configured to ignore
319 download errors or not, this method may throw an exception or
320 not when errors are found, after printing the message.
322 tb, if given, is additional traceback information.
324 if message is not None:
325 self.to_stderr(message)
326 if self.params.get('verbose'):
328 if sys.exc_info()[0]: # if .trouble has been called from an except block
330 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
331 tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
332 tb += compat_str(traceback.format_exc())
334 tb_data = traceback.format_list(traceback.extract_stack())
335 tb = u''.join(tb_data)
337 if not self.params.get('ignoreerrors', False):
338 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
339 exc_info = sys.exc_info()[1].exc_info
341 exc_info = sys.exc_info()
342 raise DownloadError(message, exc_info)
343 self._download_retcode = 1
345 def report_warning(self, message):
347 Print the message to stderr, it will be prefixed with 'WARNING:'
348 If stderr is a tty file the 'WARNING:' will be colored
350 if self._err_file.isatty() and os.name != 'nt':
351 _msg_header = u'\033[0;33mWARNING:\033[0m'
353 _msg_header = u'WARNING:'
354 warning_message = u'%s %s' % (_msg_header, message)
355 self.to_stderr(warning_message)
357 def report_error(self, message, tb=None):
359 Do the same as trouble, but prefixes the message with 'ERROR:', colored
360 in red if stderr is a tty file.
362 if self._err_file.isatty() and os.name != 'nt':
363 _msg_header = u'\033[0;31mERROR:\033[0m'
365 _msg_header = u'ERROR:'
366 error_message = u'%s %s' % (_msg_header, message)
367 self.trouble(error_message, tb)
def report_writedescription(self, descfn):
    """Announce on screen that the description file descfn is being written."""
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
def report_writesubtitles(self, sub_filename):
    """Announce on screen that the subtitles file sub_filename is being written."""
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
def report_writeinfojson(self, infofn):
    """Announce on screen that the JSON metadata file infofn is being written.

    This only prints the notice; it does not write the file itself.
    """
    # The message starts with "Writing", matching the description,
    # subtitles and annotations notices.
    self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
def report_writeannotations(self, annofn):
    """Announce on screen that the annotations file annofn is being written."""
    notice = u'[info] Writing video annotations to: ' + annofn
    self.to_screen(notice)
385 def report_file_already_downloaded(self, file_name):
386 """Report file has already been fully downloaded."""
388 self.to_screen(u'[download] %s has already been downloaded' % file_name)
389 except UnicodeEncodeError:
390 self.to_screen(u'[download] The file has already been downloaded')
def increment_downloads(self):
    """Advance the per-object download counter by one.

    The counter (self._num_downloads) is the ordinal assigned to each file.
    """
    self._num_downloads = self._num_downloads + 1
396 def prepare_filename(self, info_dict):
397 """Generate the output filename."""
399 template_dict = dict(info_dict)
401 template_dict['epoch'] = int(time.time())
402 autonumber_size = self.params.get('autonumber_size')
403 if autonumber_size is None:
405 autonumber_templ = u'%0' + str(autonumber_size) + u'd'
406 template_dict['autonumber'] = autonumber_templ % self._num_downloads
407 if template_dict.get('playlist_index') is not None:
408 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
410 sanitize = lambda k, v: sanitize_filename(
412 restricted=self.params.get('restrictfilenames'),
414 template_dict = dict((k, sanitize(k, v))
415 for k, v in template_dict.items()
417 template_dict = collections.defaultdict(lambda: u'NA', template_dict)
419 tmpl = os.path.expanduser(self.params['outtmpl'])
420 filename = tmpl % template_dict
422 except ValueError as err:
423 self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
426 def _match_entry(self, info_dict):
427 """ Returns None iff the file should be downloaded """
429 video_title = info_dict.get('title', info_dict.get('id', u'video'))
430 if 'title' in info_dict:
431 # This can happen when we're just evaluating the playlist
432 title = info_dict['title']
433 matchtitle = self.params.get('matchtitle', False)
435 if not re.search(matchtitle, title, re.IGNORECASE):
436 return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
437 rejecttitle = self.params.get('rejecttitle', False)
439 if re.search(rejecttitle, title, re.IGNORECASE):
440 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
441 date = info_dict.get('upload_date', None)
443 dateRange = self.params.get('daterange', DateRange())
444 if date not in dateRange:
445 return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
446 view_count = info_dict.get('view_count', None)
447 if view_count is not None:
448 min_views = self.params.get('min_views')
449 if min_views is not None and view_count < min_views:
450 return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
451 max_views = self.params.get('max_views')
452 if max_views is not None and view_count > max_views:
453 return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
454 age_limit = self.params.get('age_limit')
455 if age_limit is not None:
456 if age_limit < info_dict.get('age_limit', 0):
457 return u'Skipping "' + title + '" because it is age restricted'
458 if self.in_download_archive(info_dict):
459 return u'%s has already been recorded in archive' % video_title
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info that it does not have yet.

    Keys already present in info_dict keep their value, even when that
    value is None. info_dict is modified in place; nothing is returned.
    '''
    # NOTE(review): takes no self and is called as self.add_extra_info(a, b)
    # elsewhere, so presumably declared static - decorator not visible here.
    for name in extra_info:
        if name not in info_dict:
            info_dict[name] = extra_info[name]
468 def extract_info(self, url, download=True, ie_key=None, extra_info={},
471 Returns a list with a dictionary for each video we find.
472 If 'download', also downloads the videos.
473 extra_info is a dict containing the extra values to add to each result
477 ies = [self.get_info_extractor(ie_key)]
482 if not ie.suitable(url):
486 self.report_warning(u'The program functionality for this site has been marked as broken, '
487 u'and will probably not work.')
490 ie_result = ie.extract(url)
491 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
493 if isinstance(ie_result, list):
494 # Backwards compatibility: old IE result format
496 '_type': 'compat_list',
497 'entries': ie_result,
499 self.add_extra_info(ie_result,
501 'extractor': ie.IE_NAME,
503 'extractor_key': ie.ie_key(),
506 return self.process_ie_result(ie_result, download, extra_info)
509 except ExtractorError as de: # An error we somewhat expected
510 self.report_error(compat_str(de), de.format_traceback())
512 except Exception as e:
513 if self.params.get('ignoreerrors', False):
514 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
519 self.report_error(u'no suitable InfoExtractor: %s' % url)
521 def process_ie_result(self, ie_result, download=True, extra_info={}):
523 Take the result of the ie(may be modified) and resolve all unresolved
524 references (URLs, playlist items).
526 It will also download the videos if 'download'.
527 Returns the resolved ie_result.
530 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
531 if result_type == 'video':
532 self.add_extra_info(ie_result, extra_info)
533 return self.process_video_result(ie_result, download=download)
534 elif result_type == 'url':
535 # We have to add extra_info to the results because it may be
536 # contained in a playlist
537 return self.extract_info(ie_result['url'],
539 ie_key=ie_result.get('ie_key'),
540 extra_info=extra_info)
541 elif result_type == 'url_transparent':
542 # Use the information from the embedding page
543 info = self.extract_info(
544 ie_result['url'], ie_key=ie_result.get('ie_key'),
545 extra_info=extra_info, download=False, process=False)
547 def make_result(embedded_info):
548 new_result = ie_result.copy()
549 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
550 'entries', 'urlhandle', 'ie_key', 'duration',
551 'subtitles', 'annotations', 'format',
552 'thumbnail', 'thumbnails'):
555 if f in embedded_info:
556 new_result[f] = embedded_info[f]
558 new_result = make_result(info)
560 assert new_result.get('_type') != 'url_transparent'
561 if new_result.get('_type') == 'compat_list':
562 new_result['entries'] = [
563 make_result(e) for e in new_result['entries']]
565 return self.process_ie_result(
566 new_result, download=download, extra_info=extra_info)
567 elif result_type == 'playlist':
568 # We process each entry in the playlist
569 playlist = ie_result.get('title', None) or ie_result.get('id', None)
570 self.to_screen(u'[download] Downloading playlist: %s' % playlist)
572 playlist_results = []
574 n_all_entries = len(ie_result['entries'])
575 playliststart = self.params.get('playliststart', 1) - 1
576 playlistend = self.params.get('playlistend', -1)
578 if playlistend == -1:
579 entries = ie_result['entries'][playliststart:]
581 entries = ie_result['entries'][playliststart:playlistend]
583 n_entries = len(entries)
585 self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
586 (ie_result['extractor'], playlist, n_all_entries, n_entries))
588 for i, entry in enumerate(entries, 1):
589 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
591 'playlist': playlist,
592 'playlist_index': i + playliststart,
593 'extractor': ie_result['extractor'],
594 'webpage_url': ie_result['webpage_url'],
595 'extractor_key': ie_result['extractor_key'],
598 reason = self._match_entry(entry)
599 if reason is not None:
600 self.to_screen(u'[download] ' + reason)
603 entry_result = self.process_ie_result(entry,
606 playlist_results.append(entry_result)
607 ie_result['entries'] = playlist_results
609 elif result_type == 'compat_list':
611 self.add_extra_info(r,
613 'extractor': ie_result['extractor'],
614 'webpage_url': ie_result['webpage_url'],
615 'extractor_key': ie_result['extractor_key'],
618 ie_result['entries'] = [
619 self.process_ie_result(_fixup(r), download, extra_info)
620 for r in ie_result['entries']
624 raise Exception('Invalid result type: %s' % result_type)
626 def select_format(self, format_spec, available_formats):
627 if format_spec == 'best' or format_spec is None:
628 return available_formats[-1]
629 elif format_spec == 'worst':
630 return available_formats[0]
632 extensions = [u'mp4', u'flv', u'webm', u'3gp']
633 if format_spec in extensions:
634 filter_f = lambda f: f['ext'] == format_spec
636 filter_f = lambda f: f['format_id'] == format_spec
637 matches = list(filter(filter_f, available_formats))
642 def process_video_result(self, info_dict, download=True):
643 assert info_dict.get('_type', 'video') == 'video'
645 if 'playlist' not in info_dict:
646 # It isn't part of a playlist
647 info_dict['playlist'] = None
648 info_dict['playlist_index'] = None
650 # These extractors handle format selection themselves
651 if info_dict['extractor'] in [u'youtube', u'Youku']:
653 self.process_info(info_dict)
656 # We now pick which formats have to be downloaded
657 if info_dict.get('formats') is None:
658 # There's only one format available
659 formats = [info_dict]
661 formats = info_dict['formats']
663 # We check that all the formats have the format and format_id fields
664 for (i, format) in enumerate(formats):
665 if format.get('format_id') is None:
666 format['format_id'] = compat_str(i)
667 if format.get('format') is None:
668 format['format'] = u'{id} - {res}{note}'.format(
669 id=format['format_id'],
670 res=self.format_resolution(format),
671 note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
673 # Automatically determine file extension if missing
674 if 'ext' not in format:
675 format['ext'] = determine_ext(format['url'])
677 if self.params.get('listformats', None):
678 self.list_formats(info_dict)
681 format_limit = self.params.get('format_limit', None)
683 formats = list(takewhile_inclusive(
684 lambda f: f['format_id'] != format_limit, formats
686 if self.params.get('prefer_free_formats'):
687 def _free_formats_key(f):
689 ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
692 # We only compare the extension if they have the same height and width
693 return (f.get('height'), f.get('width'), ext_ord)
694 formats = sorted(formats, key=_free_formats_key)
696 req_format = self.params.get('format', 'best')
697 if req_format is None:
699 formats_to_download = []
700 # The -1 is for supporting YoutubeIE
701 if req_format in ('-1', 'all'):
702 formats_to_download = formats
704 # We can accept formats requested in the format: 34/5/best, we pick
705 # the first that is available, starting from left
706 req_formats = req_format.split('/')
707 for rf in req_formats:
708 selected_format = self.select_format(rf, formats)
709 if selected_format is not None:
710 formats_to_download = [selected_format]
712 if not formats_to_download:
713 raise ExtractorError(u'requested format not available',
717 if len(formats_to_download) > 1:
718 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
719 for format in formats_to_download:
720 new_info = dict(info_dict)
721 new_info.update(format)
722 self.process_info(new_info)
723 # We update the info dict with the best quality format (backwards compatibility)
724 info_dict.update(formats_to_download[-1])
727 def process_info(self, info_dict):
728 """Process a single resolved IE result."""
730 assert info_dict.get('_type', 'video') == 'video'
731 # We increment the download count here to match the previous behaviour.
732 self.increment_downloads()
734 info_dict['fulltitle'] = info_dict['title']
735 if len(info_dict['title']) > 200:
736 info_dict['title'] = info_dict['title'][:197] + u'...'
738 # Keep for backwards compatibility
739 info_dict['stitle'] = info_dict['title']
741 if not 'format' in info_dict:
742 info_dict['format'] = info_dict['ext']
744 reason = self._match_entry(info_dict)
745 if reason is not None:
746 self.to_screen(u'[download] ' + reason)
749 max_downloads = self.params.get('max_downloads')
750 if max_downloads is not None:
751 if self._num_downloads > int(max_downloads):
752 raise MaxDownloadsReached()
754 filename = self.prepare_filename(info_dict)
757 if self.params.get('forcetitle', False):
758 self.to_stdout(info_dict['fulltitle'])
759 if self.params.get('forceid', False):
760 self.to_stdout(info_dict['id'])
761 if self.params.get('forceurl', False):
762 # For RTMP URLs, also include the playpath
763 self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
764 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
765 self.to_stdout(info_dict['thumbnail'])
766 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
767 self.to_stdout(info_dict['description'])
768 if self.params.get('forcefilename', False) and filename is not None:
769 self.to_stdout(filename)
770 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
771 self.to_stdout(formatSeconds(info_dict['duration']))
772 if self.params.get('forceformat', False):
773 self.to_stdout(info_dict['format'])
774 if self.params.get('forcejson', False):
775 info_dict['_filename'] = filename
776 self.to_stdout(json.dumps(info_dict))
778 # Do nothing else if in simulate mode
779 if self.params.get('simulate', False):
786 dn = os.path.dirname(encodeFilename(filename))
787 if dn != '' and not os.path.exists(dn):
789 except (OSError, IOError) as err:
790 self.report_error(u'unable to create directory ' + compat_str(err))
793 if self.params.get('writedescription', False):
795 descfn = filename + u'.description'
796 self.report_writedescription(descfn)
797 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
798 descfile.write(info_dict['description'])
799 except (KeyError, TypeError):
800 self.report_warning(u'There\'s no description to write.')
801 except (OSError, IOError):
802 self.report_error(u'Cannot write description file ' + descfn)
805 if self.params.get('writeannotations', False):
807 annofn = filename + u'.annotations.xml'
808 self.report_writeannotations(annofn)
809 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
810 annofile.write(info_dict['annotations'])
811 except (KeyError, TypeError):
812 self.report_warning(u'There are no annotations to write.')
813 except (OSError, IOError):
814 self.report_error(u'Cannot write annotations file: ' + annofn)
817 subtitles_are_requested = any([self.params.get('writesubtitles', False),
818 self.params.get('writeautomaticsub')])
820 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
821 # subtitles download errors are already managed as troubles in relevant IE
822 # that way it will silently go on when used with unsupporting IE
823 subtitles = info_dict['subtitles']
824 sub_format = self.params.get('subtitlesformat', 'srt')
825 for sub_lang in subtitles.keys():
826 sub = subtitles[sub_lang]
830 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
831 self.report_writesubtitles(sub_filename)
832 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
834 except (OSError, IOError):
835 self.report_error(u'Cannot write subtitles file ' + descfn)
838 if self.params.get('writeinfojson', False):
839 infofn = os.path.splitext(filename)[0] + u'.info.json'
840 self.report_writeinfojson(infofn)
842 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
843 write_json_file(json_info_dict, encodeFilename(infofn))
844 except (OSError, IOError):
845 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
848 if self.params.get('writethumbnail', False):
849 if info_dict.get('thumbnail') is not None:
850 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
851 thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
852 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
853 (info_dict['extractor'], info_dict['id']))
855 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
856 with open(thumb_filename, 'wb') as thumbf:
857 shutil.copyfileobj(uf, thumbf)
858 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
859 (info_dict['extractor'], info_dict['id'], thumb_filename))
860 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
861 self.report_warning(u'Unable to download thumbnail "%s": %s' %
862 (info_dict['thumbnail'], compat_str(err)))
864 if not self.params.get('skip_download', False):
865 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
869 success = self.fd._do_download(filename, info_dict)
870 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
871 self.report_error(u'unable to download video data: %s' % str(err))
873 except (OSError, IOError) as err:
874 raise UnavailableVideoError(err)
875 except (ContentTooShortError, ) as err:
876 self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
881 self.post_process(filename, info_dict)
882 except (PostProcessingError) as err:
883 self.report_error(u'postprocessing: %s' % str(err))
886 self.record_download_archive(info_dict)
888 def download(self, url_list):
889 """Download a given list of URLs."""
890 if (len(url_list) > 1 and
891 '%' not in self.params['outtmpl']
892 and self.params.get('max_downloads') != 1):
893 raise SameFileError(self.params['outtmpl'])
897 #It also downloads the videos
898 self.extract_info(url)
899 except UnavailableVideoError:
900 self.report_error(u'unable to download video')
901 except MaxDownloadsReached:
902 self.to_screen(u'[info] Maximum number of downloaded files reached.')
905 return self._download_retcode
907 def download_with_info_file(self, info_filename):
908 with io.open(info_filename, 'r', encoding='utf-8') as f:
911 self.process_ie_result(info, download=True)
912 except DownloadError:
913 webpage_url = info.get('webpage_url')
914 if webpage_url is not None:
915 self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
916 return self.download([webpage_url])
919 return self._download_retcode
921 def post_process(self, filename, ie_info):
922 """Run all the postprocessors on the given file."""
924 info['filepath'] = filename
928 keep_video_wish, new_info = pp.run(info)
929 if keep_video_wish is not None:
931 keep_video = keep_video_wish
932 elif keep_video is None:
933 # No clear decision yet, let IE decide
934 keep_video = keep_video_wish
935 except PostProcessingError as e:
936 self.report_error(e.msg)
937 if keep_video is False and not self.params.get('keepvideo', False):
939 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
940 os.remove(encodeFilename(filename))
941 except (IOError, OSError):
942 self.report_warning(u'Unable to remove downloaded video file')
def _make_archive_id(self, info_dict):
    """Build the download-archive identifier for a video.

    The identifier is the lower-cased extractor key, a single space and
    the video id. The extractor key is read from 'extractor_key', falling
    back to 'ie_key' (the key carried by playlist entries).

    Returns None when the information is incomplete: either no extractor
    key can be determined or info_dict has no 'id'.
    """
    if 'id' not in info_dict:
        # Without an id no archive entry can be formed; report this as
        # incomplete information instead of raising KeyError below.
        return None
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key')
    if extractor is None:
        extractor = info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        return None  # Incomplete video information
    return extractor.lower() + u' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """Tell whether the video described by info_dict is in the archive.

    Returns False when no 'download_archive' parameter is set, when the
    archive identifier cannot be built from info_dict, or when the archive
    file does not exist. Any other IOError while reading the archive is
    propagated.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    if archive_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            # One identifier per line; stop at the first match
            already_recorded = any(
                entry.strip() == archive_id for entry in archive_file)
    except IOError as ioe:
        if ioe.errno != errno.ENOENT:
            raise
        # A missing archive file simply means nothing was recorded yet
        already_recorded = False
    return already_recorded
def record_download_archive(self, info_dict):
    """Append the archive identifier of info_dict to the download archive.

    Does nothing when no 'download_archive' parameter is set. The
    identifier is written on a line of its own, UTF-8 encoded.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return

    archive_id = self._make_archive_id(info_dict)
    # Only fully identified videos may be recorded
    assert archive_id
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + u'\n')
def format_resolution(format, default='unknown'):
    """Return a printable resolution for a format dictionary.

    In order of precedence: 'audio only' when 'vcodec' is 'none', the
    '_resolution' entry when present, WIDTHxHEIGHT when both dimensions
    are known, HEIGHTp when only the height is known, and default
    otherwise.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'

    preset = format.get('_resolution')
    if preset is not None:
        return preset

    height = format.get('height')
    if height is None:
        # A width without a height is not enough to describe the format
        return default

    width = format.get('width')
    if width is None:
        return u'%sp' % height
    return u'%sx%s' % (width, height)
def list_formats(self, info_dict):
    """Print a table of the formats available for a video.

    The rows come from info_dict['formats'], or from info_dict itself when
    it has no 'formats' key. Each row shows the format id, the extension,
    the resolution and a free-form note. When there is more than one
    format, the first row is tagged '(worst)' and the last one '(best)'.
    An empty 'formats' list prints the header only.
    """
    def format_note(fdict):
        """Build the note column: codecs, bitrates and file size."""
        res = u''
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + u' '
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += u'%-5s' % fdict['vcodec']
        elif fdict.get('vbr') is not None:
            # Bitrate known but codec unknown: use a generic label
            res += u'video'
        if fdict.get('vbr') is not None:
            res += u'@%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += u', '
            res += u'%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += u', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += u'@%3dk' % fdict['abr']
        if fdict.get('filesize') is not None:
            if res:
                res += u', '
            res += format_bytes(fdict['filesize'])
        return res

    def line(format, idlen=20):
        """Render one table row; the id column is idlen + 1 characters wide."""
        return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        ))

    formats = info_dict.get('formats', [info_dict])
    # The header width is always a candidate, so max() stays well-defined
    # even when the list of formats is empty
    idlen = max([len(u'format code')] +
                [len(f['format_id']) for f in formats])
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    # The header is rendered through line() so that it lines up with the rows
    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, u"\n".join(formats_s)))
def urlopen(self, req):
    """Open req with this object's URL opener and return the result."""
    opener = self._opener
    return opener.open(req)
def print_debug_header(self):
    """Write version and environment details as debug output.

    Does nothing unless the 'verbose' parameter is set. Otherwise writes
    the program version, the abbreviated Git HEAD when it can be
    determined, the Python version and platform, and the proxy map
    collected from the handlers of self._opener.
    """
    if not self.params.get('verbose'):
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = sp.communicate()
        out = out.decode().strip()
        # Require the whole output to be a hex string; anchoring only the
        # start would accept any text that merely begins with a hex digit
        if re.match(r'[0-9a-f]+$', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        # Best effort only: git may be missing or this may not be a checkout
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear() only exists on Python 2
            pass
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
def _setup_opener(self):
    """Build the URL opener used for all requests and store it in self._opener.

    Reads the 'socket_timeout', 'cookiefile', 'proxy' and
    'nocheckcertificate' parameters. Sets self.cookiejar, installs the
    opener globally and sets the default socket timeout (600 seconds when
    'socket_timeout' is not given).
    """
    configured_timeout = self.params.get('socket_timeout')
    if configured_timeout is None:
        timeout = 600
    else:
        timeout = float(configured_timeout)

    cookie_path = self.params.get('cookiefile')
    proxy_url = self.params.get('proxy')

    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        # Only load the file when it is readable
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if proxy_url is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_url == '':
        # An empty 'proxy' parameter yields an empty proxy map
        proxies = {}
    else:
        proxies = {'http': proxy_url, 'https': proxy_url}

    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
    skip_certificate_check = self.params.get('nocheckcertificate', False)
    https_handler = make_HTTPS_handler(skip_certificate_check)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)