2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
28 compat_urllib_request,
47 UnavailableVideoError,
52 from .extractor import get_info_extractor, gen_extractors
53 from .FileDownloader import FileDownloader
54 from .version import __version__
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do),
    it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forcejson:         Force printing info_dict as JSON.
    simulate:          Do not download the video files.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names.
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object; download only if the upload_date
                       is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       None to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate: Do not verify SSL certificates
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle
    """

    # Exit status of the current batch of downloads; re-initialised
    # per instance in __init__.
    _download_retcode = None
    # Ordinal used for %(autonumber)s; re-initialised per instance in __init__.
    _num_downloads = None
    def __init__(self, params=None):
        """Create a FileDownloader object with the given options."""
        # [...] several initialiser lines (e.g. the extractor/postprocessor
        # list attributes) are elided from this excerpt.
        self._ies_instances = {}
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # NOTE(review): `params` is dereferenced here before the None-default
        # below is applied — YoutubeDL(params=None) would raise AttributeError.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {} if params is None else params

        # Pipe messages through fribidi
        if params.get('bidi_workaround', False):
            # fribidi does not support ungetting, so force newlines
            params['progress_with_newline'] = True

            # [...] a `try:` wrapping the fribidi setup is elided here.
            for fid in ['_screen_file', '_err_file']:
                # Wraps an output stream so text is piped through an external
                # fribidi subprocess before reaching the terminal.
                class FribidiOut(object):
                    def __init__(self, outfile, errfile):
                        self.outfile = outfile
                        self.process = subprocess.Popen(
                            # [...] the fribidi command line is elided here.
                            stdin=subprocess.PIPE,
                            # [...] stdout/stderr arguments are elided here.

                    # [...] the write()/flush()/isatty() method headers are
                    # elided in this excerpt; only one body line of each remains.
                        res = self.process.stdin.write(s)
                        return self.process.stdin.flush()
                        return self.outfile.isatty()

                vout = FribidiOut(getattr(self, fid), self._err_file)
                setattr(self, fid, vout)
            except OSError as ose:
                # [...] an errno check preceding this warning is elided here.
                self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params['restrictfilenames']):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            # [...] the `self.report_warning(` call opening this message is elided.
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.fd = FileDownloader(self, self.params)

        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

        # [...] a trailing self._setup_opener() call is elided from this excerpt.
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        # [...] the line appending `ie` to the ordered extractor list is
        # elided from this excerpt.
        # Index the instance by its key for get_info_extractor() lookups.
        self._ies_instances[ie.ie_key()] = ie
        # Mutual registration: the IE gets a back-reference to this downloader.
        ie.set_downloader(self)
    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        # [...] the `if ie is None:` guard and the final `return ie` are
        # elided from this excerpt.
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
235 def add_default_info_extractors(self):
237 Add the InfoExtractors returned by gen_extractors to the end of the list
239 for ie in gen_extractors():
240 self.add_info_extractor(ie)
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        # [...] the line appending `pp` to the internal postprocessor list is
        # elided from this excerpt.
        # Mutual registration: the PP gets a back-reference to this downloader.
        pp.set_downloader(self)
247 def to_screen(self, message, skip_eol=False):
248 """Print message to stdout if not in quiet mode."""
249 return self.to_stdout(message, skip_eol, check_quiet=True)
251 def to_stdout(self, message, skip_eol=False, check_quiet=False):
252 """Print message to stdout if not in quiet mode."""
253 if self.params.get('logger'):
254 self.params['logger'].debug(message)
255 elif not check_quiet or not self.params.get('quiet', False):
256 terminator = [u'\n', u''][skip_eol]
257 output = message + terminator
258 write_string(output, self._screen_file)
    def to_stderr(self, message):
        """Print message to stderr."""
        assert type(message) == type(u'')
        if self.params.get('logger'):
            self.params['logger'].error(message)
        # [...] the `else:` introducing this branch is elided from this excerpt.
            output = message + u'\n'
            write_string(output, self._err_file)
    def to_console_title(self, message):
        """Set the console/terminal window title to *message* when the
        'consoletitle' option is enabled."""
        if not self.params.get('consoletitle', False):
            # [...] the early `return` under this guard is elided here.
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm OSC 0 escape sequence sets the window title.
            write_string(u'\033]0;%s\007' % message, self._screen_file)
    def save_console_title(self):
        """Push the current terminal title onto the xterm title stack
        (no-op unless 'consoletitle' is enabled)."""
        if not self.params.get('consoletitle', False):
            # [...] the early `return` under this guard is elided here.
        if 'TERM' in os.environ:
            # Save the title on stack
            write_string(u'\033[22;0t', self._screen_file)
    def restore_console_title(self):
        """Pop the previously saved terminal title from the xterm title stack
        (no-op unless 'consoletitle' is enabled)."""
        if not self.params.get('consoletitle', False):
            # [...] the early `return` under this guard is elided here.
        if 'TERM' in os.environ:
            # Restore the title from stack
            write_string(u'\033[23;0t', self._screen_file)
294 self.save_console_title()
297 def __exit__(self, *args):
298 self.restore_console_title()
300 if self.params.get('cookiefile') is not None:
301 self.cookiejar.save()
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            # [...] a `tb is None` guard and tb initialisation are elided here.
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                # An ExtractorError may carry the original cause in .exc_info;
                # include that traceback first.
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
            # [...] the `else:` branch header is elided here; outside an except
            # block the current stack is formatted instead.
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = u''.join(tb_data)
            # [...] the write of `tb` to stderr is elided from this excerpt.
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped cause's exc_info when available (see above).
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            # [...] `else:` elided here.
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # Errors are being ignored: record failure in the batch return code.
        self._download_retcode = 1
    def report_warning(self, message):
        """
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        """
        # ANSI colouring is skipped on Windows consoles (os.name == 'nt').
        if self._err_file.isatty() and os.name != 'nt':
            _msg_header = u'\033[0;33mWARNING:\033[0m'
        # [...] the `else:` introducing this branch is elided here.
            _msg_header = u'WARNING:'
        warning_message = u'%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)
    def report_error(self, message, tb=None):
        """
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        """
        # ANSI colouring is skipped on Windows consoles (os.name == 'nt').
        if self._err_file.isatty() and os.name != 'nt':
            _msg_header = u'\033[0;31mERROR:\033[0m'
        # [...] the `else:` introducing this branch is elided here.
            _msg_header = u'ERROR:'
        error_message = u'%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)
357 def report_writedescription(self, descfn):
358 """ Report that the description file is being written """
359 self.to_screen(u'[info] Writing video description to: ' + descfn)
361 def report_writesubtitles(self, sub_filename):
362 """ Report that the subtitles file is being written """
363 self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
365 def report_writeinfojson(self, infofn):
366 """ Report that the metadata file has been written """
367 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
369 def report_writeannotations(self, annofn):
370 """ Report that the annotations file has been written. """
371 self.to_screen(u'[info] Writing video annotations to: ' + annofn)
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        # [...] the `try:` opening this block is elided from this excerpt.
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # Fall back to a filename-free message when the terminal encoding
            # cannot represent file_name.
            self.to_screen(u'[download] The file has already been downloaded')
380 def increment_downloads(self):
381 """Increment the ordinal that assigns a number to each file."""
382 self._num_downloads += 1
    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        # [...] the `try:` opening this block is elided from this excerpt.
            # Work on a copy so info_dict itself is not polluted.
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                # [...] the default autonumber_size assignment is elided here.
            autonumber_templ = u'%0' + str(autonumber_size) + u'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            if template_dict.get('playlist_index') is not None:
                # Zero-pad the playlist index to five digits for stable sorting.
                template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

            # None values become u'NA'; everything else is stringified and
            # sanitized for use in a filename.
            sanitize = lambda k, v: sanitize_filename(
                u'NA' if v is None else compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                # [...] a trailing keyword argument and closing paren are elided.
            template_dict = dict((k, sanitize(k, v))
                for k, v in template_dict.items())

            tmpl = os.path.expanduser(self.params['outtmpl'])
            filename = tmpl % template_dict
            # [...] `return filename` is elided from this excerpt.
        except KeyError as err:
            self.report_error(u'Erroneous output template')
            # [...] a `return None` is elided here.
        except ValueError as err:
            self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
            # [...] a `return None` is elided here.
    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """
        # The return value, when not None, is the human-readable skip reason.
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            # [...] an `if matchtitle:` guard is elided from this excerpt.
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            # [...] an `if rejecttitle:` guard is elided from this excerpt.
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
        # [...] a `date is not None` guard is elided from this excerpt.
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        age_limit = self.params.get('age_limit')
        if age_limit is not None:
            # info_dict['age_limit'] is the video's minimum viewer age.
            if age_limit < info_dict.get('age_limit', 0):
                return u'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
            return (u'%s has already been recorded in archive'
                    % info_dict.get('title', info_dict.get('id', u'video')))
        # [...] the final `return None` is elided from this excerpt.
444 def add_extra_info(info_dict, extra_info):
445 '''Set the keys from extra_info in info dict if they are missing'''
446 for key, value in extra_info.items():
447 info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
        # [...] the remainder of the signature is elided from this excerpt.
        # NOTE(review): extra_info={} is a mutable default argument shared
        # across calls; a None sentinel would be safer.
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''
        # [...] the `if ie_key:` branch selection is elided; when a key is
        # given only that extractor is tried.
        ies = [self.get_info_extractor(ie_key)]
        # [...] the `for ie in ies:` loop header is elided from this excerpt.
        if not ie.suitable(url):
            # [...] `continue` elided.
        # [...] a working-status check on the extractor is elided here.
        self.report_warning(u'The program functionality for this site has been marked as broken, '
                            u'and will probably not work.')
        # [...] a `try:` is elided here.
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            # [...] `break` elided.
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            # [...] the `ie_result = {` wrapper is elided here.
            '_type': 'compat_list',
            'entries': ie_result,
        # Record which extractor produced this result.
        self.add_extra_info(ie_result,
            # [...] the dict-literal opening is elided here.
            'extractor': ie.IE_NAME,
            # [...] a 'webpage_url' entry is elided here.
            'extractor_key': ie.ie_key(),
        # [...] closing braces and a process-flag check are elided here.
        return self.process_ie_result(ie_result, download, extra_info)
        # [...] intermediate lines elided.
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            # [...] `break` elided.
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                # [...] `break` elided; otherwise the exception propagates.
        # [...] the loop's `else:` clause header is elided here.
        self.report_error(u'no suitable InfoExtractor: %s' % url)
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # NOTE(review): extra_info={} is a mutable default argument shared
        # across calls; a None sentinel would be safer.
        result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     # [...] a positional `download` argument is elided here.
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                # Merge the embedded-page info into a copy of ie_result,
                # letting the listed technical fields come from the embed.
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'urlhandle', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    # [...] a `if f in new_result:` style guard is elided here.
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
                # [...] `return new_result` is elided from this excerpt.
            new_result = make_result(info)

            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen(u'[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            n_all_entries = len(ie_result['entries'])
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', -1)

            if playlistend == -1:
                entries = ie_result['entries'][playliststart:]
            # [...] the `else:` introducing this branch is elided here.
                entries = ie_result['entries'][playliststart:playlistend]

            n_entries = len(entries)

            self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
                # [...] the `extra = {` opening of this per-entry dict is elided.
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'extractor_key': ie_result['extractor_key'],
                # [...] the closing brace is elided here.

                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen(u'[download] ' + reason)
                    # [...] `continue` is elided here.

                entry_result = self.process_ie_result(entry,
                    # [...] the download/extra_info arguments are elided here.
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            # [...] `return ie_result` is elided from this excerpt.
        elif result_type == 'compat_list':
            # [...] a `def _fixup(r):` helper header is elided here.
                self.add_extra_info(r,
                    # [...] the dict-literal opening is elided here.
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'extractor_key': ie_result['extractor_key'],
                # [...] the closing and a `return r` are elided here.
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            # [...] the closing bracket and `return ie_result` are elided here.
        else:
            raise Exception('Invalid result type: %s' % result_type)
    def select_format(self, format_spec, available_formats):
        """Pick one format from available_formats according to format_spec.

        available_formats appears ordered worst-to-best: 'best' takes the
        last element and 'worst' the first.
        """
        if format_spec == 'best' or format_spec is None:
            return available_formats[-1]
        elif format_spec == 'worst':
            return available_formats[0]
        # [...] the `else:` branch structure is partially elided below.
        # A bare extension selects by container; anything else matches
        # against format_id.
        extensions = [u'mp4', u'flv', u'webm', u'3gp']
        if format_spec in extensions:
            filter_f = lambda f: f['ext'] == format_spec
        # [...] the `else:` introducing this branch is elided here.
            filter_f = lambda f: f['format_id'] == format_spec
        matches = list(filter(filter_f, available_formats))
        # [...] the selection from `matches` and the final return are elided
        # from this excerpt.
    def process_video_result(self, info_dict, download=True):
        """Resolve format selection for a single video result and, when
        `download` is set, hand each chosen format to process_info()."""
        assert info_dict.get('_type', 'video') == 'video'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # These extractors handle format selection themselves
        if info_dict['extractor'] in [u'youtube', u'Youku']:
            # [...] an `if download:` guard is elided here.
            self.process_info(info_dict)
            # [...] an early `return info_dict` is elided here.

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        # [...] the `else:` introducing this branch is elided here.
            formats = info_dict['formats']

        # We check that all the formats have the format and format_id fields
        for (i, format) in enumerate(formats):
            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = u'{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                # [...] the closing paren is elided here.
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url'])

        if self.params.get('listformats', None):
            self.list_formats(info_dict)
            # [...] an early `return` is elided here.

        format_limit = self.params.get('format_limit', None)
        # [...] an `if format_limit:` guard is elided here.
        # Drop every format better than format_limit (inclusive cut).
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        # [...] the closing paren is elided here.
        if self.params.get('prefer_free_formats'):
            def _free_formats_key(f):
                # [...] a `try:` is elided here.
                ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
                # [...] an except-fallback for unknown extensions is elided.
                # We only compare the extension if they have the same height and width
                return (f.get('height'), f.get('width'), ext_ord)
            formats = sorted(formats, key=_free_formats_key)

        req_format = self.params.get('format', 'best')
        if req_format is None:
            # [...] the default assignment is elided here.
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        # [...] the `else:` introducing this branch is elided here.
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = req_format.split('/')
            for rf in req_formats:
                selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download = [selected_format]
                    # [...] `break` is elided here.
        if not formats_to_download:
            raise ExtractorError(u'requested format not available',
                # [...] the remaining arguments are elided here.

        # [...] an `if download:` guard is elided here.
        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        # [...] `return info_dict` is elided from this excerpt.
    def process_info(self, info_dict):
        """Process a single resolved IE result."""

        assert info_dict.get('_type', 'video') == 'video'
        # We increment the download count here to match the previous behaviour.
        self.increment_downloads()

        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            # Truncate over-long titles so the output filename stays usable.
            info_dict['title'] = info_dict['title'][:197] + u'...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if not 'format' in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen(u'[download] ' + reason)
            # [...] an early `return` is elided here.

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads > int(max_downloads):
                raise MaxDownloadsReached()

        filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            self.to_stdout(json.dumps(info_dict))

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            # [...] the `return` under this guard is elided here.

        # [...] a filename-is-None guard and a `try:` are elided here.
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            # [...] the os.makedirs call is elided here.
        except (OSError, IOError) as err:
            self.report_error(u'unable to create directory ' + compat_str(err))
            # [...] a `return` is elided here.

        if self.params.get('writedescription', False):
            # [...] a `try:` is elided here.
            descfn = filename + u'.description'
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                self.report_warning(u'There\'s no description to write.')
            except (OSError, IOError):
                self.report_error(u'Cannot write description file ' + descfn)
                # [...] a `return` is elided here.

        if self.params.get('writeannotations', False):
            # [...] a `try:` is elided here.
            annofn = filename + u'.annotations.xml'
            self.report_writeannotations(annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning(u'There are no annotations to write.')
            except (OSError, IOError):
                self.report_error(u'Cannot write annotations file: ' + annofn)
                # [...] a `return` is elided here.

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat', 'srt')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                # [...] a None-check on `sub` and a `try:` are elided here.
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    # [...] the subfile.write(sub) call is elided here.
                except (OSError, IOError):
                    # NOTE(review): `descfn` below looks like a copy/paste slip;
                    # it presumably should reference sub_filename instead.
                    self.report_error(u'Cannot write subtitles file ' + descfn)
                    # [...] a `return` is elided here.

        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + u'.info.json'
            self.report_writeinfojson(infofn)
            # [...] a `try:` is elided here.
            # 'urlhandle' is not JSON-serialisable, so it is stripped out.
            json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error(u'Cannot write metadata to JSON file ' + infofn)
                # [...] a `return` is elided here.

        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
                thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
                self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                # [...] a `try:` is elided here.
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                with open(thumb_filename, 'wb') as thumbf:
                    shutil.copyfileobj(uf, thumbf)
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # Thumbnail failures are non-fatal: warn and continue.
                    self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                # [...] the success-branch body is elided here.
            # [...] an `else:` and a `try:` are elided here.
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                # [...] a `return` is elided here.
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                # [...] a `return` is elided here.

            # [...] an `if success:` guard and a `try:` are elided here.
            self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                # [...] a `return` is elided here.

        self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs."""
        # Refuse templates without a substitution when several files would
        # be written: they would all collide on the same name.
        if (len(url_list) > 1 and
                '%' not in self.params['outtmpl']
                and self.params.get('max_downloads') != 1):
            raise SameFileError(self.params['outtmpl'])

        # [...] the `for url in url_list:` loop header and a `try:` are
        # elided from this excerpt.
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            # [...] loop-termination handling is elided here.

        return self._download_retcode
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        # [...] an `info = dict(ie_info)` copy is elided from this excerpt.
        info['filepath'] = filename
        # [...] keep_video initialisation, the `for pp in self._pps:` loop
        # header and a `try:` are elided here.
        keep_video_wish, new_info = pp.run(info)
        if keep_video_wish is not None:
            # [...] an `if keep_video_wish:` branch is elided here;
            # an explicit wish to keep always wins.
            keep_video = keep_video_wish
            elif keep_video is None:
                # No clear decision yet, let IE decide
                keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
            # [...] a `try:` is elided here.
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
            except (IOError, OSError):
                self.report_warning(u'Unable to remove downloaded video file')
908 def _make_archive_id(self, info_dict):
909 # Future-proof against any change in case
910 # and backwards compatibility with prior versions
911 extractor = info_dict.get('extractor_key')
912 if extractor is None:
913 if 'id' in info_dict:
914 extractor = info_dict.get('ie_key') # key in a playlist
915 if extractor is None:
916 return None # Incomplete video information
917 return extractor.lower() + u' ' + info_dict['id']
    def in_download_archive(self, info_dict):
        """Return whether this video is already recorded in the
        download-archive file."""
        fn = self.params.get('download_archive')
        # [...] a `fn is None` early-return is elided from this excerpt.
        vid_id = self._make_archive_id(info_dict)
        # [...] the `if vid_id is None:` guard header is elided here.
            return False  # Incomplete video information
        # [...] a `try:` is elided here.
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    # [...] `return True` is elided here.
        except IOError as ioe:
            # A missing archive file simply means "not recorded yet".
            if ioe.errno != errno.ENOENT:
                # [...] a re-raise is elided here.
        # [...] the final `return False` is elided from this excerpt.
    def record_download_archive(self, info_dict):
        """Append this video's archive id to the download-archive file."""
        fn = self.params.get('download_archive')
        # [...] a `fn is None` early-return is elided from this excerpt.
        vid_id = self._make_archive_id(info_dict)
        # [...] a sanity check on vid_id is elided here.
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + u'\n')
    def format_resolution(format, default='unknown'):
        """Return a short human-readable resolution string for a format dict.

        NOTE(review): called as self.format_resolution(format) with a single
        argument elsewhere in this class, so in the full source this is
        presumably decorated @staticmethod; the decorator is elided here.
        """
        if format.get('vcodec') == 'none':
            # [...] the audio-only return is elided from this excerpt.
        if format.get('_resolution') is not None:
            return format['_resolution']
        if format.get('height') is not None:
            if format.get('width') is not None:
                res = u'%sx%s' % (format['width'], format['height'])
            # [...] the `else:` introducing this branch is elided here.
                res = u'%sp' % format['height']
        # [...] the fallback to `default` and the final `return res` are
        # elided from this excerpt.
    def list_formats(self, info_dict):
        """Print a table of all available formats for the given video."""
        def format_note(fdict):
            # Build a short note (codecs, bitrates, filesize) for one format.
            # [...] the `res = u''` initialiser is elided from this excerpt.
            if fdict.get('format_note') is not None:
                res += fdict['format_note'] + u' '
            if (fdict.get('vcodec') is not None and
                    fdict.get('vcodec') != 'none'):
                res += u'%-5s' % fdict['vcodec']
            elif fdict.get('vbr') is not None:
                # [...] a video-bitrate-only prefix is elided here.
            if fdict.get('vbr') is not None:
                res += u'@%4dk' % fdict['vbr']
            if fdict.get('acodec') is not None:
                # [...] separator handling is elided here.
                res += u'%-5s' % fdict['acodec']
            elif fdict.get('abr') is not None:
                # [...] an audio-bitrate-only prefix is elided here.
            if fdict.get('abr') is not None:
                res += u'@%3dk' % fdict['abr']
            if fdict.get('filesize') is not None:
                # [...] separator handling is elided here.
                res += format_bytes(fdict['filesize'])
            # [...] the `return res` is elided from this excerpt.

        def line(format, idlen=20):
            # One table row: id, extension, resolution, note.
            return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
                # [...] the format_id and ext fields are elided here.
                self.format_resolution(format),
                # [...] the format_note(format) field and closing are elided.

        formats = info_dict.get('formats', [info_dict])
        # Column width adapts to the longest format id.
        idlen = max(len(u'format code'),
                    max(len(f['format_id']) for f in formats))
        formats_s = [line(f, idlen) for f in formats]
        if len(formats) > 1:
            # Formats are ordered worst-to-best; annotate the extremes.
            formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

        header_line = line({
            'format_id': u'format code', 'ext': u'extension',
            '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
        self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, u"\n".join(formats_s)))
1012 def urlopen(self, req):
1013 """ Start an HTTP download """
1014 return self._opener.open(req)
    def print_debug_header(self):
        """Write verbose diagnostics (version, git HEAD, Python/platform,
        proxy map) to the screen; no-op unless 'verbose' is set."""
        if not self.params.get('verbose'):
            # [...] the early `return` under this guard is elided here.
        write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
        # [...] a `try:` is elided here — the git probe is best-effort.
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
        # [...] the except handlers for the git probe are elided here.
        write_string(u'[debug] Python version %s - %s' %
                     (platform.python_version(), platform_name()) + u'\n')

        # [...] a `proxy_map = {}` initialiser is elided from this excerpt.
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
    def _setup_opener(self):
        """Build and globally install the urllib opener (cookie jar, proxy
        and HTTPS handling) used for all HTTP requests."""
        timeout_val = self.params.get('socket_timeout')
        timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No persistence requested: keep cookies in memory only.
            self.cookiejar = compat_cookiejar.CookieJar()
        # [...] the `else:` introducing this branch is elided here.
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                # [...] the filename argument is elided here.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
            # [...] the cookiejar argument is elided here.
        if opts_proxy is not None:
            if opts_proxy == '':
                # [...] the empty-proxy (disable) branch is elided here.
            # [...] the `else:` introducing this branch is elided here.
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        # [...] the `else:` introducing this branch is elided here.
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False))
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener

        # TODO remove this global modification
        compat_urllib_request.install_opener(opener)
        socket.setdefaulttimeout(timeout)