2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
28 compat_urllib_request,
46 UnavailableVideoError,
51 from .extractor import get_info_extractor, gen_extractors
52 from .FileDownloader import FileDownloader
53 from .version import __version__
56 class YoutubeDL(object):
59 YoutubeDL objects are the ones responsible for downloading the
60 actual video file and writing it to disk if the user has requested
61 it, among some other tasks. In most cases there should be one per
62 program. Given a video URL, the downloader doesn't know how to
63 extract all the needed information (that is the task of the
64 InfoExtractors), so it has to pass the URL to one of them.
66 For this, YoutubeDL objects have a method that allows
67 InfoExtractors to be registered in a given order. When it is passed
68 a URL, the YoutubeDL object hands it to the first InfoExtractor it
69 finds that reports being able to handle it. The InfoExtractor extracts
70 all the information about the video or videos the URL refers to, and
71 YoutubeDL processes the extracted information, possibly using a File
72 Downloader to download the video.
74 YoutubeDL objects accept a lot of parameters. In order not to saturate
75 the object constructor with arguments, it receives a dictionary of
76 options instead. These options are available through the params
77 attribute for the InfoExtractors to use. The YoutubeDL also
78 registers itself as the downloader in charge for the InfoExtractors
79 that are added to it, so this is a "mutual registration".
83 username: Username for authentication purposes.
84 password: Password for authentication purposes.
85 videopassword: Password for accessing a video.
86 usenetrc: Use netrc for authentication instead.
87 verbose: Print additional info to stdout.
88 quiet: Do not print messages to stdout.
89 forceurl: Force printing final URL.
90 forcetitle: Force printing title.
91 forceid: Force printing ID.
92 forcethumbnail: Force printing thumbnail URL.
93 forcedescription: Force printing description.
94 forcefilename: Force printing final filename.
95 forcejson: Force printing info_dict as JSON.
96 simulate: Do not download the video files.
97 format: Video format code.
98 format_limit: Highest quality format to try.
99 outtmpl: Template for output names.
100 restrictfilenames: Do not allow "&" and spaces in file names
101 ignoreerrors: Do not stop on download errors.
102 nooverwrites: Prevent overwriting files.
103 playliststart: Playlist item to start at.
104 playlistend: Playlist item to end at.
105 matchtitle: Download only matching titles.
106 rejecttitle: Reject downloads for matching titles.
107 logger: Log messages to a logging.Logger instance.
108 logtostderr: Log messages to stderr instead of stdout.
109 writedescription: Write the video description to a .description file
110 writeinfojson: Write the video metadata to a .info.json file
111 writeannotations: Write the video annotations to a .annotations.xml file
112 writethumbnail: Write the thumbnail image to a file
113 writesubtitles: Write the video subtitles to a file
114 writeautomaticsub: Write the automatic subtitles to a file
115 allsubtitles: Downloads all the subtitles of the video
116 (requires writesubtitles or writeautomaticsub)
117 listsubtitles: Lists all available subtitles for the video
118 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
119 subtitleslangs: List of languages of the subtitles to download
120 keepvideo: Keep the video file after post-processing
121 daterange: A DateRange object, download only if the upload_date is in the range.
122 skip_download: Skip the actual download of the video file
123 cachedir: Location of the cache files in the filesystem.
124 None to disable filesystem cache.
125 noplaylist: Download single video instead of a playlist if in doubt.
126 age_limit: An integer representing the user's age in years.
127 Unsuitable videos for the given age are skipped.
128 download_archive: File name of a file where all downloads are recorded.
129 Videos already present in the file are not downloaded
131 cookiefile: File name where cookies should be read from and dumped to.
132 nocheckcertificate: Do not verify SSL certificates
133 proxy: URL of the proxy server to use
135 The following parameters are not used by YoutubeDL itself, they are used by
137 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
138 noresizebuffer, retries, continuedl, noprogress, consoletitle
144 _download_retcode = None
145 _num_downloads = None
148 def __init__(self, params):
149 """Create a YoutubeDL object with the given options."""
151 self._ies_instances = {}
153 self._progress_hooks = []
154 self._download_retcode = 0
155 self._num_downloads = 0
156 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
158 if (sys.version_info >= (3,) and sys.platform != 'win32' and
159 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
160 and not params['restrictfilenames']):
161 # On Python 3, the Unicode filesystem API will throw errors (#1474)
163 u'Assuming --restrict-filenames since file system encoding '
164 u'cannot encode all charactes. '
165 u'Set the LC_ALL environment variable to fix this.')
166 params['restrictfilenames'] = True
169 self.fd = FileDownloader(self, self.params)
171 if '%(stitle)s' in self.params['outtmpl']:
172 self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
176 def add_info_extractor(self, ie):
177 """Add an InfoExtractor object to the end of the list."""
179 self._ies_instances[ie.ie_key()] = ie
180 ie.set_downloader(self)
182 def get_info_extractor(self, ie_key):
184 Get an instance of an IE with name ie_key, it will try to get one from
185 the _ies list, if there's no instance it will create a new one and add
186 it to the extractor list.
188 ie = self._ies_instances.get(ie_key)
190 ie = get_info_extractor(ie_key)()
191 self.add_info_extractor(ie)
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors(), in the
    order they are produced, through add_info_extractor().
    """
    register = self.add_info_extractor
    for extractor in gen_extractors():
        register(extractor)
201 def add_post_processor(self, pp):
202 """Add a PostProcessor object to the end of the chain."""
204 pp.set_downloader(self)
def to_screen(self, message, skip_eol=False):
    """Show an informational message unless quiet mode is on.

    If a 'logger' is configured in params, the message is handed to its
    debug() method and nothing is printed.  Otherwise, unless the 'quiet'
    param is set, the message is written to self._screen_file.

    message  -- the text to show
    skip_eol -- when true, no trailing newline is appended
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if self.params.get('quiet', False):
        return
    # A conditional expression accepts any truthy/falsy value; indexing a
    # two-element list with skip_eol only worked for 0/1 (or bool).
    terminator = u'' if skip_eol else u'\n'
    write_string(message + terminator, self._screen_file)
def to_stderr(self, message):
    """Print message to stderr, or log it as an error.

    If a 'logger' is configured in params, the message is handed to its
    error() method.  Otherwise the message plus a newline is written to
    sys.stderr.

    message -- the text to emit; must be a text (unicode) string
    """
    assert isinstance(message, type(u''))
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    output = message + u'\n'
    # Decide whether to encode from the stream that is actually written to
    # (sys.stderr), not from self._screen_file, which may be a different
    # stream.  Python 2 lies about the mode of sys.stdout/sys.stderr, so
    # always encode there.
    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
        output = output.encode(preferredencoding())
    sys.stderr.write(output)
def to_console_title(self, message):
    """Set the console window / terminal title to message.

    Does nothing unless the 'consoletitle' param is true.
    """
    if self.params.get('consoletitle', False):
        has_windows_console = (
            os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow())
        if has_windows_console:
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            title = ctypes.c_wchar_p(message)
            ctypes.windll.kernel32.SetConsoleTitleW(title)
        elif 'TERM' in os.environ:
            escape_sequence = u'\033]0;%s\007' % message
            write_string(escape_sequence, self._screen_file)
def save_console_title(self):
    """Save the current terminal title on the terminal's title stack.

    Does nothing unless the 'consoletitle' param is true and the TERM
    environment variable is set.
    """
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Save the title on stack
        write_string(u'\033[22;0t', self._screen_file)
def restore_console_title(self):
    """Restore the terminal title previously saved on the title stack.

    Does nothing unless the 'consoletitle' param is true and the TERM
    environment variable is set.
    """
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Restore the title from stack
        write_string(u'\033[23;0t', self._screen_file)
251 self.save_console_title()
def __exit__(self, *args):
    """Leave the context manager.

    Restores the console title and, when a 'cookiefile' param is
    configured, saves the cookie jar.  Exception details passed in
    *args are ignored and nothing is returned.
    """
    self.restore_console_title()

    cookie_file = self.params.get('cookiefile')
    if cookie_file is None:
        return
    self.cookiejar.save()
def fixed_template(self):
    """Tell whether the output template contains no %(field)s placeholder.

    Returns True when the 'outtmpl' param has no such placeholder, i.e.
    the template is the same for every download.
    """
    template = self.params['outtmpl']
    placeholder = re.search(u'(?u)%\\(.+?\\)s', template)
    return placeholder is None
264 def trouble(self, message=None, tb=None):
265 """Determine action to take when a download problem appears.
267 Depending on if the downloader has been configured to ignore
268 download errors or not, this method may throw an exception or
269 not when errors are found, after printing the message.
271 tb, if given, is additional traceback information.
273 if message is not None:
274 self.to_stderr(message)
275 if self.params.get('verbose'):
277 if sys.exc_info()[0]: # if .trouble has been called from an except block
279 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
280 tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
281 tb += compat_str(traceback.format_exc())
283 tb_data = traceback.format_list(traceback.extract_stack())
284 tb = u''.join(tb_data)
286 if not self.params.get('ignoreerrors', False):
287 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
288 exc_info = sys.exc_info()[1].exc_info
290 exc_info = sys.exc_info()
291 raise DownloadError(message, exc_info)
292 self._download_retcode = 1
def report_warning(self, message):
    '''
    Send the message to to_stderr(), prefixed with 'WARNING:'.
    The prefix is colored when stderr is a tty and the OS is not Windows.
    '''
    colorize = sys.stderr.isatty() and os.name != 'nt'
    header = u'\033[0;33mWARNING:\033[0m' if colorize else u'WARNING:'
    self.to_stderr(u'%s %s' % (header, message))
def report_error(self, message, tb=None):
    '''
    Forward the message to trouble(), prefixed with 'ERROR:'.
    The prefix is colored red when stderr is a tty and the OS is not
    Windows.  tb is passed through to trouble() unchanged.
    '''
    colorize = sys.stderr.isatty() and os.name != 'nt'
    header = u'\033[0;31mERROR:\033[0m' if colorize else u'ERROR:'
    self.trouble(u'%s %s' % (header, message), tb)
def report_writedescription(self, descfn):
    """ Announce on screen that the description is being written to descfn """
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
def report_writesubtitles(self, sub_filename):
    """ Announce on screen that the subtitles are being written to sub_filename """
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
def report_writeinfojson(self, infofn):
    """ Report that the metadata file is being written

    infofn -- name of the JSON metadata file, appended to the message
    """
    # The message previously lacked its verb ("Video description metadata
    # as JSON to: ..."); it now reads like the other "Writing ..." reports.
    self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
def report_writeannotations(self, annofn):
    """ Announce on screen that the annotations are being written to annofn """
    notice = u'[info] Writing video annotations to: ' + annofn
    self.to_screen(notice)
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name was already fully downloaded.

    If showing the file name raises UnicodeEncodeError, a generic
    message without the name is shown instead.
    """
    named_notice = u'[download] %s has already been downloaded'
    generic_notice = u'[download] The file has already been downloaded'
    try:
        self.to_screen(named_notice % file_name)
    except UnicodeEncodeError:
        self.to_screen(generic_notice)
def increment_downloads(self):
    """Advance by one the counter that numbers each downloaded file."""
    self._num_downloads = self._num_downloads + 1
345 def prepare_filename(self, info_dict):
346 """Generate the output filename."""
348 template_dict = dict(info_dict)
350 template_dict['epoch'] = int(time.time())
351 autonumber_size = self.params.get('autonumber_size')
352 if autonumber_size is None:
354 autonumber_templ = u'%0' + str(autonumber_size) + u'd'
355 template_dict['autonumber'] = autonumber_templ % self._num_downloads
356 if template_dict.get('playlist_index') is not None:
357 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
359 sanitize = lambda k, v: sanitize_filename(
360 u'NA' if v is None else compat_str(v),
361 restricted=self.params.get('restrictfilenames'),
363 template_dict = dict((k, sanitize(k, v))
364 for k, v in template_dict.items())
366 tmpl = os.path.expanduser(self.params['outtmpl'])
367 filename = tmpl % template_dict
369 except KeyError as err:
370 self.report_error(u'Erroneous output template')
372 except ValueError as err:
373 self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
376 def _match_entry(self, info_dict):
377 """ Returns None iff the file should be downloaded """
379 if 'title' in info_dict:
380 # This can happen when we're just evaluating the playlist
381 title = info_dict['title']
382 matchtitle = self.params.get('matchtitle', False)
384 if not re.search(matchtitle, title, re.IGNORECASE):
385 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
386 rejecttitle = self.params.get('rejecttitle', False)
388 if re.search(rejecttitle, title, re.IGNORECASE):
389 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
390 date = info_dict.get('upload_date', None)
392 dateRange = self.params.get('daterange', DateRange())
393 if date not in dateRange:
394 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
395 age_limit = self.params.get('age_limit')
396 if age_limit is not None:
397 if age_limit < info_dict.get('age_limit', 0):
398 return u'Skipping "' + title + '" because it is age restricted'
399 if self.in_download_archive(info_dict):
400 return (u'%s has already been recorded in archive'
401 % info_dict.get('title', info_dict.get('id', u'video')))
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict each key of extra_info that it does not have yet'''
    for key in extra_info:
        if key not in info_dict:
            info_dict[key] = extra_info[key]
410 def extract_info(self, url, download=True, ie_key=None, extra_info={}):
412 Returns a list with a dictionary for each video we find.
413 If 'download', also downloads the videos.
414 extra_info is a dict containing the extra values to add to each result
418 ies = [self.get_info_extractor(ie_key)]
423 if not ie.suitable(url):
427 self.report_warning(u'The program functionality for this site has been marked as broken, '
428 u'and will probably not work.')
431 ie_result = ie.extract(url)
432 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
434 if isinstance(ie_result, list):
435 # Backwards compatibility: old IE result format
437 '_type': 'compat_list',
438 'entries': ie_result,
440 self.add_extra_info(ie_result,
442 'extractor': ie.IE_NAME,
444 'extractor_key': ie.ie_key(),
446 return self.process_ie_result(ie_result, download, extra_info)
447 except ExtractorError as de: # An error we somewhat expected
448 self.report_error(compat_str(de), de.format_traceback())
450 except Exception as e:
451 if self.params.get('ignoreerrors', False):
452 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
457 self.report_error(u'no suitable InfoExtractor: %s' % url)
459 def process_ie_result(self, ie_result, download=True, extra_info={}):
461 Take the result of the ie(may be modified) and resolve all unresolved
462 references (URLs, playlist items).
464 It will also download the videos if 'download'.
465 Returns the resolved ie_result.
468 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
469 if result_type == 'video':
470 self.add_extra_info(ie_result, extra_info)
471 return self.process_video_result(ie_result, download=download)
472 elif result_type == 'url':
473 # We have to add extra_info to the results because it may be
474 # contained in a playlist
475 return self.extract_info(ie_result['url'],
477 ie_key=ie_result.get('ie_key'),
478 extra_info=extra_info)
479 elif result_type == 'playlist':
481 # We process each entry in the playlist
482 playlist = ie_result.get('title', None) or ie_result.get('id', None)
483 self.to_screen(u'[download] Downloading playlist: %s' % playlist)
485 playlist_results = []
487 n_all_entries = len(ie_result['entries'])
488 playliststart = self.params.get('playliststart', 1) - 1
489 playlistend = self.params.get('playlistend', -1)
491 if playlistend == -1:
492 entries = ie_result['entries'][playliststart:]
494 entries = ie_result['entries'][playliststart:playlistend]
496 n_entries = len(entries)
498 self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
499 (ie_result['extractor'], playlist, n_all_entries, n_entries))
501 for i, entry in enumerate(entries, 1):
502 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
504 'playlist': playlist,
505 'playlist_index': i + playliststart,
506 'extractor': ie_result['extractor'],
507 'webpage_url': ie_result['webpage_url'],
508 'extractor_key': ie_result['extractor_key'],
511 reason = self._match_entry(entry)
512 if reason is not None:
513 self.to_screen(u'[download] ' + reason)
516 entry_result = self.process_ie_result(entry,
519 playlist_results.append(entry_result)
520 ie_result['entries'] = playlist_results
522 elif result_type == 'compat_list':
524 self.add_extra_info(r,
526 'extractor': ie_result['extractor'],
527 'webpage_url': ie_result['webpage_url'],
528 'extractor_key': ie_result['extractor_key'],
531 ie_result['entries'] = [
532 self.process_ie_result(_fixup(r), download, extra_info)
533 for r in ie_result['entries']
537 raise Exception('Invalid result type: %s' % result_type)
539 def select_format(self, format_spec, available_formats):
540 if format_spec == 'best' or format_spec is None:
541 return available_formats[-1]
542 elif format_spec == 'worst':
543 return available_formats[0]
545 extensions = [u'mp4', u'flv', u'webm', u'3gp']
546 if format_spec in extensions:
547 filter_f = lambda f: f['ext'] == format_spec
549 filter_f = lambda f: f['format_id'] == format_spec
550 matches = list(filter(filter_f, available_formats))
555 def process_video_result(self, info_dict, download=True):
556 assert info_dict.get('_type', 'video') == 'video'
558 if 'playlist' not in info_dict:
559 # It isn't part of a playlist
560 info_dict['playlist'] = None
561 info_dict['playlist_index'] = None
563 # These extractors handle format selection themselves
564 if info_dict['extractor'] in [u'youtube', u'Youku']:
566 self.process_info(info_dict)
569 # We now pick which formats have to be downloaded
570 if info_dict.get('formats') is None:
571 # There's only one format available
572 formats = [info_dict]
574 formats = info_dict['formats']
576 # We check that all the formats have the format and format_id fields
577 for (i, format) in enumerate(formats):
578 if format.get('format_id') is None:
579 format['format_id'] = compat_str(i)
580 if format.get('format') is None:
581 format['format'] = u'{id} - {res}{note}'.format(
582 id=format['format_id'],
583 res=self.format_resolution(format),
584 note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
586 # Automatically determine file extension if missing
587 if 'ext' not in format:
588 format['ext'] = determine_ext(format['url'])
590 if self.params.get('listformats', None):
591 self.list_formats(info_dict)
594 format_limit = self.params.get('format_limit', None)
596 formats = list(takewhile_inclusive(
597 lambda f: f['format_id'] != format_limit, formats
599 if self.params.get('prefer_free_formats'):
600 def _free_formats_key(f):
602 ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
605 # We only compare the extension if they have the same height and width
606 return (f.get('height'), f.get('width'), ext_ord)
607 formats = sorted(formats, key=_free_formats_key)
609 req_format = self.params.get('format', 'best')
610 if req_format is None:
612 formats_to_download = []
613 # The -1 is for supporting YoutubeIE
614 if req_format in ('-1', 'all'):
615 formats_to_download = formats
617 # We can accept formats requested in the format: 34/5/best, we pick
618 # the first that is available, starting from left
619 req_formats = req_format.split('/')
620 for rf in req_formats:
621 selected_format = self.select_format(rf, formats)
622 if selected_format is not None:
623 formats_to_download = [selected_format]
625 if not formats_to_download:
626 raise ExtractorError(u'requested format not available',
630 if len(formats_to_download) > 1:
631 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
632 for format in formats_to_download:
633 new_info = dict(info_dict)
634 new_info.update(format)
635 self.process_info(new_info)
636 # We update the info dict with the best quality format (backwards compatibility)
637 info_dict.update(formats_to_download[-1])
640 def process_info(self, info_dict):
641 """Process a single resolved IE result."""
643 assert info_dict.get('_type', 'video') == 'video'
644 # We increment the download count here to match the previous behaviour.
645 self.increment_downloads()
647 info_dict['fulltitle'] = info_dict['title']
648 if len(info_dict['title']) > 200:
649 info_dict['title'] = info_dict['title'][:197] + u'...'
651 # Keep for backwards compatibility
652 info_dict['stitle'] = info_dict['title']
654 if not 'format' in info_dict:
655 info_dict['format'] = info_dict['ext']
657 reason = self._match_entry(info_dict)
658 if reason is not None:
659 self.to_screen(u'[download] ' + reason)
662 max_downloads = self.params.get('max_downloads')
663 if max_downloads is not None:
664 if self._num_downloads > int(max_downloads):
665 raise MaxDownloadsReached()
667 filename = self.prepare_filename(info_dict)
670 if self.params.get('forcetitle', False):
671 compat_print(info_dict['fulltitle'])
672 if self.params.get('forceid', False):
673 compat_print(info_dict['id'])
674 if self.params.get('forceurl', False):
675 # For RTMP URLs, also include the playpath
676 compat_print(info_dict['url'] + info_dict.get('play_path', u''))
677 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
678 compat_print(info_dict['thumbnail'])
679 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
680 compat_print(info_dict['description'])
681 if self.params.get('forcefilename', False) and filename is not None:
682 compat_print(filename)
683 if self.params.get('forceformat', False):
684 compat_print(info_dict['format'])
685 if self.params.get('forcejson', False):
686 compat_print(json.dumps(info_dict))
688 # Do nothing else if in simulate mode
689 if self.params.get('simulate', False):
696 dn = os.path.dirname(encodeFilename(filename))
697 if dn != '' and not os.path.exists(dn):
699 except (OSError, IOError) as err:
700 self.report_error(u'unable to create directory ' + compat_str(err))
703 if self.params.get('writedescription', False):
705 descfn = filename + u'.description'
706 self.report_writedescription(descfn)
707 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
708 descfile.write(info_dict['description'])
709 except (KeyError, TypeError):
710 self.report_warning(u'There\'s no description to write.')
711 except (OSError, IOError):
712 self.report_error(u'Cannot write description file ' + descfn)
715 if self.params.get('writeannotations', False):
717 annofn = filename + u'.annotations.xml'
718 self.report_writeannotations(annofn)
719 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
720 annofile.write(info_dict['annotations'])
721 except (KeyError, TypeError):
722 self.report_warning(u'There are no annotations to write.')
723 except (OSError, IOError):
724 self.report_error(u'Cannot write annotations file: ' + annofn)
727 subtitles_are_requested = any([self.params.get('writesubtitles', False),
728 self.params.get('writeautomaticsub')])
730 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
731 # subtitles download errors are already managed as troubles in relevant IE
732 # that way it will silently go on when used with unsupporting IE
733 subtitles = info_dict['subtitles']
734 sub_format = self.params.get('subtitlesformat', 'srt')
735 for sub_lang in subtitles.keys():
736 sub = subtitles[sub_lang]
740 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
741 self.report_writesubtitles(sub_filename)
742 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
744 except (OSError, IOError):
745 self.report_error(u'Cannot write subtitles file ' + descfn)
748 if self.params.get('writeinfojson', False):
749 infofn = os.path.splitext(filename)[0] + u'.info.json'
750 self.report_writeinfojson(infofn)
752 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
753 write_json_file(json_info_dict, encodeFilename(infofn))
754 except (OSError, IOError):
755 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
758 if self.params.get('writethumbnail', False):
759 if info_dict.get('thumbnail') is not None:
760 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
761 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
762 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
763 (info_dict['extractor'], info_dict['id']))
765 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
766 with open(thumb_filename, 'wb') as thumbf:
767 shutil.copyfileobj(uf, thumbf)
768 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
769 (info_dict['extractor'], info_dict['id'], thumb_filename))
770 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
771 self.report_warning(u'Unable to download thumbnail "%s": %s' %
772 (info_dict['thumbnail'], compat_str(err)))
774 if not self.params.get('skip_download', False):
775 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
779 success = self.fd._do_download(filename, info_dict)
780 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
781 self.report_error(u'unable to download video data: %s' % str(err))
783 except (OSError, IOError) as err:
784 raise UnavailableVideoError(err)
785 except (ContentTooShortError, ) as err:
786 self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
791 self.post_process(filename, info_dict)
792 except (PostProcessingError) as err:
793 self.report_error(u'postprocessing: %s' % str(err))
796 self.record_download_archive(info_dict)
798 def download(self, url_list):
799 """Download a given list of URLs."""
800 if len(url_list) > 1 and self.fixed_template():
801 raise SameFileError(self.params['outtmpl'])
805 #It also downloads the videos
806 self.extract_info(url)
807 except UnavailableVideoError:
808 self.report_error(u'unable to download video')
809 except MaxDownloadsReached:
810 self.to_screen(u'[info] Maximum number of downloaded files reached.')
813 return self._download_retcode
815 def post_process(self, filename, ie_info):
816 """Run all the postprocessors on the given file."""
818 info['filepath'] = filename
822 keep_video_wish, new_info = pp.run(info)
823 if keep_video_wish is not None:
825 keep_video = keep_video_wish
826 elif keep_video is None:
827 # No clear decision yet, let IE decide
828 keep_video = keep_video_wish
829 except PostProcessingError as e:
830 self.report_error(e.msg)
831 if keep_video is False and not self.params.get('keepvideo', False):
833 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
834 os.remove(encodeFilename(filename))
835 except (IOError, OSError):
836 self.report_warning(u'Unable to remove downloaded video file')
838 def in_download_archive(self, info_dict):
839 fn = self.params.get('download_archive')
842 extractor = info_dict.get('extractor_id')
843 if extractor is None:
844 if 'id' in info_dict:
845 extractor = info_dict.get('ie_key') # key in a playlist
846 if extractor is None:
847 return False # Incomplete video information
848 # Future-proof against any change in case
849 # and backwards compatibility with prior versions
850 extractor = extractor.lower()
851 vid_id = extractor + u' ' + info_dict['id']
853 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
854 for line in archive_file:
855 if line.strip() == vid_id:
857 except IOError as ioe:
858 if ioe.errno != errno.ENOENT:
862 def record_download_archive(self, info_dict):
863 fn = self.params.get('download_archive')
866 vid_id = info_dict['extractor'] + u' ' + info_dict['id']
867 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
868 archive_file.write(vid_id + u'\n')
871 def format_resolution(format, default='unknown'):
872 if format.get('_resolution') is not None:
873 return format['_resolution']
874 if format.get('height') is not None:
875 if format.get('width') is not None:
876 res = u'%sx%s' % (format['width'], format['height'])
878 res = u'%sp' % format['height']
883 def list_formats(self, info_dict):
884 def format_note(fdict):
885 if fdict.get('format_note') is not None:
886 return fdict['format_note']
888 if fdict.get('vcodec') is not None:
889 res += u'%-5s' % fdict['vcodec']
890 elif fdict.get('vbr') is not None:
892 if fdict.get('vbr') is not None:
893 res += u'@%4dk' % fdict['vbr']
894 if fdict.get('acodec') is not None:
897 res += u'%-5s' % fdict['acodec']
898 elif fdict.get('abr') is not None:
902 if fdict.get('abr') is not None:
903 res += u'@%3dk' % fdict['abr']
907 return (u'%-20s%-10s%-12s%s' % (
910 self.format_resolution(format),
915 formats = info_dict.get('formats', [info_dict])
916 formats_s = list(map(line, formats))
918 formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
919 formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
922 'format_id': u'format code', 'ext': u'extension',
923 '_resolution': u'resolution', 'format_note': u'note'})
924 self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
925 (info_dict['id'], header_line, u"\n".join(formats_s)))
def urlopen(self, req):
    """ Open req with this object's opener and return the result """
    opener = self._opener
    response = opener.open(req)
    return response
931 def print_debug_header(self):
932 if not self.params.get('verbose'):
934 write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
936 sp = subprocess.Popen(
937 ['git', 'rev-parse', '--short', 'HEAD'],
938 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
939 cwd=os.path.dirname(os.path.abspath(__file__)))
940 out, err = sp.communicate()
941 out = out.decode().strip()
942 if re.match('[0-9a-f]+', out):
943 write_string(u'[debug] Git HEAD: ' + out + u'\n')
949 write_string(u'[debug] Python version %s - %s' %
950 (platform.python_version(), platform_name()) + u'\n')
953 for handler in self._opener.handlers:
954 if hasattr(handler, 'proxies'):
955 proxy_map.update(handler.proxies)
956 write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
958 def _setup_opener(self, timeout=300):
959 opts_cookiefile = self.params.get('cookiefile')
960 opts_proxy = self.params.get('proxy')
962 if opts_cookiefile is None:
963 self.cookiejar = compat_cookiejar.CookieJar()
965 self.cookiejar = compat_cookiejar.MozillaCookieJar(
967 if os.access(opts_cookiefile, os.R_OK):
968 self.cookiejar.load()
970 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
972 if opts_proxy is not None:
976 proxies = {'http': opts_proxy, 'https': opts_proxy}
978 proxies = compat_urllib_request.getproxies()
979 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
980 if 'http' in proxies and 'https' not in proxies:
981 proxies['https'] = proxies['http']
982 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
983 https_handler = make_HTTPS_handler(
984 self.params.get('nocheckcertificate', False))
985 opener = compat_urllib_request.build_opener(
986 https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
987 # Delete the default user-agent header, which would otherwise apply in
988 # cases where our custom HTTP handler doesn't come into play
989 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
990 opener.addheaders = []
991 self._opener = opener
993 # TODO remove this global modification
994 compat_urllib_request.install_opener(opener)
995 socket.setdefaulttimeout(timeout)