2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
16 from .extractor import get_info_extractor, gen_extractors
17 from .FileDownloader import FileDownloader
20 class YoutubeDL(object):
23 YoutubeDL objects are the ones responsible for downloading the
24 actual video file and writing it to disk if the user has requested
25 it, among some other tasks. In most cases there should be one per
26 program. As, given a video URL, the downloader doesn't know how to
27 extract all the needed information, task that InfoExtractors do, it
28 has to pass the URL to one of them.
30 For this, YoutubeDL objects have a method that allows
31 InfoExtractors to be registered in a given order. When it is passed
32 a URL, the YoutubeDL object hands it to the first InfoExtractor it
33 finds that reports being able to handle it. The InfoExtractor extracts
34 all the information about the video or videos the URL refers to, and
35 YoutubeDL processes the extracted information, possibly using a File
36 Downloader to download the video.
38 YoutubeDL objects accept a lot of parameters. In order not to saturate
39 the object constructor with arguments, it receives a dictionary of
40 options instead. These options are available through the params
41 attribute for the InfoExtractors to use. The YoutubeDL also
42 registers itself as the downloader in charge for the InfoExtractors
43 that are added to it, so this is a "mutual registration".
47 username: Username for authentication purposes.
48 password: Password for authentication purposes.
49 videopassword: Password for accessing a video.
50 usenetrc: Use netrc for authentication instead.
51 verbose: Print additional info to stdout.
52 quiet: Do not print messages to stdout.
53 forceurl: Force printing final URL.
54 forcetitle: Force printing title.
55 forceid: Force printing ID.
56 forcethumbnail: Force printing thumbnail URL.
57 forcedescription: Force printing description.
58 forcefilename: Force printing final filename.
59 simulate: Do not download the video files.
60 format: Video format code.
61 format_limit: Highest quality format to try.
62 outtmpl: Template for output names.
63 restrictfilenames: Do not allow "&" and spaces in file names
64 ignoreerrors: Do not stop on download errors.
65 nooverwrites: Prevent overwriting files.
66 playliststart: Playlist item to start at.
67 playlistend: Playlist item to end at.
68 matchtitle: Download only matching titles.
69 rejecttitle: Reject downloads for matching titles.
70 logtostderr: Log messages to stderr instead of stdout.
71 writedescription: Write the video description to a .description file
72 writeinfojson: Write the video metadata to a .info.json file
73 writethumbnail: Write the thumbnail image to a file
74 writesubtitles: Write the video subtitles to a file
75 writeautomaticsub: Write the automatic subtitles to a file
76 allsubtitles: Downloads all the subtitles of the video
77 (requires writesubtitles or writeautomaticsub)
78 listsubtitles: Lists all available subtitles for the video
79 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
80 subtitleslangs: List of languages of the subtitles to download
81 keepvideo: Keep the video file after post-processing
82 daterange: A DateRange object, download only if the upload_date is in the range.
83 skip_download: Skip the actual download of the video file
84 cachedir: Location of the cache files in the filesystem.
85 None to disable filesystem cache.
86 noplaylist: Download single video instead of a playlist if in doubt.
87 age_limit: An integer representing the user's age in years.
88 Unsuitable videos for the given age are skipped.
90 The following parameters are not used by YoutubeDL itself, they are used by
92 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
93 noresizebuffer, retries, continuedl, noprogress, consoletitle
# Return code of the run: reset to 0 in __init__, set to 1 by trouble() when
# errors are ignored, and returned by download().
99 _download_retcode = None
# Ordinal of the current download: reset to 0 in __init__, bumped by
# increment_downloads() and used for the 'autonumber' template field.
100 _num_downloads = None
def __init__(self, params):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring.
    It is kept by reference as self.params and may be modified in place:
    'restrictfilenames' is forced to True when the filesystem encoding is
    ASCII-only on Python 3 (non-Windows).
    """
    # NOTE(review): the two list initialisations below were restored; the
    # other methods of this class iterate over / append to them.
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Must be set before the first report_warning() call, which goes
    # through to_stderr() and inspects this attribute.
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all characters. '
            u'Set the LC_ALL environment variable to fix this.')
        params['restrictfilenames'] = True

    self.params = params
    self.fd = FileDownloader(self, self.params)

    if '%(stitle)s' in self.params.get('outtmpl', u''):
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list.

    The extractor is also indexed by its ie_key() so that it can be looked
    up by name later, and this object is registered as its downloader.
    """
    self._ies.append(ie)
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    """Return the InfoExtractor instance registered under ie_key.

    If no instance is registered yet, a new one is created, added to the
    extractor list through add_info_extractor() and returned.
    """
    ie = self._ies_instances.get(ie_key)
    if ie is None:
        # Inside the class this name resolves to the module-level factory
        # imported from .extractor, not to this method.
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
    return ie
def add_default_info_extractors(self):
    """Register every InfoExtractor produced by gen_extractors(), in order."""
    register = self.add_info_extractor
    for extractor in gen_extractors():
        register(extractor)
def add_post_processor(self, pp):
    """Add a PostProcessor object to the end of the chain.

    This object is also registered as the post-processor's downloader.
    """
    self._pps.append(pp)
    pp.set_downloader(self)
def to_screen(self, message, skip_eol=False):
    """Write message to the screen file unless the 'quiet' option is set.

    A newline is appended unless skip_eol is true.
    """
    if self.params.get('quiet', False):
        return
    line_end = (u'\n', u'')[skip_eol]
    write_string(message + line_end, self._screen_file)
def to_stderr(self, message):
    """Write message, followed by a newline, to sys.stderr.

    message must be a unicode string.
    """
    assert type(message) == type(u'')
    stream_mode = getattr(self._screen_file, 'mode', '')
    line = message + u'\n'
    # Python 2 lies about the mode of sys.stdout/sys.stderr
    needs_bytes = 'b' in stream_mode or sys.version_info[0] < 3
    if needs_bytes:
        line = line.encode(preferredencoding())
    sys.stderr.write(line)
def fixed_template(self):
    """Return True when the output template has no '%(name)s' field."""
    placeholder = re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl'])
    return placeholder is None
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message, if any, is printed to stderr. In verbose mode a traceback
    is printed as well: tb if given, otherwise one built from the exception
    currently being handled, or from the current stack when no exception
    is active.

    Raises DownloadError unless the 'ignoreerrors' option is set; in that
    case the return code is set to 1 instead.
    """
    if message is not None:
        self.to_stderr(message)

    active = sys.exc_info()
    # Exceptions may carry the exc_info of an underlying error; prefer it
    # when it is actually populated.
    wrapped = getattr(active[1], 'exc_info', None) if active[0] else None
    if not (wrapped and wrapped[0]):
        wrapped = None

    if self.params.get('verbose'):
        if tb is None:
            if active[0]:  # if .trouble has been called from an except block
                tb = u''
                if wrapped is not None:
                    tb += u''.join(traceback.format_exception(*wrapped))
                tb += compat_str(traceback.format_exc())
            else:
                tb = u''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if not self.params.get('ignoreerrors', False):
        raise DownloadError(message, wrapped if wrapped is not None else active)
    self._download_retcode = 1
def report_warning(self, message):
    """Print message to stderr, prefixed with 'WARNING:'.

    The prefix is coloured when stderr is a tty and the OS is not Windows.
    """
    if sys.stderr.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;33mWARNING:\033[0m'
    else:
        _msg_header = u'WARNING:'
    warning_message = u'%s %s' % (_msg_header, message)
    self.to_stderr(warning_message)
def report_error(self, message, tb=None):
    """Do the same as trouble(), but prefix the message with 'ERROR:'.

    The prefix is coloured red when stderr is a tty and the OS is not
    Windows. tb is passed through to trouble() unchanged.
    """
    if sys.stderr.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;31mERROR:\033[0m'
    else:
        _msg_header = u'ERROR:'
    error_message = u'%s %s' % (_msg_header, message)
    self.trouble(error_message, tb)
def slow_down(self, start_time, byte_counter):
    """Sleep if the download speed is over the rate limit.

    start_time is the time.time() value at which the transfer started and
    byte_counter the number of bytes transferred since then. Nothing
    happens when no (positive) 'ratelimit' option is set, when no bytes
    were transferred, or when no time has elapsed.
    """
    rate_limit = self.params.get('ratelimit', None)
    # A zero or negative limit cannot be honoured and would otherwise
    # divide by zero / request a negative sleep below.
    if rate_limit is None or rate_limit <= 0 or byte_counter == 0:
        return
    now = time.time()
    elapsed = now - start_time
    if elapsed <= 0.0:
        return
    speed = float(byte_counter) / elapsed
    if speed > rate_limit:
        # Sleep just long enough for the average speed to drop to the limit.
        time.sleep((byte_counter - rate_limit * elapsed) / rate_limit)
def report_writedescription(self, descfn):
    """Report that the description file is being written."""
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
def report_writesubtitles(self, sub_filename):
    """Report that the subtitles file is being written."""
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
def report_writeinfojson(self, infofn):
    """Report the name of the JSON metadata file."""
    notice = u'[info] Video description metadata as JSON to: ' + infofn
    self.to_screen(notice)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded.

    Falls back to a message without the file name when the name cannot be
    encoded for output.
    """
    try:
        self.to_screen(u'[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
def increment_downloads(self):
    """Advance the ordinal that assigns a number to each file by one."""
    self._num_downloads = self._num_downloads + 1
def prepare_filename(self, info_dict):
    """Generate the output filename.

    Expands the 'outtmpl' option with a copy of info_dict extended by the
    'epoch' and 'autonumber' fields; every value is sanitized first and
    None values become u'NA'. info_dict must contain 'playlist_index'.

    Returns the filename, or None after reporting an error when the
    template cannot be expanded.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            # NOTE(review): default width restored as 5, matching the
            # '%05d' used for playlist_index below -- confirm.
            autonumber_size = 5
        autonumber_templ = u'%0' + str(autonumber_size) + u'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict['playlist_index'] is not None:
            template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

        restricted = self.params.get('restrictfilenames')

        def sanitize(k, v):
            # NOTE(review): the is_id argument was restored (the key is
            # otherwise unused) -- confirm against sanitize_filename().
            return sanitize_filename(
                u'NA' if v is None else compat_str(v),
                restricted=restricted,
                is_id=(k == u'id'))

        template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items())

        return self.params['outtmpl'] % template_dict
    except KeyError:
        self.report_error(u'Erroneous output template')
        return None
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
        return None
def _match_entry(self, info_dict):
    """Return None iff the file should be downloaded.

    Otherwise return a string with the reason for skipping it. The reason
    carries no '[download] ' prefix; the caller adds it when printing.

    The checks are, in order: 'matchtitle' and 'rejecttitle' (case
    insensitive regular expressions searched in the title), 'daterange'
    (only when the entry has an 'upload_date') and 'age_limit' (compared
    with the entry's 'age_restriction', 0 when absent).
    """
    title = info_dict['title']
    matchtitle = self.params.get('matchtitle', False)
    if matchtitle:
        if not re.search(matchtitle, title, re.IGNORECASE):
            return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
    rejecttitle = self.params.get('rejecttitle', False)
    if rejecttitle:
        if re.search(rejecttitle, title, re.IGNORECASE):
            return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        if age_limit < info_dict.get('age_restriction', 0):
            return u'Skipping "' + title + '" because it is age restricted'
    return None
def extract_info(self, url, download=True, ie_key=None, extra_info=None):
    """Extract the information for url with the first suitable extractor.

    If 'download', also downloads the videos.
    ie_key, if given, selects a single extractor by name instead of
    trying the registered ones in order.
    extra_info is a dict containing the extra values to add to each result.

    Returns whatever process_ie_result() returns for the extracted
    result, or None when extraction finished early, failed with a
    reported error, or no extractor was suitable.
    """
    if extra_info is None:
        extra_info = {}

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        # NOTE(review): the condition guarding this warning was restored
        # as ie.working() -- confirm against the InfoExtractor API.
        if not ie.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                for result in ie_result:
                    result.update(extra_info)
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            else:
                ie_result.update(extra_info)
            if 'extractor' not in ie_result:
                ie_result['extractor'] = ie.IE_NAME
            return self.process_ie_result(ie_result, download=download)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ended without any extractor accepting the URL.
        self.report_error(u'no suitable InfoExtractor: %s' % url)
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """Resolve all unresolved references (URLs, playlist items) of ie_result.

    ie_result is the result of an extractor and may be modified in place.
    It will also download the videos if 'download'.
    extra_info is a dict of extra values added to every resolved video.

    Returns the resolved ie_result. Raises Exception for an unknown
    '_type'.
    """
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        ie_result.update(extra_info)
        if 'playlist' not in ie_result:
            # It isn't part of a playlist
            ie_result['playlist'] = None
            ie_result['playlist_index'] = None
        if download:
            self.process_info(ie_result)
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        n_all_entries = len(ie_result['entries'])
        # 'playliststart' is 1-based; convert it to a slice offset.
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', -1)

        if playlistend == -1:
            entries = ie_result['entries'][playliststart:]
        else:
            entries = ie_result['entries'][playliststart:playlistend]

        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                       (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'playlist': playlist,
                'playlist_index': i + playliststart,
            }
            if 'extractor' not in entry:
                # We set the extractor, if it's an url it will be set then to
                # the new extractor, but if it's already a video we must make
                # sure it's present: see issue #877
                entry['extractor'] = ie_result['extractor']
            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            # Old-style results carry no extractor name of their own.
            r.setdefault('extractor', ie_result['extractor'])
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download=download)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
def process_info(self, info_dict):
    """Process a single resolved IE result.

    info_dict is modified in place ('fulltitle', a possibly shortened
    'title', 'stitle' and a default 'format' are set). Depending on the
    options this prints the forced fields, writes the description,
    subtitles, JSON metadata and thumbnail files, downloads the video and
    runs the post-processors.

    Returns None. Raises MaxDownloadsReached when the 'max_downloads'
    option is exceeded and UnavailableVideoError when the download fails
    with an OS/IO error.
    """
    assert info_dict.get('_type', 'video') == 'video'
    # The download count is incremented here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['title'])
    if self.params.get('forceid', False):
        compat_print(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        compat_print(info_dict['url'] + info_dict.get('play_path', u''))
    if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and 'description' in info_dict:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    # prepare_filename() has already reported the template error.
    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + u'.description'
        try:
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (KeyError, TypeError):
            # Missing key or a None description.
            self.report_warning(u'There\'s no description to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat')
        for sub_lang, sub in subtitles.items():
            if sub is None:
                continue
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            try:
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                # Name the subtitles file here, not the description file.
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = filename + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            # 'urlhandle' is left out of the JSON dump.
            json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                           (info_dict['extractor'], info_dict['id']))
            try:
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                try:
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                finally:
                    # Release the connection even if writing fails.
                    uf.close()
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                # A missing thumbnail is not fatal; carry on with the video.
                self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                    (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            # NOTE(review): restored -- an existing file is treated as a
            # successful download so post-processing still runs; confirm.
            success = True
        else:
            try:
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except ContentTooShortError as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except PostProcessingError as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    output template. MaxDownloadsReached is reported and re-raised.

    Returns the accumulated return code.
    """
    if len(url_list) > 1 and self.fixed_template():
        raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        try:
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            # NOTE(review): restored as a re-raise so the caller can stop
            # the whole run -- confirm.
            raise

    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Every post-processor receives a copy of ie_info extended with
    'filepath' and returns a (keep_video_wish, new_info) pair. The
    original file is deleted afterwards only when the wishes add up to
    False and the 'keepvideo' option is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    for pp in self._pps:
        try:
            keep_video_wish, _new_info = pp.run(info)
            if keep_video_wish is not None:
                if keep_video_wish:
                    # A wish to keep the file always wins.
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning(u'Unable to remove downloaded video file')