2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # License: Public domain code
23 # parse_qs was moved from the cgi module to the urlparse module recently.
25 from urlparse import parse_qs
27 from cgi import parse_qs
# Browser-like HTTP headers sent with every request (passed to urllib2.Request
# throughout this file as `std_headers`).
# NOTE(review): the enclosing `std_headers = { ... }` open/close lines are
# elided from this listing -- only the dict entries are visible here.
30 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6',
31 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
32 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
33 'Accept-Language': 'en-us,en;q=0.5',
# Unicode string of characters allowed in "simplified" titles: ASCII letters
# and digits only (used as a regex character class by the extractors below).
36 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
38 def preferredencoding():
39 """Get preferred encoding.
41 Returns the best encoding scheme for the system, based on
42 locale.getpreferredencoding() and some further tweaks.
# Inner generator: locale.getpreferredencoding() can raise on some systems;
# wrapping the call in a generator lets the except/fallback live in one place.
# NOTE(review): the generator's try/except and fallback-yield lines are elided
# from this listing -- presumably it falls back to a safe default encoding.
44 def yield_preferredencoding():
46 pref = locale.getpreferredencoding()
# Pull a single value out of the generator (Python 2 .next()).
52 return yield_preferredencoding().next()
54 def htmlentity_transform(matchobj):
55 """Transforms an HTML entity to a Unicode character.
57 This function receives a match object and is intended to be used with
58 the re.sub() function.
# Entity name without the surrounding '&' and ';' (group 1 of the pattern).
60 entity = matchobj.group(1)
62 # Known non-numeric HTML entity
63 if entity in htmlentitydefs.name2codepoint:
64 return unichr(htmlentitydefs.name2codepoint[entity])
# Numeric entity: decimal (#123) or hexadecimal (#x7B).
67 mobj = re.match(ur'(?u)#(x?\d+)', entity)
69 numstr = mobj.group(1)
70 if numstr.startswith(u'x'):
# Turn 'x7B' into '0x7B' so long(numstr, 16) parses it.
# NOTE(review): the `base = 16` / `base = 10` assignment lines are elided
# from this listing; `base` is set in the missing branch lines.
72 numstr = u'0%s' % numstr
75 return unichr(long(numstr, base))
77 # Unknown entity in name, return its literal representation
78 return (u'&%s;' % entity)
80 def sanitize_title(utitle):
81 """Sanitizes a video title so it could be used as part of a filename."""
82 utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
83 return utitle.replace(unicode(os.sep), u'%')
85 def sanitize_open(filename, open_mode):
86 """Try to open the given filename, and slightly tweak it if this fails.
88 Attempts to open the given filename. If this fails, it tries to change
89 the filename slightly, step by step, until it's either able to open it
90 or it fails and raises a final exception, like the standard open()
93 It returns the tuple (stream, definitive_file_name).
# First attempt: open the name exactly as given.
# NOTE(review): the `try:` line guarding this open is elided from this listing.
96 stream = open(filename, open_mode)
97 return (stream, filename)
98 except (IOError, OSError), err:
99 # In case of error, try to remove win32 forbidden chars
100 filename = re.sub(ur'[<>:"\|\?\*]', u'#', filename)
102 # An exception here should be caught in the caller
103 stream = open(filename, open_mode)
104 return (stream, filename)
# Raised (via FileDownloader.trouble) when a download fails and the
# 'ignoreerrors' option is not set.
107 class DownloadError(Exception):
108 """Download Error exception.
110 This exception may be thrown by FileDownloader objects if they are not
111 configured to continue on errors. They will contain the appropriate
# Raised by FileDownloader.download() when multiple URLs map to one filename.
116 class SameFileError(Exception):
117 """Same File exception.
119 This exception will be thrown by FileDownloader objects if they detect
120 multiple files would have to be downloaded to the same file on disk.
# Raised by PostProcessor.run() to signal a postprocessing failure;
# caught in FileDownloader.process_info().
124 class PostProcessingError(Exception):
125 """Post Processing exception.
127 This exception may be raised by PostProcessor's .run() method to
128 indicate an error in the postprocessing task.
# Raised during download/verification when the requested format does not
# exist; YoutubeIE catches it to fall back through _available_formats.
132 class UnavailableFormatError(Exception):
133 """Unavailable Format exception.
135 This exception will be thrown when a video is requested
136 in a format that is not available for that video.
140 class ContentTooShortError(Exception):
141 """Content Too Short exception.
143 This exception may be raised by FileDownloader objects when a file they
144 download is too small for what the server announced first, indicating
145 the connection was probably interrupted.
# downloaded: bytes actually received; expected: bytes announced by the server.
151 def __init__(self, downloaded, expected):
152 self.downloaded = downloaded
153 self.expected = expected
155 class FileDownloader(object):
156 """File Downloader class.
158 File downloader objects are the ones responsible of downloading the
159 actual video file and writing it to disk if the user has requested
160 it, among some other tasks. In most cases there should be one per
161 program. As, given a video URL, the downloader doesn't know how to
162 extract all the needed information, task that InfoExtractors do, it
163 has to pass the URL to one of them.
165 For this, file downloader objects have a method that allows
166 InfoExtractors to be registered in a given order. When it is passed
167 a URL, the file downloader handles it to the first InfoExtractor it
168 finds that reports being able to handle it. The InfoExtractor extracts
169 all the information about the video or videos the URL refers to, and
170 asks the FileDownloader to process the video information, possibly
171 downloading the video.
173 File downloaders accept a lot of parameters. In order not to saturate
174 the object constructor with arguments, it receives a dictionary of
175 options instead. These options are available through the params
176 attribute for the InfoExtractors to use. The FileDownloader also
177 registers itself as the downloader in charge for the InfoExtractors
178 that are added to it, so this is a "mutual registration".
182 username: Username for authentication purposes.
183 password: Password for authentication purposes.
184 usenetrc: Use netrc for authentication instead.
185 quiet: Do not print messages to stdout.
186 forceurl: Force printing final URL.
187 forcetitle: Force printing title.
188 simulate: Do not download the video files.
189 format: Video format code.
190 outtmpl: Template for output names.
191 ignoreerrors: Do not stop on download errors.
192 ratelimit: Download speed limit, in bytes/sec.
193 nooverwrites: Prevent overwriting files.
194 continuedl: Try to continue downloads if possible.
# Class-level default; set to 0 in __init__ and to 1 by trouble() on error.
200 _download_retcode = None
202 def __init__(self, params):
203 """Create a FileDownloader object with the given options."""
206 self._download_retcode = 0
# --- static helpers (decorators elided from this listing) ---
210 def pmkdir(filename):
211 """Create directory components in filename. Similar to Unix "mkdir -p"."""
212 components = filename.split(os.sep)
# Build the list of ancestor paths, shortest first, excluding the leaf.
213 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
214 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
215 for dir in aggregate:
216 if not os.path.exists(dir):
# Format a byte count with a one-letter 1024-based suffix, e.g. '1.50M'.
220 def format_bytes(bytes):
223 if type(bytes) is str:
228 exponent = long(math.log(bytes, 1024.0))
229 suffix = 'bkMGTPEZY'[exponent]
230 converted = float(bytes) / float(1024**exponent)
231 return '%.2f%s' % (converted, suffix)
# Percentage string, right-justified to 6 chars (e.g. ' 42.0%').
234 def calc_percent(byte_counter, data_len):
237 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
# Estimated time of arrival as 'MM:SS' from elapsed time and progress.
240 def calc_eta(start, now, total, current):
244 if current == 0 or dif < 0.001: # One millisecond
246 rate = float(current) / dif
247 eta = long((float(total) - float(current)) / rate)
248 (eta_mins, eta_secs) = divmod(eta, 60)
251 return '%02d:%02d' % (eta_mins, eta_secs)
# Average speed string, right-justified to 10 chars (e.g. '  1.50M/s').
254 def calc_speed(start, now, bytes):
256 if bytes == 0 or dif < 0.001: # One millisecond
257 return '%10s' % '---b/s'
258 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
# Adaptive block size: grow/shrink toward the measured rate, clamped to
# [bytes/2, min(bytes*2, 4MB)].
261 def best_block_size(elapsed_time, bytes):
262 new_min = max(bytes / 2.0, 1.0)
263 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
264 if elapsed_time < 0.001:
266 rate = bytes / elapsed_time
274 def parse_bytes(bytestr):
275 """Parse a string indicating a byte quantity into a long integer."""
276 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
279 number = float(matchobj.group(1))
# Empty suffix group indexes 'b' (exponent 0); suffixes are case-insensitive.
280 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
281 return long(round(number * multiplier))
# verify_url (its `def` line is elided from this listing):
285 """Verify a URL is valid and data could be downloaded. Return real data URL."""
286 request = urllib2.Request(url, None, std_headers)
287 data = urllib2.urlopen(request)
293 def add_info_extractor(self, ie):
294 """Add an InfoExtractor object to the end of the list."""
296 ie.set_downloader(self)
298 def add_post_processor(self, pp):
299 """Add a PostProcessor object to the end of the chain."""
301 pp.set_downloader(self)
303 def to_stdout(self, message, skip_eol=False):
304 """Print message to stdout if not in quiet mode."""
305 if not self.params.get('quiet', False):
# Trailing comma suppresses print's newline; skip_eol controls the '\n'.
306 print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
309 def to_stderr(self, message):
310 """Print message to stderr."""
311 print >>sys.stderr, message.encode(preferredencoding())
313 def fixed_template(self):
314 """Checks if the output template is fixed."""
# Fixed means: no %(field)s placeholders in outtmpl.
315 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
317 def trouble(self, message=None):
318 """Determine action to take when a download problem appears.
320 Depending on if the downloader has been configured to ignore
321 download errors or not, this method may throw an exception or
322 not when errors are found, after printing the message.
324 if message is not None:
325 self.to_stderr(message)
326 if not self.params.get('ignoreerrors', False):
327 raise DownloadError(message)
# Remember the failure for the process exit code when errors are ignored.
328 self._download_retcode = 1
330 def slow_down(self, start_time, byte_counter):
331 """Sleep if the download speed is over the rate limit."""
332 rate_limit = self.params.get('ratelimit', None)
333 if rate_limit is None or byte_counter == 0:
336 elapsed = now - start_time
339 speed = float(byte_counter) / elapsed
340 if speed > rate_limit:
# Sleep just long enough that byte_counter/elapsed drops to rate_limit.
341 time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
# --- progress/status reporting helpers ---
343 def report_destination(self, filename):
344 """Report destination filename."""
345 self.to_stdout(u'[download] Destination: %s' % filename)
347 def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
348 """Report download progress."""
# '\r' + skip_eol keeps the progress line updating in place.
349 self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
350 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
352 def report_resuming_byte(self, resume_len):
353 """Report attempt to resume at given byte."""
354 self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
356 def report_file_already_downloaded(self, file_name):
357 """Report file has already been fully downloaded."""
358 self.to_stdout(u'[download] %s has already been downloaded' % file_name)
360 def report_unable_to_resume(self):
361 """Report it was impossible to resume download."""
362 self.to_stdout(u'[download] Unable to resume')
364 def report_finish(self):
365 """Report download finished."""
368 def process_info(self, info_dict):
369 """Process a single dictionary returned by an InfoExtractor."""
370 # Do nothing else if in simulate mode
371 if self.params.get('simulate', False):
372 # Verify URL if it's an HTTP one
373 if info_dict['url'].startswith('http'):
375 info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
376 except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
377 raise UnavailableFormatError
# Forced printings (honored even in simulate mode).
380 if self.params.get('forcetitle', False):
381 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
382 if self.params.get('forceurl', False):
383 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
# Build the output filename from the template; 'epoch' is always available.
388 template_dict = dict(info_dict)
389 template_dict['epoch'] = unicode(long(time.time()))
390 filename = self.params['outtmpl'] % template_dict
391 except (ValueError, KeyError), err:
392 self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
393 if self.params.get('nooverwrites', False) and os.path.exists(filename):
394 self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
398 self.pmkdir(filename)
399 except (OSError, IOError), err:
400 self.trouble('ERROR: unable to create directories: %s' % str(err))
404 success = self._do_download(filename, info_dict['url'].encode('utf-8'))
405 except (OSError, IOError), err:
# OSError/IOError here is treated as "format not really available".
406 raise UnavailableFormatError
407 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
408 self.trouble('ERROR: unable to download video data: %s' % str(err))
410 except (ContentTooShortError, ), err:
411 self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
416 self.post_process(filename, info_dict)
417 except (PostProcessingError), err:
418 self.trouble('ERROR: postprocessing: %s' % str(err))
421 def download(self, url_list):
422 """Download a given list of URLs."""
# A fixed template can only ever produce one filename.
423 if len(url_list) > 1 and self.fixed_template():
424 raise SameFileError(self.params['outtmpl'])
427 suitable_found = False
429 # Go to next InfoExtractor if not suitable
430 if not ie.suitable(url):
433 # Suitable InfoExtractor found
434 suitable_found = True
436 # Extract information from URL and process it
439 # Suitable InfoExtractor had been found; go to next URL
442 if not suitable_found:
443 self.trouble('ERROR: no suitable InfoExtractor: %s' % url)
445 return self._download_retcode
447 def post_process(self, filename, ie_info):
448 """Run the postprocessing chain on the given file."""
450 info['filepath'] = filename
# RTMP streams cannot be fetched with urllib2; shell out to rtmpdump.
456 def _download_with_rtmpdump(self, filename, url):
457 self.report_destination(filename)
459 # Check for rtmpdump first
461 subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
462 except (OSError, IOError):
463 self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
466 # Download using rtmpdump. rtmpdump returns exit code 2 when
467 # the connection was interrupted and resuming appears to be
468 # possible. This is part of rtmpdump's normal usage, AFAIK.
469 basic_args = ['rtmpdump', '-q', '-r', url, '-o', filename]
# bool indexes the two-list: continuedl adds '-e -k 1' (resume) flags.
470 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
471 while retval == 2 or retval == 1:
472 self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename), skip_eol=True)
473 time.sleep(2.0) # This seems to be needed
474 retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
476 self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
479 self.trouble('ERROR: rtmpdump exited with code %d' % retval)
482 def _do_download(self, filename, url):
483 # Attempt to download using rtmpdump
484 if url.startswith('rtmp'):
485 return self._download_with_rtmpdump(filename, url)
# Two requests: 'request' may carry a Range header, 'basic_request' never does.
489 basic_request = urllib2.Request(url, None, std_headers)
490 request = urllib2.Request(url, None, std_headers)
492 # Establish possible resume length
493 if os.path.isfile(filename):
494 resume_len = os.path.getsize(filename)
498 # Request parameters in case of being able to resume
499 if self.params.get('continuedl', False) and resume_len != 0:
500 self.report_resuming_byte(resume_len)
501 request.add_header('Range','bytes=%d-' % resume_len)
504 # Establish connection
506 data = urllib2.urlopen(request)
507 except (urllib2.HTTPError, ), err:
508 if err.code != 416: # 416 is 'Requested range not satisfiable'
# Range was rejected: retry without it and compare sizes.
511 data = urllib2.urlopen(basic_request)
512 content_length = data.info()['Content-Length']
514 if content_length is not None and long(content_length) == resume_len:
515 # Because the file had already been fully downloaded
516 self.report_file_already_downloaded(filename)
519 # Because the server didn't let us
520 self.report_unable_to_resume()
523 data_len = data.info().get('Content-length', None)
524 data_len_str = self.format_bytes(data_len)
# Main read loop (loop header and timing lines elided from this listing).
531 data_block = data.read(block_size)
533 data_block_len = len(data_block)
534 if data_block_len == 0:
536 byte_counter += data_block_len
538 # Open file just in time
541 (stream, filename) = sanitize_open(filename, open_mode)
542 self.report_destination(filename)
543 except (OSError, IOError), err:
544 self.trouble('ERROR: unable to open for writing: %s' % str(err))
546 stream.write(data_block)
547 block_size = self.best_block_size(after - before, data_block_len)
# Progress report for this iteration.
550 percent_str = self.calc_percent(byte_counter, data_len)
551 eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
552 speed_str = self.calc_speed(start, time.time(), byte_counter)
553 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
556 self.slow_down(start, byte_counter)
# NOTE(review): compares str(int) against the raw header string -- works only
# because Content-length is a decimal string; a numeric compare would be safer.
559 if data_len is not None and str(byte_counter) != data_len:
560 raise ContentTooShortError(byte_counter, long(data_len))
563 class InfoExtractor(object):
564 """Information Extractor class.
566 Information extractors are the classes that, given a URL, extract
567 information from the video (or videos) the URL refers to. This
568 information includes the real video URL, the video title and simplified
569 title, author and others. The information is stored in a dictionary
570 which is then passed to the FileDownloader. The FileDownloader
571 processes this information possibly downloading the video to the file
572 system, among other possible outcomes. The dictionaries must include
573 the following fields:
575 id: Video identifier.
576 url: Final video URL.
577 uploader: Nickname of the video uploader.
578 title: Literal title.
579 stitle: Simplified title.
580 ext: Video filename extension.
582 Subclasses of this one should re-define the _real_initialize() and
583 _real_extract() methods, as well as the suitable() static method.
584 Probably, they should also be instantiated and added to the main
591 def __init__(self, downloader=None):
592 """Constructor. Receives an optional downloader."""
594 self.set_downloader(downloader)
# suitable() (its `def` line is elided from this listing):
598 """Receives a URL and returns True if suitable for this IE."""
# Template methods: initialize()/extract() are the public entry points;
# subclasses override the _real_* hooks.
601 def initialize(self):
602 """Initializes an instance (authentication, etc)."""
604 self._real_initialize()
607 def extract(self, url):
608 """Extracts URL information and returns it in list of dicts."""
610 return self._real_extract(url)
612 def set_downloader(self, downloader):
613 """Sets the downloader for this IE."""
614 self._downloader = downloader
616 def _real_initialize(self):
617 """Real initialization process. Redefine in subclasses."""
620 def _real_extract(self, url):
621 """Real extraction process. Redefine in subclasses."""
624 class YoutubeIE(InfoExtractor):
625 """Information extractor for youtube.com."""
# Group 1 matches the URL prefix (optional); group 2 is the video id.
627 _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?[\?#](?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
628 _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
629 _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
630 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
631 _NETRC_MACHINE = 'youtube'
632 _available_formats = ['37', '22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag
# Maps format code -> filename extension (entries elided from this listing).
633 _video_extensions = {
# suitable() (its `def` line is elided from this listing):
643 return (re.match(YoutubeIE._VALID_URL, url) is not None)
# --- status reporting helpers ---
645 def report_lang(self):
646 """Report attempt to set language."""
647 self._downloader.to_stdout(u'[youtube] Setting language')
649 def report_login(self):
650 """Report attempt to log in."""
651 self._downloader.to_stdout(u'[youtube] Logging in')
653 def report_age_confirmation(self):
654 """Report attempt to confirm age."""
655 self._downloader.to_stdout(u'[youtube] Confirming age')
657 def report_video_info_webpage_download(self, video_id):
658 """Report attempt to download video info webpage."""
659 self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
661 def report_information_extraction(self, video_id):
662 """Report attempt to extract video information."""
663 self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
665 def report_unavailable_format(self, video_id, format):
666 """Report extracted video URL."""
667 self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
669 def report_rtmp_download(self):
670 """Indicate the download will use the RTMP protocol."""
671 self._downloader.to_stdout(u'[youtube] RTMP download detected')
# Set language to English, then (optionally) log in and confirm age.
673 def _real_initialize(self):
674 if self._downloader is None:
679 downloader_params = self._downloader.params
681 # Attempt to use provided username and password or .netrc data
682 if downloader_params.get('username', None) is not None:
683 username = downloader_params['username']
684 password = downloader_params['password']
685 elif downloader_params.get('usenetrc', False):
687 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
692 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
693 except (IOError, netrc.NetrcParseError), err:
# netrc problems are only warnings: downloading may still work anonymously.
694 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
# Set language (cookie-based; response body is discarded).
698 request = urllib2.Request(self._LANG_URL, None, std_headers)
701 urllib2.urlopen(request).read()
702 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
703 self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
706 # No authentication to be performed
# Log in (form fields partially elided from this listing).
712 'current_form': 'loginForm',
714 'action_login': 'Log In',
715 'username': username,
716 'password': password,
718 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
721 login_results = urllib2.urlopen(request).read()
# If the login form is still present, credentials were rejected.
722 if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
723 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
725 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
726 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
# Confirm age (form fields partially elided from this listing).
732 'action_confirm': 'Confirm',
734 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
736 self.report_age_confirmation()
737 age_results = urllib2.urlopen(request).read()
738 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
739 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
742 def _real_extract(self, url):
743 # Extract video id from URL
744 mobj = re.match(self._VALID_URL, url)
746 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
748 video_id = mobj.group(2)
750 # Downloader parameters
754 if self._downloader is not None:
755 params = self._downloader.params
756 format_param = params.get('format', None)
# format '0' means "best available": start at the top of _available_formats.
757 if format_param == '0':
758 format_param = self._available_formats[quality_index]
763 video_extension = self._video_extensions.get(format_param, 'flv')
# Download and parse the get_video_info page (query-string formatted).
766 video_info_url = 'http://www.youtube.com/get_video_info?&video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en' % video_id
767 request = urllib2.Request(video_info_url, None, std_headers)
769 self.report_video_info_webpage_download(video_id)
770 video_info_webpage = urllib2.urlopen(request).read()
771 video_info = parse_qs(video_info_webpage)
772 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
773 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
775 self.report_information_extraction(video_id)
# "token" is required to build the real video URL.
778 if 'token' not in video_info:
779 # Attempt to see if YouTube has issued an error message
780 if 'reason' not in video_info:
781 self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason')
# Dump the raw response so the user can attach it to a bug report.
782 stream = open('reportme-ydl-%s.dat' % time.time(), 'wb')
783 stream.write(video_info_webpage)
786 reason = urllib.unquote_plus(video_info['reason'][0])
787 self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8'))
789 token = urllib.unquote_plus(video_info['token'][0])
790 video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
791 if format_param is not None:
792 video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
794 # Check possible RTMP download
795 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
796 self.report_rtmp_download()
797 video_real_url = video_info['conn'][0]
# Uploader nickname.
800 if 'author' not in video_info:
801 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
803 video_uploader = urllib.unquote_plus(video_info['author'][0])
# Title, plus a filesystem-safe simplified version.
806 if 'title' not in video_info:
807 self._downloader.trouble(u'ERROR: unable to extract video title')
809 video_title = urllib.unquote_plus(video_info['title'][0])
810 video_title = video_title.decode('utf-8')
811 video_title = sanitize_title(video_title)
814 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
815 simple_title = simple_title.strip(ur'_')
818 # Process video information
819 self._downloader.process_info({
820 'id': video_id.decode('utf-8'),
821 'url': video_real_url.decode('utf-8'),
822 'uploader': video_uploader.decode('utf-8'),
823 'title': video_title,
824 'stitle': simple_title,
825 'ext': video_extension.decode('utf-8'),
# Quality fallback: on UnavailableFormatError try the next entry in
# _available_formats until the list is exhausted.
830 except UnavailableFormatError, err:
832 if quality_index == len(self._available_formats) - 1:
833 # I don't ever expect this to happen
834 self._downloader.trouble(u'ERROR: no known formats available for video')
837 self.report_unavailable_format(video_id, format_param)
839 format_param = self._available_formats[quality_index]
842 self._downloader.trouble('ERROR: format not available for video')
846 class MetacafeIE(InfoExtractor):
847 """Information Extractor for metacafe.com."""
# Group 1 is the video id, group 2 the URL-embedded simplified title.
849 _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
850 _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
851 _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
# Keeps a YoutubeIE around because Metacafe rehosts YouTube videos ('yt-' ids).
854 def __init__(self, youtube_ie, downloader=None):
855 InfoExtractor.__init__(self, downloader)
856 self._youtube_ie = youtube_ie
# suitable() (its `def` line is elided from this listing):
860 return (re.match(MetacafeIE._VALID_URL, url) is not None)
862 def report_disclaimer(self):
863 """Report disclaimer retrieval."""
864 self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
866 def report_age_confirmation(self):
867 """Report attempt to confirm age."""
868 self._downloader.to_stdout(u'[metacafe] Confirming age')
870 def report_download_webpage(self, video_id):
871 """Report webpage download."""
872 self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
874 def report_extraction(self, video_id):
875 """Report information extraction."""
876 self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
# Fetch the family-filter disclaimer page, then POST the age confirmation.
878 def _real_initialize(self):
879 # Retrieve disclaimer
880 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
882 self.report_disclaimer()
883 disclaimer = urllib2.urlopen(request).read()
884 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
885 self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
# Confirm age (other form fields elided from this listing).
891 'submit': "Continue - I'm over 18",
893 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
895 self.report_age_confirmation()
896 disclaimer = urllib2.urlopen(request).read()
897 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
898 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
901 def _real_extract(self, url):
902 # Extract id and simplified title from URL
903 mobj = re.match(self._VALID_URL, url)
905 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
908 video_id = mobj.group(1)
910 # Check if video comes from YouTube
911 mobj2 = re.match(r'^yt-(.*)$', video_id)
912 if mobj2 is not None:
# Delegate rehosted YouTube videos to the YoutubeIE instance.
913 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
916 simple_title = mobj.group(2).decode('utf-8')
917 video_extension = 'flv'
919 # Retrieve video webpage to extract further information
920 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
922 self.report_download_webpage(video_id)
923 webpage = urllib2.urlopen(request).read()
924 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
925 self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
928 # Extract URL, uploader and title from webpage
929 self.report_extraction(video_id)
930 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
932 self._downloader.trouble(u'ERROR: unable to extract media URL')
934 mediaURL = urllib.unquote(mobj.group(1))
# Old gdaKey handling, kept disabled for reference.
936 #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
938 # self._downloader.trouble(u'ERROR: unable to extract gdaKey')
940 #gdaKey = mobj.group(1)
942 #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
946 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
948 self._downloader.trouble(u'ERROR: unable to extract title')
950 video_title = mobj.group(1).decode('utf-8')
951 video_title = sanitize_title(video_title)
953 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
955 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
957 video_uploader = mobj.group(1)
960 # Process video information
961 self._downloader.process_info({
962 'id': video_id.decode('utf-8'),
963 'url': video_url.decode('utf-8'),
964 'uploader': video_uploader.decode('utf-8'),
965 'title': video_title,
966 'stitle': simple_title,
967 'ext': video_extension.decode('utf-8'),
969 except UnavailableFormatError:
970 self._downloader.trouble(u'ERROR: format not available for video')
973 class GoogleIE(InfoExtractor):
974 """Information extractor for video.google.com."""
975 
976 _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
978 def __init__(self, downloader=None):
979 InfoExtractor.__init__(self, downloader)
# suitable() (its `def` line is elided from this listing):
983 return (re.match(GoogleIE._VALID_URL, url) is not None)
985 def report_download_webpage(self, video_id):
986 """Report webpage download."""
987 self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id)
989 def report_extraction(self, video_id):
990 """Report information extraction."""
991 self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id)
# No initialization needed for Google Video.
993 def _real_initialize(self):
996 def _real_extract(self, url):
997 # Extract id from URL
998 mobj = re.match(self._VALID_URL, url)
1000 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1003 video_id = mobj.group(1)
1005 video_extension = 'mp4'
1007 # Retrieve video webpage to extract further information
1008 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1010 self.report_download_webpage(video_id)
1011 webpage = urllib2.urlopen(request).read()
1012 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1013 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1016 # Extract URL, uploader, and title from webpage
1017 self.report_extraction(video_id)
# Prefer the direct mp4 download_url; fall back to the escaped flv videoUrl.
1018 mobj = re.search(r"download_url:'([^']+)'", webpage)
1020 video_extension = 'flv'
1021 mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1023 self._downloader.trouble(u'ERROR: unable to extract media URL')
1025 mediaURL = urllib.unquote(mobj.group(1))
# Undo the JS hex escaping: '\x3d' is '=', '\x26' is '&'.
1026 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1027 mediaURL = mediaURL.replace('\\x26', '\x26')
1029 video_url = mediaURL
1031 mobj = re.search(r'<title>(.*)</title>', webpage)
1033 self._downloader.trouble(u'ERROR: unable to extract title')
1035 video_title = mobj.group(1).decode('utf-8')
1036 video_title = sanitize_title(video_title)
1037 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1039 # Google Video doesn't show uploader nicknames?
1040 video_uploader = 'NA'
1043 # Process video information
1044 self._downloader.process_info({
1045 'id': video_id.decode('utf-8'),
1046 'url': video_url.decode('utf-8'),
1047 'uploader': video_uploader.decode('utf-8'),
1048 'title': video_title,
1049 'stitle': simple_title,
1050 'ext': video_extension.decode('utf-8'),
1052 except UnavailableFormatError:
1053 self._downloader.trouble(u'ERROR: format not available for video')
1056 class PhotobucketIE(InfoExtractor):
1057 """Information extractor for photobucket.com."""
# Group 1 is the flv file name carried in the 'current' query parameter.
1059 _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1061 def __init__(self, downloader=None):
1062 InfoExtractor.__init__(self, downloader)
# suitable() (its `def` line is elided from this listing):
1066 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1068 def report_download_webpage(self, video_id):
1069 """Report webpage download."""
1070 self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id)
1072 def report_extraction(self, video_id):
1073 """Report information extraction."""
1074 self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id)
# No initialization needed for Photobucket.
1076 def _real_initialize(self):
1079 def _real_extract(self, url):
1080 # Extract id from URL
1081 mobj = re.match(self._VALID_URL, url)
1083 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1086 video_id = mobj.group(1)
1088 video_extension = 'flv'
1090 # Retrieve video webpage to extract further information
1091 request = urllib2.Request(url)
1093 self.report_download_webpage(video_id)
1094 webpage = urllib2.urlopen(request).read()
1095 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1096 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1099 # Extract URL, uploader, and title from webpage
1100 self.report_extraction(video_id)
1101 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1103 self._downloader.trouble(u'ERROR: unable to extract media URL')
1105 mediaURL = urllib.unquote(mobj.group(1))
1107 video_url = mediaURL
# Title tag carries both the title (group 1) and the uploader (group 2).
1109 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1111 self._downloader.trouble(u'ERROR: unable to extract title')
1113 video_title = mobj.group(1).decode('utf-8')
1114 video_title = sanitize_title(video_title)
1115 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1117 video_uploader = mobj.group(2).decode('utf-8')
1120 # Process video information
1121 self._downloader.process_info({
1122 'id': video_id.decode('utf-8'),
1123 'url': video_url.decode('utf-8'),
1124 'uploader': video_uploader,
1125 'title': video_title,
1126 'stitle': simple_title,
1127 'ext': video_extension.decode('utf-8'),
1129 except UnavailableFormatError:
1130 self._downloader.trouble(u'ERROR: format not available for video')
class GenericIE(InfoExtractor):
	"""Generic last-resort information extractor.

	Tried only after every specialized extractor has declined the URL;
	downloads the page and looks for common embedded-player patterns.
	NOTE(review): this chunk is an elided view of the file -- several
	original lines (try:/return statements, guard conditions, closing
	brackets) are missing below.
	"""

	def __init__(self, downloader=None):
		InfoExtractor.__init__(self, downloader)

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		# Warn loudly: reaching this extractor means everything else failed.
		self._downloader.to_stdout(u'WARNING: Falling back on generic information extractor.')
		self._downloader.to_stdout(u'[generic] %s: Downloading webpage' % video_id)

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_stdout(u'[generic] %s: Extracting information' % video_id)

	def _real_initialize(self):
		# (body elided in this view; presumably a no-op -- TODO confirm)

	def _real_extract(self, url):
		# Provisional id: last path component (refined later from the media URL).
		video_id = url.split('/')[-1]
		request = urllib2.Request(url)
		# (matching try: elided from this view)
		self.report_download_webpage(video_id)
		webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
		except ValueError, err:
			# since this is the last-resort InfoExtractor, if
			# this error is thrown, it'll be thrown here
			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)

		# Start with something easy: JW Player in SWFObject
		mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
		# (guard elided: fallback only when the first pattern failed)
		# Broaden the search a little bit
		mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
		self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)

		# It's possible that one of the regexes
		# matched, but returned an empty group:
		if mobj.group(1) is None:
			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)

		video_url = urllib.unquote(mobj.group(1))
		video_id = os.path.basename(video_url)

		# here's a fun little line of code for you:
		video_extension = os.path.splitext(video_id)[1][1:]
		video_id = os.path.splitext(video_id)[0]

		# it's tempting to parse this further, but you would
		# have to take into account all the variations like
		# Video Title - Site Name
		# Site Name | Video Title
		# Video Title - Tagline | Site Name
		# and so on and so forth; it's just not practical
		mobj = re.search(r'<title>(.*)</title>', webpage)
		self._downloader.trouble(u'ERROR: unable to extract title')
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)
		# Collapse anything outside simple_title_chars for -t style names.
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

		# video uploader is domain name
		mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
		self._downloader.trouble(u'ERROR: unable to extract title')
		video_uploader = mobj.group(1).decode('utf-8')

		# Process video information
		# (matching try: elided from this view)
		self._downloader.process_info({
			'id': video_id.decode('utf-8'),
			'url': video_url.decode('utf-8'),
			'uploader': video_uploader,
			'title': video_title,
			'stitle': simple_title,
			'ext': video_extension.decode('utf-8'),
		# (closing "})" elided from this view)
		except UnavailableFormatError:
			self._downloader.trouble(u'ERROR: format not available for video')
class YoutubeSearchIE(InfoExtractor):
	"""Information Extractor for YouTube search queries.

	Handles "ytsearch[N|all]:<query>" pseudo-URLs by scraping search
	result pages and delegating each found video to the YoutubeIE
	instance.
	NOTE(review): this chunk is an elided view of the file -- several
	original lines (try:/return/if statements, loop headers, the
	suitable() header) are missing below.
	"""

	# Matches "ytsearch:Q", "ytsearchN:Q" or "ytsearchall:Q".
	_VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
	_TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
	_VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
	_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
	# Cap on how many results will ever be requested.
	_max_youtube_results = 1000

	def __init__(self, youtube_ie, downloader=None):
		InfoExtractor.__init__(self, downloader)
		# Each found video is handed to this YouTube extractor.
		self._youtube_ie = youtube_ie

	# Body of the (elided) suitable(url) predicate: True iff the query matches.
	return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)

	def report_download_page(self, query, pagenum):
		"""Report attempt to download playlist page with given number."""
		query = query.decode(preferredencoding())
		self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

	def _real_initialize(self):
		self._youtube_ie.initialize()

	def _real_extract(self, query):
		mobj = re.match(self._VALID_QUERY, query)
		# (guard "if mobj is None:" and return elided) reject bad queries
		self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)

		prefix, query = query.split(':')
		query = query.encode('utf-8')
		# (prefix dispatch; some branch headers elided)
		# bare "ytsearch:" downloads a single result
		self._download_n_results(query, 1)
		elif prefix == 'all':
			self._download_n_results(query, self._max_youtube_results)
		# (numeric-prefix branch, presumably n = int(prefix); header elided)
		self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
		elif n > self._max_youtube_results:
			self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
			n = self._max_youtube_results
		self._download_n_results(query, n)
		except ValueError: # parsing prefix as integer fails
			self._download_n_results(query, 1)

	def _download_n_results(self, query, n):
		"""Downloads a specified number of results for a query"""
		# (initialization of video_ids / pagenum elided from this view)
		already_seen = set()

		# (pagination loop header elided; matching try: also elided)
		self.report_download_page(query, pagenum)
		result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
		request = urllib2.Request(result_url, None, std_headers)
		page = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))

		# Extract video identifiers
		for mobj in re.finditer(self._VIDEO_INDICATOR, page):
			# Slice the matched href text and take the v= value,
			# dropping the trailing closing quote with [:-1].
			video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
			if video_id not in already_seen:
				video_ids.append(video_id)
				already_seen.add(video_id)
				if len(video_ids) == n:
					# Specified n videos reached
					for id in video_ids:
						self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)

		# No "Next" link on this page: it was the last results page.
		if re.search(self._MORE_PAGES_INDICATOR, page) is None:
			for id in video_ids:
				self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)

		pagenum = pagenum + 1
class YoutubePlaylistIE(InfoExtractor):
	"""Information Extractor for YouTube playlists.

	Walks every page of a playlist, collects the video ids and hands
	each one to the YoutubeIE instance.
	NOTE(review): this chunk is an elided view of the file -- several
	original lines (try:/return/if statements, loop headers, the
	suitable() header) are missing below.
	"""

	# NOTE(review): dots in "youtube.com" are unescaped, so they match any
	# character -- presumably harmless here, but worth confirming.
	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*'
	_TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
	# Used below as a plain "%"-filled substring test, not as a regex.
	_MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s'

	def __init__(self, youtube_ie, downloader=None):
		InfoExtractor.__init__(self, downloader)
		# Each collected video id is handed to this YouTube extractor.
		self._youtube_ie = youtube_ie

	# Body of the (elided) suitable(url) predicate: True iff the URL matches.
	return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)

	def report_download_page(self, playlist_id, pagenum):
		"""Report attempt to download playlist page with given number."""
		self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))

	def _real_initialize(self):
		self._youtube_ie.initialize()

	def _real_extract(self, url):
		# Extract playlist id
		mobj = re.match(self._VALID_URL, url)
		# (guard "if mobj is None:" and return elided) reject bad URLs
		self._downloader.trouble(u'ERROR: invalid url: %s' % url)

		# Download playlist pages
		playlist_id = mobj.group(1)
		# (video_ids/pagenum initialization, loop header and try: elided)
		self.report_download_page(playlist_id, pagenum)
		request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
		page = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))

		# Extract video identifiers
		for mobj in re.finditer(self._VIDEO_INDICATOR, page):
			if mobj.group(1) not in ids_in_page:
				ids_in_page.append(mobj.group(1))
		video_ids.extend(ids_in_page)

		# Stop when the next-page link is absent from the HTML; note the
		# id is upper-cased for the comparison. (break elided from view)
		if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page:
		pagenum = pagenum + 1

		for id in video_ids:
			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
class YoutubeUserIE(InfoExtractor):
	"""Information Extractor for YouTube users.

	Fetches a user's gdata feed, collects the video ids found in it and
	hands each one to the YoutubeIE instance.
	NOTE(review): this chunk is an elided view of the file -- several
	original lines (try:/return/if statements, list initializations,
	the suitable() header) are missing below.
	"""

	# NOTE(review): dots in "youtube.com" are unescaped here as well.
	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
	_TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
	_VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.

	def __init__(self, youtube_ie, downloader=None):
		InfoExtractor.__init__(self, downloader)
		# Each collected video id is handed to this YouTube extractor.
		self._youtube_ie = youtube_ie

	# Body of the (elided) suitable(url) predicate: True iff the URL matches.
	return (re.match(YoutubeUserIE._VALID_URL, url) is not None)

	def report_download_page(self, username):
		"""Report attempt to download user page."""
		self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))

	def _real_initialize(self):
		self._youtube_ie.initialize()

	def _real_extract(self, url):
		# Extract username from the URL
		mobj = re.match(self._VALID_URL, url)
		# (guard "if mobj is None:" and return elided) reject bad URLs
		self._downloader.trouble(u'ERROR: invalid url: %s' % url)

		# Download user page
		username = mobj.group(1)
		# (video_ids/pagenum initialization and try: elided from this view)
		self.report_download_page(username)
		request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
		page = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))

		# Extract video identifiers
		for mobj in re.finditer(self._VIDEO_INDICATOR, page):
			if mobj.group(1) not in ids_in_page:
				ids_in_page.append(mobj.group(1))
		video_ids.extend(ids_in_page)

		for id in video_ids:
			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
class PostProcessor(object):
	"""Base class for post-download processing steps.

	Instances are registered on a downloader through its
	add_post_processor() method. After a successful download the
	downloader walks its chain of PostProcessors, calling run() on each
	one: first with an initial information argument, then with whatever
	the previous processor returned. The chain stops as soon as a
	processor returns None, or when its end is reached.

	Like InfoExtractor, this class follows a "mutual registration"
	scheme with the downloader.
	"""

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Attach the given downloader to this post processor."""
		self._downloader = downloader

	def run(self, information):
		"""Process an information dictionary and pass it along.

		"information" is shaped like the dictionaries produced by
		InfoExtractors, with one extra key, "filepath", naming the
		downloaded file. Returning None halts the postprocessing
		chain; returning a (possibly modified) dictionary hands it to
		the next processor. Implementations may raise
		PostProcessingError, which the downloader takes into account.

		The base implementation is the identity: it forwards the
		dictionary unchanged.
		"""
		return information
### MAIN PROGRAM ###
if __name__ == '__main__':
	# NOTE(review): the original wraps the whole main body in a try/except;
	# the try: line and the runtime-only import statements that follow it
	# are elided from this view (the matching except clauses appear at the
	# very end of the file).
	# Modules needed only when running the main program
	# Function to update the program file with the latest version from bitbucket.org
def update_self(downloader, filename):
	"""Overwrite this program's file with the latest stable version.

	Reads the latest version tag from bitbucket.org, downloads the
	matching youtube-dl script and writes it over ``filename``.
	Terminates the process via sys.exit() when the file is not
	writable or the download comes back empty.

	downloader -- FileDownloader instance (only used for messaging)
	filename   -- path of the program file to replace (usually sys.argv[0])
	"""
	# Note: downloader only used for options
	if not os.access(filename, os.W_OK):
		sys.exit('ERROR: no write permissions on %s' % filename)

	downloader.to_stdout('Updating to latest stable version...')
	latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
	latest_version = urllib.urlopen(latest_url).read().strip()
	prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
	newcontent = urllib.urlopen(prog_url).read()
	# Refuse to truncate the program file if the download came back empty
	# (the original wrote whatever it got, clobbering the executable).
	if not newcontent:
		sys.exit('ERROR: unable to download the latest version')
	# Ensure the handle is closed even if the write fails (the visible
	# original never closed it).
	stream = open(filename, 'w')
	try:
		stream.write(newcontent)
	finally:
		stream.close()
	downloader.to_stdout('Updated to version %s' % latest_version)
# NOTE(review): this chunk is an elided view of the file -- several
# original lines (the enclosing try:, some except bodies, closing
# parentheses/brackets, sys.exit(retcode)) are missing below.
# General configuration
# The second install_opener call replaces the first, so the explicit
# ProxyHandler opener is immediately superseded by the cookie-processing
# one (urllib2 installs proxy support by default anyway).
urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

# Parse command line
parser = optparse.OptionParser(
	usage='Usage: %prog [options] url...',
	version='2010.02.13',
	conflict_handler='resolve',
# (closing paren of OptionParser(...) elided from this view)

# -h/-v are redefined so conflict_handler='resolve' replaces the stock ones.
parser.add_option('-h', '--help',
		action='help', help='print this help text and exit')
parser.add_option('-v', '--version',
		action='version', help='print program version and exit')
parser.add_option('-U', '--update',
		action='store_true', dest='update_self', help='update this program to latest stable version')
parser.add_option('-i', '--ignore-errors',
		action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
parser.add_option('-r', '--rate-limit',
		dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')

authentication = optparse.OptionGroup(parser, 'Authentication Options')
authentication.add_option('-u', '--username',
		dest='username', metavar='UN', help='account username')
authentication.add_option('-p', '--password',
		dest='password', metavar='PW', help='account password')
authentication.add_option('-n', '--netrc',
		action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
parser.add_option_group(authentication)

video_format = optparse.OptionGroup(parser, 'Video Format Options')
video_format.add_option('-f', '--format',
		action='store', dest='format', metavar='FMT', help='video format code')
# -b/-m/-d are aliases that store fixed format codes into "format".
video_format.add_option('-b', '--best-quality',
		action='store_const', dest='format', help='download the best quality video possible', const='0')
video_format.add_option('-m', '--mobile-version',
		action='store_const', dest='format', help='alias for -f 17', const='17')
video_format.add_option('-d', '--high-def',
		action='store_const', dest='format', help='alias for -f 22', const='22')
parser.add_option_group(video_format)

verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
verbosity.add_option('-q', '--quiet',
		action='store_true', dest='quiet', help='activates quiet mode', default=False)
verbosity.add_option('-s', '--simulate',
		action='store_true', dest='simulate', help='do not download video', default=False)
verbosity.add_option('-g', '--get-url',
		action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
verbosity.add_option('-e', '--get-title',
		action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
parser.add_option_group(verbosity)

filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
filesystem.add_option('-t', '--title',
		action='store_true', dest='usetitle', help='use title in file name', default=False)
filesystem.add_option('-l', '--literal',
		action='store_true', dest='useliteral', help='use literal title in file name', default=False)
filesystem.add_option('-o', '--output',
		dest='outtmpl', metavar='TPL', help='output filename template')
filesystem.add_option('-a', '--batch-file',
		dest='batchfile', metavar='F', help='file containing URLs to download')
filesystem.add_option('-w', '--no-overwrites',
		action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
filesystem.add_option('-c', '--continue',
		action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
parser.add_option_group(filesystem)

(opts, args) = parser.parse_args()

# Batch file verification
if opts.batchfile is not None:
	# (try: elided) one URL per line; blank lines are dropped
	batchurls = open(opts.batchfile, 'r').readlines()
	batchurls = [x.strip() for x in batchurls]
	batchurls = [x for x in batchurls if len(x) > 0]
	# (except clause header elided from this view)
	sys.exit(u'ERROR: batch file could not be read')
all_urls = batchurls + args

# Conflicting, missing and erroneous options
if opts.usenetrc and (opts.username is not None or opts.password is not None):
	parser.error(u'using .netrc conflicts with giving username/password')
if opts.password is not None and opts.username is None:
	parser.error(u'account username missing')
if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
	parser.error(u'using output template conflicts with using title or literal title')
if opts.usetitle and opts.useliteral:
	parser.error(u'using title conflicts with using literal title')
if opts.username is not None and opts.password is None:
	opts.password = getpass.getpass(u'Type account password and press return:')
if opts.ratelimit is not None:
	numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
	if numeric_limit is None:
		parser.error(u'invalid rate limit specified')
	opts.ratelimit = numeric_limit

# Information extractors
youtube_ie = YoutubeIE()
metacafe_ie = MetacafeIE(youtube_ie)
youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
youtube_user_ie = YoutubeUserIE(youtube_ie)
youtube_search_ie = YoutubeSearchIE(youtube_ie)
google_ie = GoogleIE()
photobucket_ie = PhotobucketIE()
generic_ie = GenericIE()

fd = FileDownloader({
	'usenetrc': opts.usenetrc,
	'username': opts.username,
	'password': opts.password,
	# -g/-e imply both quiet and simulate
	'quiet': (opts.quiet or opts.geturl or opts.gettitle),
	'forceurl': opts.geturl,
	'forcetitle': opts.gettitle,
	'simulate': (opts.simulate or opts.geturl or opts.gettitle),
	'format': opts.format,
	# Template priority: explicit -o, then -t, then -l, then plain id.
	'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
		or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
		or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
		or u'%(id)s.%(ext)s'),
	'ignoreerrors': opts.ignoreerrors,
	'ratelimit': opts.ratelimit,
	'nooverwrites': opts.nooverwrites,
	'continuedl': opts.continue_dl,
# (closing "})" elided from this view)
# More specific extractors are registered ahead of more general ones.
fd.add_info_extractor(youtube_search_ie)
fd.add_info_extractor(youtube_pl_ie)
fd.add_info_extractor(youtube_user_ie)
fd.add_info_extractor(metacafe_ie)
fd.add_info_extractor(youtube_ie)
fd.add_info_extractor(google_ie)
fd.add_info_extractor(photobucket_ie)

# This must come last since it's the
# fallback if none of the others work
fd.add_info_extractor(generic_ie)

# Self-update requested via -U
if opts.update_self:
	update_self(fd, sys.argv[0])

if len(all_urls) < 1:
	if not opts.update_self:
		parser.error(u'you must provide at least one URL')

retcode = fd.download(all_urls)
# (presumably sys.exit(retcode) follows here -- elided from this view)

except DownloadError:
	# (body elided from this view)
except SameFileError:
	sys.exit(u'ERROR: fixed output name but more than one file to download')
except KeyboardInterrupt:
	sys.exit(u'\nERROR: Interrupted by user')