2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # License: Public domain code
23 # parse_qs was moved from the cgi module to the urlparse module recently.
25 from urlparse import parse_qs
27 from cgi import parse_qs
30 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2',
31 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
32 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
33 'Accept-Language': 'en-us,en;q=0.5',
# Characters considered safe in simplified titles: ASCII letters and digits,
# held as unicode (this is Python 2 code: str.decode exists there).
# NOTE(review): the extracted chunk had a stray line-number prefix fused onto
# this line, which broke the syntax; only the prefix was removed.
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
def preferredencoding():
	"""Get preferred encoding.

	Returns the best encoding scheme for the system, based on
	locale.getpreferredencoding() and some further tweaks.
	"""
	def yield_preferredencoding():
		# Probe the locale's preferred encoding once; if it cannot encode a
		# trivial string (or the lookup itself fails), fall back to UTF-8.
		try:
			pref = locale.getpreferredencoding()
			u'TEST'.encode(pref)
		except Exception:
			pref = 'UTF-8'
		while True:
			yield pref
	# Use the next() builtin instead of the Python-2-only .next() method so
	# this also runs on Python 2.6+/3.x.
	return next(yield_preferredencoding())
def htmlentity_transform(matchobj):
	"""Transforms an HTML entity to a Unicode character.

	This function receives a match object and is intended to be used with
	the re.sub() function.
	"""
	entity = matchobj.group(1)

	# Known non-numeric HTML entity
	if entity in htmlentitydefs.name2codepoint:
		return unichr(htmlentitydefs.name2codepoint[entity])

	# Numeric character reference: decimal ("#65") or hexadecimal ("#x41").
	# NOTE(review): the extraction elided the branch structure here; it was
	# restored so that hex references get base 16 (with a '0x' prefix for
	# long()) and decimal references get base 10.
	mobj = re.match(u'(?u)#(x?\\d+)', entity)
	if mobj is not None:
		numstr = mobj.group(1)
		if numstr.startswith(u'x'):
			base = 16
			numstr = u'0%s' % numstr
		else:
			base = 10
		return unichr(long(numstr, base))

	# Unknown entity in name, return its literal representation
	return (u'&%s;' % entity)
def sanitize_title(utitle):
	"""Sanitizes a video title so it could be used as part of a filename.

	This triggers different transformations based on the platform we
	are running on.
	"""
	utitle = re.sub(u'(?u)&(.+?);', htmlentity_transform, utitle)
	if sys.platform == 'win32':
		# BUGFIX: the original called the nonexistent re.replace() on an
		# undefined name 'title' and the pattern lacked a character class.
		# Replace each character that is forbidden in Windows filenames
		# with a dash, operating on the actual argument.
		return re.sub(u'[<>:"\\|\\?\\*]', u'-', utitle)
	# On other platforms, only the path separator needs to be neutralized.
	return utitle.replace(unicode(os.sep), u'%')
class DownloadError(Exception):
	"""Download Error exception.

	This exception may be thrown by FileDownloader objects if they are not
	configured to continue on errors. They will contain the appropriate
	error message.
	"""
	# No extra state: the message passed to the constructor is enough.
	pass
class SameFileError(Exception):
	"""Same File exception.

	This exception will be thrown by FileDownloader objects if they detect
	multiple files would have to be downloaded to the same file on disk.
	"""
	pass
class PostProcessingError(Exception):
	"""Post Processing exception.

	This exception may be raised by PostProcessor's .run() method to
	indicate an error in the postprocessing task.
	"""
	pass
class UnavailableFormatError(Exception):
	"""Unavailable Format exception.

	This exception will be thrown when a video is requested
	in a format that is not available for that video.
	"""
	pass
class ContentTooShortError(Exception):
	"""Content Too Short exception.

	This exception may be raised by FileDownloader objects when a file they
	download is too small for what the server announced first, indicating
	the connection was probably interrupted.
	"""

	def __init__(self, downloaded, expected):
		"""Record the actual and announced byte counts for the caller."""
		self.downloaded = downloaded  # bytes actually received
		self.expected = expected      # bytes announced by the server
# NOTE(review): this chunk was extracted with the original file's line numbers
# fused onto every line and with many interior lines missing (try statements,
# returns, loop headers), so the block is NOT runnable as shown. Code left
# byte-identical; only comments/docstrings added or corrected below.
139 class FileDownloader(object):
140 """File Downloader class.
142 File downloader objects are the ones responsible for downloading the
143 actual video file and writing it to disk if the user has requested
144 it, among some other tasks. In most cases there should be one per
145 program. As, given a video URL, the downloader doesn't know how to
146 extract all the needed information, task that InfoExtractors do, it
147 has to pass the URL to one of them.
149 For this, file downloader objects have a method that allows
150 InfoExtractors to be registered in a given order. When it is passed
151 a URL, the file downloader handles it to the first InfoExtractor it
152 finds that reports being able to handle it. The InfoExtractor extracts
153 all the information about the video or videos the URL refers to, and
154 asks the FileDownloader to process the video information, possibly
155 downloading the video.
157 File downloaders accept a lot of parameters. In order not to saturate
158 the object constructor with arguments, it receives a dictionary of
159 options instead. These options are available through the params
160 attribute for the InfoExtractors to use. The FileDownloader also
161 registers itself as the downloader in charge for the InfoExtractors
162 that are added to it, so this is a "mutual registration".
166 username: Username for authentication purposes.
167 password: Password for authentication purposes.
168 usenetrc: Use netrc for authentication instead.
169 quiet: Do not print messages to stdout.
170 forceurl: Force printing final URL.
171 forcetitle: Force printing title.
172 simulate: Do not download the video files.
173 format: Video format code.
174 outtmpl: Template for output names.
175 ignoreerrors: Do not stop on download errors.
176 ratelimit: Download speed limit, in bytes/sec.
177 nooverwrites: Prevent overwriting files.
178 continuedl: Try to continue downloads if possible.
# Process exit code: set to 1 by trouble() on the first recorded problem.
184 _download_retcode = None
186 def __init__(self, params):
187 """Create a FileDownloader object with the given options."""
190 self._download_retcode = 0
# Static helper, similar to Unix "mkdir -p": create every missing component.
194 def pmkdir(filename):
195 """Create directory components in filename. Similar to Unix "mkdir -p"."""
196 components = filename.split(os.sep)
197 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
198 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
199 for dir in aggregate:
200 if not os.path.exists(dir):
# Human-readable byte-count formatter used by the progress display.
# The suffix table 'bkMGTPEZY' indexes by the base-1024 exponent.
204 def format_bytes(bytes):
207 if type(bytes) is str:
212 exponent = long(math.log(bytes, 1024.0))
213 suffix = 'bkMGTPEZY'[exponent]
214 converted = float(bytes) / float(1024**exponent)
215 return '%.2f%s' % (converted, suffix)
218 def calc_percent(byte_counter, data_len):
221 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
# ETA in MM:SS derived from the average rate observed so far.
224 def calc_eta(start, now, total, current):
228 if current == 0 or dif < 0.001: # One millisecond
230 rate = float(current) / dif
231 eta = long((float(total) - float(current)) / rate)
232 (eta_mins, eta_secs) = divmod(eta, 60)
235 return '%02d:%02d' % (eta_mins, eta_secs)
238 def calc_speed(start, now, bytes):
240 if bytes == 0 or dif < 0.001: # One millisecond
241 return '%10s' % '---b/s'
242 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
# Adaptive read size: move towards the measured rate, clamped to [1B, 4MB].
245 def best_block_size(elapsed_time, bytes):
246 new_min = max(bytes / 2.0, 1.0)
247 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
248 if elapsed_time < 0.001:
250 rate = bytes / elapsed_time
258 def parse_bytes(bytestr):
259 """Parse a string indicating a byte quantity into a long integer."""
260 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
263 number = float(matchobj.group(1))
264 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
265 return long(round(number * multiplier))
269 """Verify a URL is valid and data could be downloaded. Return real data URL."""
270 request = urllib2.Request(url, None, std_headers)
271 data = urllib2.urlopen(request)
277 def add_info_extractor(self, ie):
278 """Add an InfoExtractor object to the end of the list."""
# Mutual registration: the IE gets a back-reference to this downloader.
280 ie.set_downloader(self)
282 def add_post_processor(self, pp):
283 """Add a PostProcessor object to the end of the chain."""
285 pp.set_downloader(self)
287 def to_stdout(self, message, skip_eol=False):
288 """Print message to stdout if not in quiet mode."""
289 if not self.params.get('quiet', False):
# The [u'\n', u''][skip_eol] indexing selects the terminator by bool.
290 print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
293 def to_stderr(self, message):
294 """Print message to stderr."""
295 print >>sys.stderr, message.encode(preferredencoding())
297 def fixed_template(self):
298 """Checks if the output template is fixed (contains no %(...)s fields)."""
299 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
301 def trouble(self, message=None):
302 """Determine action to take when a download problem appears.
304 Depending on if the downloader has been configured to ignore
305 download errors or not, this method may throw an exception or
306 not when errors are found, after printing the message.
308 if message is not None:
309 self.to_stderr(message)
310 if not self.params.get('ignoreerrors', False):
311 raise DownloadError(message)
312 self._download_retcode = 1
# Throttle based on the average speed since start_time, if over 'ratelimit'.
314 def slow_down(self, start_time, byte_counter):
315 """Sleep if the download speed is over the rate limit."""
316 rate_limit = self.params.get('ratelimit', None)
317 if rate_limit is None or byte_counter == 0:
320 elapsed = now - start_time
323 speed = float(byte_counter) / elapsed
324 if speed > rate_limit:
325 time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
327 def report_destination(self, filename):
328 """Report destination filename."""
329 self.to_stdout(u'[download] Destination: %s' % filename)
331 def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
332 """Report download progress."""
333 self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
334 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
336 def report_resuming_byte(self, resume_len):
337 """Report attempt to resume at given byte."""
338 self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
340 def report_file_already_downloaded(self, file_name):
341 """Report file has already been fully downloaded."""
342 self.to_stdout(u'[download] %s has already been downloaded' % file_name)
344 def report_unable_to_resume(self):
345 """Report it was impossible to resume download."""
346 self.to_stdout(u'[download] Unable to resume')
348 def report_finish(self):
349 """Report download finished."""
352 def process_info(self, info_dict):
353 """Process a single dictionary returned by an InfoExtractor."""
354 # Do nothing else if in simulate mode
355 if self.params.get('simulate', False):
356 # Verify URL if it's an HTTP one
357 if info_dict['url'].startswith('http'):
359 info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
360 except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
361 raise UnavailableFormatError
364 if self.params.get('forcetitle', False):
365 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
366 if self.params.get('forceurl', False):
367 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
# The output filename comes from interpolating outtmpl with the info dict,
# extended with an 'epoch' timestamp field.
372 template_dict = dict(info_dict)
373 template_dict['epoch'] = unicode(long(time.time()))
374 filename = self.params['outtmpl'] % template_dict
375 except (ValueError, KeyError), err:
376 self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
377 if self.params.get('nooverwrites', False) and os.path.exists(filename):
378 self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
382 self.pmkdir(filename)
383 except (OSError, IOError), err:
384 self.trouble('ERROR: unable to create directories: %s' % str(err))
388 success = self._do_download(filename, info_dict['url'].encode('utf-8'))
389 except (OSError, IOError), err:
# OSError/IOError during download is treated as "this format unavailable".
390 raise UnavailableFormatError
391 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
392 self.trouble('ERROR: unable to download video data: %s' % str(err))
394 except (ContentTooShortError, ), err:
395 self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
400 self.post_process(filename, info_dict)
401 except (PostProcessingError), err:
402 self.trouble('ERROR: postprocessing: %s' % str(err))
405 def download(self, url_list):
406 """Download a given list of URLs."""
# A fixed (field-less) template with several URLs would clobber one file.
407 if len(url_list) > 1 and self.fixed_template():
408 raise SameFileError(self.params['outtmpl'])
411 suitable_found = False
413 # Go to next InfoExtractor if not suitable
414 if not ie.suitable(url):
417 # Suitable InfoExtractor found
418 suitable_found = True
420 # Extract information from URL and process it
423 # Suitable InfoExtractor had been found; go to next URL
426 if not suitable_found:
427 self.trouble('ERROR: no suitable InfoExtractor: %s' % url)
429 return self._download_retcode
431 def post_process(self, filename, ie_info):
432 """Run the postprocessing chain on the given file."""
434 info['filepath'] = filename
440 def _download_with_rtmpdump(self, filename, url):
441 self.report_destination(filename)
443 # Check for rtmpdump first
445 subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
446 except (OSError, IOError):
447 self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
450 # Download using rtmpdump. rtmpdump returns exit code 2 when
451 # the connection was interrupted and resuming appears to be
452 # possible. This is part of rtmpdump's normal usage, AFAIK.
453 basic_args = ['rtmpdump', '-q', '-r', url, '-o', filename]
# [[], ['-e', '-k', '1']][bool] appends resume flags only when continuedl set.
454 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
455 while retval == 2 or retval == 1:
456 self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename), skip_eol=True)
457 time.sleep(2.0) # This seems to be needed
458 retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
460 self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
463 self.trouble('ERROR: rtmpdump exited with code %d' % retval)
466 def _do_download(self, filename, url):
467 # Attempt to download using rtmpdump
468 if url.startswith('rtmp'):
469 return self._download_with_rtmpdump(filename, url)
# basic_request stays Range-free so it can be retried if resume is refused.
473 basic_request = urllib2.Request(url, None, std_headers)
474 request = urllib2.Request(url, None, std_headers)
476 # Establish possible resume length
477 if os.path.isfile(filename):
478 resume_len = os.path.getsize(filename)
482 # Request parameters in case of being able to resume
483 if self.params.get('continuedl', False) and resume_len != 0:
484 self.report_resuming_byte(resume_len)
485 request.add_header('Range','bytes=%d-' % resume_len)
488 # Establish connection
490 data = urllib2.urlopen(request)
491 except (urllib2.HTTPError, ), err:
492 if err.code != 416: # 416 is 'Requested range not satisfiable'
495 data = urllib2.urlopen(basic_request)
496 content_length = data.info()['Content-Length']
498 if content_length is not None and long(content_length) == resume_len:
499 # Because the file had already been fully downloaded
500 self.report_file_already_downloaded(filename)
503 # Because the server didn't let us
504 self.report_unable_to_resume()
507 data_len = data.info().get('Content-length', None)
508 data_len_str = self.format_bytes(data_len)
515 data_block = data.read(block_size)
517 data_block_len = len(data_block)
518 if data_block_len == 0:
520 byte_counter += data_block_len
522 # Open file just in time
525 stream = open(filename, open_mode)
526 self.report_destination(filename)
527 except (OSError, IOError), err:
528 self.trouble('ERROR: unable to open for writing: %s' % str(err))
530 stream.write(data_block)
531 block_size = self.best_block_size(after - before, data_block_len)
534 percent_str = self.calc_percent(byte_counter, data_len)
535 eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
536 speed_str = self.calc_speed(start, time.time(), byte_counter)
537 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
# Apply rate limiting, if any
540 self.slow_down(start, byte_counter)
# Note: byte_counter is compared as str against the raw header value here.
543 if data_len is not None and str(byte_counter) != data_len:
544 raise ContentTooShortError(byte_counter, long(data_len))
# NOTE(review): extracted with fused line numbers and elided interior lines;
# code left byte-identical, comments only.
547 class InfoExtractor(object):
548 """Information Extractor class.
550 Information extractors are the classes that, given a URL, extract
551 information from the video (or videos) the URL refers to. This
552 information includes the real video URL, the video title and simplified
553 title, author and others. The information is stored in a dictionary
554 which is then passed to the FileDownloader. The FileDownloader
555 processes this information possibly downloading the video to the file
556 system, among other possible outcomes. The dictionaries must include
557 the following fields:
559 id: Video identifier.
560 url: Final video URL.
561 uploader: Nickname of the video uploader.
562 title: Literal title.
563 stitle: Simplified title.
564 ext: Video filename extension.
566 Subclasses of this one should re-define the _real_initialize() and
567 _real_extract() methods, as well as the suitable() static method.
568 Probably, they should also be instantiated and added to the main
575 def __init__(self, downloader=None):
576 """Constructor. Receives an optional downloader."""
577 self.set_downloader(downloader)
# Omitted in extraction above: the visible call is on the next line.
578 self.set_downloader(downloader)
582 """Receives a URL and returns True if suitable for this IE."""
# Template-method pattern: the public methods below delegate to the
# _real_* hooks that subclasses override.
585 def initialize(self):
586 """Initializes an instance (authentication, etc)."""
588 self._real_initialize()
591 def extract(self, url):
592 """Extracts URL information and returns it in list of dicts."""
594 return self._real_extract(url)
596 def set_downloader(self, downloader):
597 """Sets the downloader for this IE."""
598 self._downloader = downloader
600 def _real_initialize(self):
601 """Real initialization process. Redefine in subclasses."""
604 def _real_extract(self, url):
605 """Real extraction process. Redefine in subclasses."""
# NOTE(review): extracted with fused line numbers and elided interior lines
# (try statements, returns, dict bodies missing); code left byte-identical.
608 class YoutubeIE(InfoExtractor):
609 """Information extractor for youtube.com."""
611 _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?[\?#](?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
612 _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
613 _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
614 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
615 _NETRC_MACHINE = 'youtube'
616 _available_formats = ['37', '22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag
617 _video_extensions = {
627 return (re.match(YoutubeIE._VALID_URL, url) is not None)
629 def report_lang(self):
630 """Report attempt to set language."""
631 self._downloader.to_stdout(u'[youtube] Setting language')
633 def report_login(self):
634 """Report attempt to log in."""
635 self._downloader.to_stdout(u'[youtube] Logging in')
637 def report_age_confirmation(self):
638 """Report attempt to confirm age."""
639 self._downloader.to_stdout(u'[youtube] Confirming age')
641 def report_video_info_webpage_download(self, video_id):
642 """Report attempt to download video info webpage."""
643 self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
645 def report_information_extraction(self, video_id):
646 """Report attempt to extract video information."""
647 self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
649 def report_unavailable_format(self, video_id, format):
650 """Report that the requested format is not available for this video."""
651 self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
653 def report_rtmp_download(self):
654 """Indicate the download will use the RTMP protocol."""
655 self._downloader.to_stdout(u'[youtube] RTMP download detected')
657 def _real_initialize(self):
658 if self._downloader is None:
663 downloader_params = self._downloader.params
665 # Attempt to use provided username and password or .netrc data
666 if downloader_params.get('username', None) is not None:
667 username = downloader_params['username']
668 password = downloader_params['password']
669 elif downloader_params.get('usenetrc', False):
671 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
676 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
677 except (IOError, netrc.NetrcParseError), err:
678 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
# Language is forced to English so later regexes match reliably.
682 request = urllib2.Request(self._LANG_URL, None, std_headers)
685 urllib2.urlopen(request).read()
686 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
687 self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
690 # No authentication to be performed
696 'current_form': 'loginForm',
698 'action_login': 'Log In',
699 'username': username,
700 'password': password,
702 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
705 login_results = urllib2.urlopen(request).read()
# If the login form is still present in the response, login failed.
706 if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
707 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
709 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
710 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
716 'action_confirm': 'Confirm',
718 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
720 self.report_age_confirmation()
721 age_results = urllib2.urlopen(request).read()
722 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
723 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
726 def _real_extract(self, url):
727 # Extract video id from URL
728 mobj = re.match(self._VALID_URL, url)
730 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
732 video_id = mobj.group(2)
734 # Downloader parameters
738 if self._downloader is not None:
739 params = self._downloader.params
740 format_param = params.get('format', None)
# format '0' means "best available": walk _available_formats by quality.
741 if format_param == '0':
742 format_param = self._available_formats[quality_index]
747 video_extension = self._video_extensions.get(format_param, 'flv')
750 video_info_url = 'http://www.youtube.com/get_video_info?&video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en' % video_id
751 request = urllib2.Request(video_info_url, None, std_headers)
753 self.report_video_info_webpage_download(video_id)
754 video_info_webpage = urllib2.urlopen(request).read()
755 video_info = parse_qs(video_info_webpage)
756 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
757 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
759 self.report_information_extraction(video_id)
762 if 'token' not in video_info:
763 # Attempt to see if YouTube has issued an error message
764 if 'reason' not in video_info:
765 self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason')
# The raw response is dumped to disk so users can attach it to bug reports.
766 stream = open('reportme-ydl-%s.dat' % time.time(), 'wb')
767 stream.write(video_info_webpage)
770 reason = urllib.unquote_plus(video_info['reason'][0])
771 self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8'))
773 token = urllib.unquote_plus(video_info['token'][0])
774 video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
775 if format_param is not None:
776 video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
778 # Check possible RTMP download
779 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
780 self.report_rtmp_download()
781 video_real_url = video_info['conn'][0]
784 if 'author' not in video_info:
785 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
787 video_uploader = urllib.unquote_plus(video_info['author'][0])
790 if 'title' not in video_info:
791 self._downloader.trouble(u'ERROR: unable to extract video title')
793 video_title = urllib.unquote_plus(video_info['title'][0])
794 video_title = video_title.decode('utf-8')
795 video_title = sanitize_title(video_title)
# The simplified title keeps only characters from simple_title_chars.
798 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
799 simple_title = simple_title.strip(ur'_')
802 # Process video information
803 self._downloader.process_info({
804 'id': video_id.decode('utf-8'),
805 'url': video_real_url.decode('utf-8'),
806 'uploader': video_uploader.decode('utf-8'),
807 'title': video_title,
808 'stitle': simple_title,
809 'ext': video_extension.decode('utf-8'),
814 except UnavailableFormatError, err:
816 if quality_index == len(self._available_formats) - 1:
817 # I don't ever expect this to happen
818 self._downloader.trouble(u'ERROR: no known formats available for video')
821 self.report_unavailable_format(video_id, format_param)
823 format_param = self._available_formats[quality_index]
826 self._downloader.trouble('ERROR: format not available for video')
# NOTE(review): extracted with fused line numbers and elided interior lines;
# code left byte-identical, comments only.
830 class MetacafeIE(InfoExtractor):
831 """Information Extractor for metacafe.com."""
833 _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
834 _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
835 _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
# A YoutubeIE instance is injected because Metacafe hosts some YouTube
# videos (see the 'yt-' id check in _real_extract).
838 def __init__(self, youtube_ie, downloader=None):
839 InfoExtractor.__init__(self, downloader)
840 self._youtube_ie = youtube_ie
844 return (re.match(MetacafeIE._VALID_URL, url) is not None)
846 def report_disclaimer(self):
847 """Report disclaimer retrieval."""
848 self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
850 def report_age_confirmation(self):
851 """Report attempt to confirm age."""
852 self._downloader.to_stdout(u'[metacafe] Confirming age')
854 def report_download_webpage(self, video_id):
855 """Report webpage download."""
856 self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
858 def report_extraction(self, video_id):
859 """Report information extraction."""
860 self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
862 def _real_initialize(self):
863 # Retrieve disclaimer
864 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
866 self.report_disclaimer()
867 disclaimer = urllib2.urlopen(request).read()
868 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
869 self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
875 'submit': "Continue - I'm over 18",
877 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
879 self.report_age_confirmation()
880 disclaimer = urllib2.urlopen(request).read()
881 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
882 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
885 def _real_extract(self, url):
886 # Extract id and simplified title from URL
887 mobj = re.match(self._VALID_URL, url)
889 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
892 video_id = mobj.group(1)
894 # Check if video comes from YouTube
895 mobj2 = re.match(r'^yt-(.*)$', video_id)
896 if mobj2 is not None:
# Delegate YouTube-hosted videos to the injected YoutubeIE.
897 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
900 simple_title = mobj.group(2).decode('utf-8')
901 video_extension = 'flv'
903 # Retrieve video webpage to extract further information
904 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
906 self.report_download_webpage(video_id)
907 webpage = urllib2.urlopen(request).read()
908 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
909 self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
912 # Extract URL, uploader and title from webpage
913 self.report_extraction(video_id)
914 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
916 self._downloader.trouble(u'ERROR: unable to extract media URL')
918 mediaURL = urllib.unquote(mobj.group(1))
# The gdaKey handling below was disabled by the original author.
920 #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
922 # self._downloader.trouble(u'ERROR: unable to extract gdaKey')
924 #gdaKey = mobj.group(1)
926 #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
930 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
932 self._downloader.trouble(u'ERROR: unable to extract title')
934 video_title = mobj.group(1).decode('utf-8')
935 video_title = sanitize_title(video_title)
937 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
939 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
941 video_uploader = mobj.group(1)
944 # Process video information
945 self._downloader.process_info({
946 'id': video_id.decode('utf-8'),
947 'url': video_url.decode('utf-8'),
948 'uploader': video_uploader.decode('utf-8'),
949 'title': video_title,
950 'stitle': simple_title,
951 'ext': video_extension.decode('utf-8'),
953 except UnavailableFormatError:
954 self._downloader.trouble(u'ERROR: format not available for video')
# NOTE(review): extracted with fused line numbers and elided interior lines;
# code left byte-identical, comments only.
957 class GoogleIE(InfoExtractor):
958 """Information extractor for video.google.com."""
960 _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
962 def __init__(self, downloader=None):
963 InfoExtractor.__init__(self, downloader)
967 return (re.match(GoogleIE._VALID_URL, url) is not None)
969 def report_download_webpage(self, video_id):
970 """Report webpage download."""
971 self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id)
973 def report_extraction(self, video_id):
974 """Report information extraction."""
975 self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id)
# No initialization (login/cookies) is needed for Google Video.
977 def _real_initialize(self):
980 def _real_extract(self, url):
981 # Extract id from URL
982 mobj = re.match(self._VALID_URL, url)
984 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
987 video_id = mobj.group(1)
989 video_extension = 'mp4'
991 # Retrieve video webpage to extract further information
992 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
994 self.report_download_webpage(video_id)
995 webpage = urllib2.urlopen(request).read()
996 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
997 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1000 # Extract URL, uploader, and title from webpage
1001 self.report_extraction(video_id)
# Prefer the mp4 download_url; fall back to the flv videoUrl variant.
1002 mobj = re.search(r"download_url:'([^']+)'", webpage)
1004 video_extension = 'flv'
1005 mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1007 self._downloader.trouble(u'ERROR: unable to extract media URL')
1009 mediaURL = urllib.unquote(mobj.group(1))
# Undo the JavaScript escaping of '=' (\x3d) and '&' (\x26).
1010 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1011 mediaURL = mediaURL.replace('\\x26', '\x26')
1013 video_url = mediaURL
1015 mobj = re.search(r'<title>(.*)</title>', webpage)
1017 self._downloader.trouble(u'ERROR: unable to extract title')
1019 video_title = mobj.group(1).decode('utf-8')
1020 video_title = sanitize_title(video_title)
1022 # Google Video doesn't show uploader nicknames?
1023 video_uploader = 'NA'
1026 # Process video information
1027 self._downloader.process_info({
1028 'id': video_id.decode('utf-8'),
1029 'url': video_url.decode('utf-8'),
1030 'uploader': video_uploader.decode('utf-8'),
1031 'title': video_title,
1032 'stitle': video_title,
1033 'ext': video_extension.decode('utf-8'),
1035 except UnavailableFormatError:
1036 self._downloader.trouble(u'ERROR: format not available for video')
# NOTE(review): extracted with fused line numbers and elided interior lines;
# code left byte-identical, comments only.
1039 class PhotobucketIE(InfoExtractor):
1040 """Information extractor for photobucket.com."""
1042 _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1044 def __init__(self, downloader=None):
1045 InfoExtractor.__init__(self, downloader)
1049 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1051 def report_download_webpage(self, video_id):
1052 """Report webpage download."""
1053 self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id)
1055 def report_extraction(self, video_id):
1056 """Report information extraction."""
1057 self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id)
# No initialization (login/cookies) is needed for Photobucket.
1059 def _real_initialize(self):
1062 def _real_extract(self, url):
1063 # Extract id from URL
1064 mobj = re.match(self._VALID_URL, url)
1066 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1069 video_id = mobj.group(1)
1071 video_extension = 'flv'
1073 # Retrieve video webpage to extract further information
1074 request = urllib2.Request(url)
1076 self.report_download_webpage(video_id)
1077 webpage = urllib2.urlopen(request).read()
1078 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1079 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1082 # Extract URL, uploader, and title from webpage
1083 self.report_extraction(video_id)
1084 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1086 self._downloader.trouble(u'ERROR: unable to extract media URL')
1088 mediaURL = urllib.unquote(mobj.group(1))
1090 video_url = mediaURL
# One regex yields both the title (group 1) and the uploader (group 2).
1092 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1094 self._downloader.trouble(u'ERROR: unable to extract title')
1096 video_title = mobj.group(1).decode('utf-8')
1097 video_title = sanitize_title(video_title)
1099 video_uploader = mobj.group(2).decode('utf-8')
1102 # Process video information
1103 self._downloader.process_info({
1104 'id': video_id.decode('utf-8'),
1105 'url': video_url.decode('utf-8'),
1106 'uploader': video_uploader,
1107 'title': video_title,
1108 'stitle': video_title,
1109 'ext': video_extension.decode('utf-8'),
1111 except UnavailableFormatError:
1112 self._downloader.trouble(u'ERROR: format not available for video')
1115 class GenericIE(InfoExtractor):
1116 """Generic last-resort information extractor."""
# NOTE(review): gaps in the embedded line numbering show this capture elides
# some original lines (guards such as `if mobj is None:` with `return`s and
# `try:` headers). Comments below describe the code as captured.
1118 def __init__(self, downloader=None):
1119 InfoExtractor.__init__(self, downloader)
1125 def report_download_webpage(self, video_id):
1126 """Report webpage download."""
# Warn loudly: reaching this extractor means no site-specific IE matched.
1127 self._downloader.to_stdout(u'WARNING: Falling back on generic information extractor.')
1128 self._downloader.to_stdout(u'[generic] %s: Downloading webpage' % video_id)
1130 def report_extraction(self, video_id):
1131 """Report information extraction."""
1132 self._downloader.to_stdout(u'[generic] %s: Extracting information' % video_id)
# No setup required for the generic extractor.
1134 def _real_initialize(self):
1137 def _real_extract(self, url):
# Provisional id: last path component; replaced below once the real media
# URL is known.
1138 video_id = url.split('/')[-1]
1139 request = urllib2.Request(url)
1141 self.report_download_webpage(video_id)
1142 webpage = urllib2.urlopen(request).read()
1143 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1144 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
# urllib2 raises ValueError for malformed URLs (e.g. unknown scheme).
1146 except ValueError, err:
1147 # since this is the last-resort InfoExtractor, if
1148 # this error is thrown, it'll be thrown here
1149 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1152 # Start with something easy: JW Player in SWFObject
1153 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1155 # Broaden the search a little bit
1156 mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1158 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1161 # It's possible that one of the regexes
1162 # matched, but returned an empty group:
1163 if mobj.group(1) is None:
1164 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
# The match is a percent-encoded direct media URL.
1167 video_url = urllib.unquote(mobj.group(1))
1168 video_id = os.path.basename(video_url)
1170 # here's a fun little line of code for you:
# Split "name.ext" into extension (without the dot) and bare id.
1171 video_extension = os.path.splitext(video_id)[1][1:]
1172 video_id = os.path.splitext(video_id)[0]
1174 # it's tempting to parse this further, but you would
1175 # have to take into account all the variations like
1176 # Video Title - Site Name
1177 # Site Name | Video Title
1178 # Video Title - Tagline | Site Name
1179 # and so on and so forth; it's just not practical
1180 mobj = re.search(r'<title>(.*)</title>', webpage)
1182 self._downloader.trouble(u'ERROR: unable to extract title')
1184 video_title = mobj.group(1).decode('utf-8')
1185 video_title = sanitize_title(video_title)
1187 # video uploader is domain name
1188 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
# NOTE(review): this guard concerns the uploader (domain name), but the
# error message says "title" — likely copy-pasted; should read
# "unable to extract uploader nickname" or similar.
1190 self._downloader.trouble(u'ERROR: unable to extract title')
1192 video_uploader = mobj.group(1).decode('utf-8')
1195 # Process video information
# process_info may raise UnavailableFormatError, caught below.
1196 self._downloader.process_info({
1197 'id': video_id.decode('utf-8'),
1198 'url': video_url.decode('utf-8'),
1199 'uploader': video_uploader,
1200 'title': video_title,
1201 'stitle': video_title,
1202 'ext': video_extension.decode('utf-8'),
1204 except UnavailableFormatError:
1205 self._downloader.trouble(u'ERROR: format not available for video')
1208 class YoutubeSearchIE(InfoExtractor):
1209 """Information Extractor for YouTube search queries."""
# NOTE(review): gaps in the embedded line numbering show this capture elides
# some original lines (the `suitable()` header, `try: n = long(prefix)`
# before the invalid-number branch, loop headers, `return`s).
# Query syntax: "ytsearch:QUERY" (first hit), "ytsearchN:QUERY" (N hits),
# "ytsearchall:QUERY" (up to _max_youtube_results hits).
1210 _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1211 _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1212 _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1213 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
# Hard cap on results; YouTube search will not return more than this.
1215 _max_youtube_results = 1000
# Actual video extraction is delegated to the wrapped YoutubeIE instance.
1217 def __init__(self, youtube_ie, downloader=None):
1218 InfoExtractor.__init__(self, downloader)
1219 self._youtube_ie = youtube_ie
1223 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1225 def report_download_page(self, query, pagenum):
1226 """Report attempt to download a search results page with given number."""
1227 query = query.decode(preferredencoding())
1228 self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1230 def _real_initialize(self):
1231 self._youtube_ie.initialize()
1233 def _real_extract(self, query):
1234 mobj = re.match(self._VALID_QUERY, query)
1236 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
# Split "ytsearchN" prefix from the search terms.
1239 prefix, query = query.split(':')
1241 query = query.encode('utf-8')
# Empty prefix ("ytsearch:") means: download only the first result.
1243 self._download_n_results(query, 1)
1245 elif prefix == 'all':
1246 self._download_n_results(query, self._max_youtube_results)
# The numeric-prefix path (`n = long(prefix)` in a try:, elided here)
# rejects non-positive counts and clamps to the maximum.
1252 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1254 elif n > self._max_youtube_results:
1255 self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
1256 n = self._max_youtube_results
1257 self._download_n_results(query, n)
1259 except ValueError: # parsing prefix as integer fails
1260 self._download_n_results(query, 1)
1263 def _download_n_results(self, query, n):
1264 """Downloads a specified number of results for a query"""
# De-duplicate ids across result pages (search pages repeat entries).
1267 already_seen = set()
1271 self.report_download_page(query, pagenum)
1272 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1273 request = urllib2.Request(result_url, None, std_headers)
1275 page = urllib2.urlopen(request).read()
1276 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1277 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1280 # Extract video identifiers
1281 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
# The match looks like href="/watch?v=ID"; split on '=' and drop the
# trailing quote to recover ID.
1282 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1283 if video_id not in already_seen:
1284 video_ids.append(video_id)
1285 already_seen.add(video_id)
1286 if len(video_ids) == n:
1287 # Specified n videos reached
1288 for id in video_ids:
1289 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
# No "Next" link means the last results page was reached; flush what we have.
1292 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1293 for id in video_ids:
1294 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1297 pagenum = pagenum + 1
1299 class YoutubePlaylistIE(InfoExtractor):
1300 """Information Extractor for YouTube playlists."""
# NOTE(review): gaps in the embedded line numbering show this capture elides
# some original lines (the `suitable()` header, `if mobj is None:` guards,
# `try:` headers, the page loop header, `break`/`return`s).
1302 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*'
1303 _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1304 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
# NOTE: despite the r'' prefix this is used as a %-format substring test
# (see the `not in page` check below), not as a regex.
1305 _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s'
# Actual video extraction is delegated to the wrapped YoutubeIE instance.
1308 def __init__(self, youtube_ie, downloader=None):
1309 InfoExtractor.__init__(self, downloader)
1310 self._youtube_ie = youtube_ie
1314 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1316 def report_download_page(self, playlist_id, pagenum):
1317 """Report attempt to download playlist page with given number."""
1318 self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1320 def _real_initialize(self):
1321 self._youtube_ie.initialize()
1323 def _real_extract(self, url):
1324 # Extract playlist id
1325 mobj = re.match(self._VALID_URL, url)
1327 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1330 # Download playlist pages
1331 playlist_id = mobj.group(1)
1336 self.report_download_page(playlist_id, pagenum)
1337 request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1339 page = urllib2.urlopen(request).read()
1340 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1341 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1344 # Extract video identifiers
# ids_in_page preserves first-seen order within the page while dropping
# the duplicate /watch links each entry has.
1346 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1347 if mobj.group(1) not in ids_in_page:
1348 ids_in_page.append(mobj.group(1))
1349 video_ids.extend(ids_in_page)
# Stop when the page carries no link to the next playlist page. The id is
# upper-cased because YouTube prints playlist ids in upper case in links.
1351 if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page:
1353 pagenum = pagenum + 1
# Hand every collected id to the YouTube extractor.
1355 for id in video_ids:
1356 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1359 class YoutubeUserIE(InfoExtractor):
1360 """Information Extractor for YouTube users."""
# NOTE(review): gaps in the embedded line numbering show this capture elides
# some original lines (the `suitable()` header, `if mobj is None:` guards,
# `try:` headers, `return`s).
1362 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
# gdata API feed for a user's uploaded videos.
1363 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1364 _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
# Actual video extraction is delegated to the wrapped YoutubeIE instance.
1367 def __init__(self, youtube_ie, downloader=None):
1368 InfoExtractor.__init__(self, downloader)
1369 self._youtube_ie = youtube_ie
1373 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
1375 def report_download_page(self, username):
1376 """Report attempt to download user page."""
1377 self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))
1379 def _real_initialize(self):
1380 self._youtube_ie.initialize()
1382 def _real_extract(self, url):
1384 mobj = re.match(self._VALID_URL, url)
1386 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1389 # Download user page
1390 username = mobj.group(1)
1394 self.report_download_page(username)
1395 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
1397 page = urllib2.urlopen(request).read()
1398 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1399 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1402 # Extract video identifiers
# NOTE(review): unlike the playlist IE there is no pagination loop here —
# only the first gdata feed page is fetched, so long upload lists are
# truncated (see the XXX above on _VIDEO_INDICATOR).
1405 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1406 if mobj.group(1) not in ids_in_page:
1407 ids_in_page.append(mobj.group(1))
1408 video_ids.extend(ids_in_page)
1410 for id in video_ids:
1411 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
class PostProcessor(object):
	"""Base class for post-download processing steps.

	A PostProcessor is attached to a downloader through the downloader's
	add_post_processor() method. After each successful download, the
	downloader walks its chain of PostProcessors, feeding the first one an
	initial information dictionary and each subsequent one the value
	returned by its predecessor.

	The walk stops as soon as a processor returns None, or when the end of
	the chain is reached.

	Like InfoExtractor, this class participates in a "mutual registration"
	scheme with the downloader.
	"""

	def __init__(self, downloader=None):
		# Delegate to the setter so both entry points share one code path.
		self.set_downloader(downloader)

	def set_downloader(self, downloader):
		"""Attach the downloader this PP belongs to."""
		self._downloader = downloader

	def run(self, information):
		"""Run this processing step.

		`information` is a dictionary shaped like the ones InfoExtractors
		build, with one extra key, "filepath", naming the downloaded file.

		Return None to stop the postprocessing chain, or an information
		dictionary (possibly this one, with fields changed) to pass along
		to the next processor. May raise PostProcessingError, which the
		downloader takes into account.
		"""
		# Default behavior: pass the information through untouched.
		return information
1460 ### MAIN PROGRAM ###
# NOTE(review): gaps in the embedded line numbering show this capture elides
# some original lines (`try:` headers, `import` lines for the main program,
# `stream.close()` in update_self, exit-path lines near the bottom).
1461 if __name__ == '__main__':
1463 # Modules needed only when running the main program
1467 # Function to update the program file with the latest version from bitbucket.org
1468 def update_self(downloader, filename):
# Overwrites this very script with the latest tagged release.
# NOTE(review): the update is fetched over plain HTTP with no signature or
# checksum verification — the downloaded content replaces an executable
# file, so a network attacker could inject arbitrary code. Worth flagging.
1469 # Note: downloader only used for options
1470 if not os.access (filename, os.W_OK):
1471 sys.exit('ERROR: no write permissions on %s' % filename)
1473 downloader.to_stdout('Updating to latest stable version...')
# LATEST_VERSION holds the tag name of the newest release.
1474 latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
1475 latest_version = urllib.urlopen(latest_url).read().strip()
1476 prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
1477 newcontent = urllib.urlopen(prog_url).read()
1478 stream = open(filename, 'w')
1479 stream.write(newcontent)
# (stream.close() is elided from this capture at original line 1480.)
1481 downloader.to_stdout('Updated to version %s' % latest_version)
1483 # General configuration
# Install proxy and cookie handlers globally; the second install replaces
# the first opener, but both add their handler to the default chain.
1484 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
1485 urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
1486 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
1488 # Parse command line
# conflict_handler='resolve' lets -h/-v be redefined below.
1489 parser = optparse.OptionParser(
1490 usage='Usage: %prog [options] url...',
1492 conflict_handler='resolve',
1495 parser.add_option('-h', '--help',
1496 action='help', help='print this help text and exit')
1497 parser.add_option('-v', '--version',
1498 action='version', help='print program version and exit')
1499 parser.add_option('-U', '--update',
1500 action='store_true', dest='update_self', help='update this program to latest stable version')
1501 parser.add_option('-i', '--ignore-errors',
1502 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
1503 parser.add_option('-r', '--rate-limit',
1504 dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
1506 authentication = optparse.OptionGroup(parser, 'Authentication Options')
1507 authentication.add_option('-u', '--username',
1508 dest='username', metavar='UN', help='account username')
1509 authentication.add_option('-p', '--password',
1510 dest='password', metavar='PW', help='account password')
1511 authentication.add_option('-n', '--netrc',
1512 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
1513 parser.add_option_group(authentication)
1515 video_format = optparse.OptionGroup(parser, 'Video Format Options')
# -b/-m/-d are aliases that store fixed format codes into the same dest.
1516 video_format.add_option('-f', '--format',
1517 action='store', dest='format', metavar='FMT', help='video format code')
1518 video_format.add_option('-b', '--best-quality',
1519 action='store_const', dest='format', help='download the best quality video possible', const='0')
1520 video_format.add_option('-m', '--mobile-version',
1521 action='store_const', dest='format', help='alias for -f 17', const='17')
1522 video_format.add_option('-d', '--high-def',
1523 action='store_const', dest='format', help='alias for -f 22', const='22')
1524 parser.add_option_group(video_format)
1526 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
1527 verbosity.add_option('-q', '--quiet',
1528 action='store_true', dest='quiet', help='activates quiet mode', default=False)
1529 verbosity.add_option('-s', '--simulate',
1530 action='store_true', dest='simulate', help='do not download video', default=False)
1531 verbosity.add_option('-g', '--get-url',
1532 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
1533 verbosity.add_option('-e', '--get-title',
1534 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
1535 parser.add_option_group(verbosity)
1537 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
1538 filesystem.add_option('-t', '--title',
1539 action='store_true', dest='usetitle', help='use title in file name', default=False)
1540 filesystem.add_option('-l', '--literal',
1541 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
1542 filesystem.add_option('-o', '--output',
1543 dest='outtmpl', metavar='TPL', help='output filename template')
1544 filesystem.add_option('-a', '--batch-file',
1545 dest='batchfile', metavar='F', help='file containing URLs to download')
1546 filesystem.add_option('-w', '--no-overwrites',
1547 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
1548 filesystem.add_option('-c', '--continue',
1549 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
1550 parser.add_option_group(filesystem)
1552 (opts, args) = parser.parse_args()
1554 # Batch file verification
# Batch-file URLs come first, then the positional arguments.
1556 if opts.batchfile is not None:
1558 batchurls = open(opts.batchfile, 'r').readlines()
1559 batchurls = [x.strip() for x in batchurls]
1560 batchurls = [x for x in batchurls if len(x) > 0]
1562 sys.exit(u'ERROR: batch file could not be read')
1563 all_urls = batchurls + args
1565 # Make sure all URLs are in our preferred encoding
1566 for i in range(0, len(all_urls)):
1567 all_urls[i] = unicode(all_urls[i], preferredencoding())
1569 # Conflicting, missing and erroneous options
1570 if opts.usenetrc and (opts.username is not None or opts.password is not None):
1571 parser.error(u'using .netrc conflicts with giving username/password')
1572 if opts.password is not None and opts.username is None:
1573 parser.error(u'account username missing')
1574 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
1575 parser.error(u'using output template conflicts with using title or literal title')
1576 if opts.usetitle and opts.useliteral:
1577 parser.error(u'using title conflicts with using literal title')
# Username without password: prompt interactively rather than erroring.
1578 if opts.username is not None and opts.password is None:
1579 opts.password = getpass.getpass(u'Type account password and press return:')
1580 if opts.ratelimit is not None:
# parse_bytes accepts suffixed values like "50k"; None signals a bad spec.
1581 numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
1582 if numeric_limit is None:
1583 parser.error(u'invalid rate limit specified')
1584 opts.ratelimit = numeric_limit
1586 # Information extractors
# The YouTube IE is shared by the metacafe/playlist/user/search wrappers.
1587 youtube_ie = YoutubeIE()
1588 metacafe_ie = MetacafeIE(youtube_ie)
1589 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
1590 youtube_user_ie = YoutubeUserIE(youtube_ie)
1591 youtube_search_ie = YoutubeSearchIE(youtube_ie)
1592 google_ie = GoogleIE()
1593 photobucket_ie = PhotobucketIE()
1594 generic_ie = GenericIE()
1597 fd = FileDownloader({
1598 'usenetrc': opts.usenetrc,
1599 'username': opts.username,
1600 'password': opts.password,
# -g/-e imply both quiet and simulate.
1601 'quiet': (opts.quiet or opts.geturl or opts.gettitle),
1602 'forceurl': opts.geturl,
1603 'forcetitle': opts.gettitle,
1604 'simulate': (opts.simulate or opts.geturl or opts.gettitle),
1605 'format': opts.format,
# Output template priority: explicit -o, then -t, then -l, then id-only.
1606 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
1607 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
1608 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
1609 or u'%(id)s.%(ext)s'),
1610 'ignoreerrors': opts.ignoreerrors,
1611 'ratelimit': opts.ratelimit,
1612 'nooverwrites': opts.nooverwrites,
1613 'continuedl': opts.continue_dl,
# Registration order matters: more specific extractors are tried first.
1615 fd.add_info_extractor(youtube_search_ie)
1616 fd.add_info_extractor(youtube_pl_ie)
1617 fd.add_info_extractor(youtube_user_ie)
1618 fd.add_info_extractor(metacafe_ie)
1619 fd.add_info_extractor(youtube_ie)
1620 fd.add_info_extractor(google_ie)
1621 fd.add_info_extractor(photobucket_ie)
1623 # This must come last since it's the
1624 # fallback if none of the others work
1625 fd.add_info_extractor(generic_ie)
1628 if opts.update_self:
# sys.argv[0] is the path of this running script.
1629 update_self(fd, sys.argv[0])
1632 if len(all_urls) < 1:
1633 if not opts.update_self:
1634 parser.error(u'you must provide at least one URL')
1637 retcode = fd.download(all_urls)
1640 except DownloadError:
1642 except SameFileError:
1643 sys.exit(u'ERROR: fixed output name but more than one file to download')
1644 except KeyboardInterrupt:
1645 sys.exit(u'\nERROR: Interrupted by user')