2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # License: Public domain code
23 # parse_qs was moved from the cgi module to the urlparse module recently.
25 from urlparse import parse_qs
27 from cgi import parse_qs
30 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.7) Gecko/20100720 Firefox/3.6.7',
31 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
32 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
33 'Accept-Language': 'en-us,en;q=0.5',
36 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
def preferredencoding():
	"""Get preferred encoding.

	Returns the best encoding scheme for the system, based on
	locale.getpreferredencoding() and some further tweaks.
	"""
	# NOTE(review): body partially reconstructed; the original wrapped this
	# in a one-shot generator — flattened here, the returned value is the same.
	try:
		pref = locale.getpreferredencoding()
		# Make sure the preferred encoding actually works for unicode text
		u'TEST'.encode(pref)
	except:
		pref = 'UTF-8'
	return pref
54 def htmlentity_transform(matchobj):
55 """Transforms an HTML entity to a Unicode character.
57 This function receives a match object and is intended to be used with
58 the re.sub() function.
60 entity = matchobj.group(1)
62 # Known non-numeric HTML entity
63 if entity in htmlentitydefs.name2codepoint:
64 return unichr(htmlentitydefs.name2codepoint[entity])
67 mobj = re.match(ur'(?u)#(x?\d+)', entity)
69 numstr = mobj.group(1)
70 if numstr.startswith(u'x'):
72 numstr = u'0%s' % numstr
75 return unichr(long(numstr, base))
77 # Unknown entity in name, return its literal representation
78 return (u'&%s;' % entity)
80 def sanitize_title(utitle):
81 """Sanitizes a video title so it could be used as part of a filename."""
82 utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
83 return utitle.replace(unicode(os.sep), u'%')
85 def sanitize_open(filename, open_mode):
86 """Try to open the given filename, and slightly tweak it if this fails.
88 Attempts to open the given filename. If this fails, it tries to change
89 the filename slightly, step by step, until it's either able to open it
90 or it fails and raises a final exception, like the standard open()
93 It returns the tuple (stream, definitive_file_name).
97 return (sys.stdout, filename)
98 stream = open(filename, open_mode)
99 return (stream, filename)
100 except (IOError, OSError), err:
101 # In case of error, try to remove win32 forbidden chars
102 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
104 # An exception here should be caught in the caller
105 stream = open(filename, open_mode)
106 return (stream, filename)
class DownloadError(Exception):
	"""Download Error exception.

	This exception may be thrown by FileDownloader objects if they are not
	configured to continue on errors. They will contain the appropriate
	error message.
	"""
	pass
class SameFileError(Exception):
	"""Same File exception.

	This exception will be thrown by FileDownloader objects if they detect
	multiple files would have to be downloaded to the same file on disk.
	"""
	pass
class PostProcessingError(Exception):
	"""Post Processing exception.

	This exception may be raised by PostProcessor's .run() method to
	indicate an error in the postprocessing task.
	"""
	pass
class UnavailableVideoError(Exception):
	"""Unavailable Format exception.

	This exception will be thrown when a video is requested
	in a format that is not available for that video.
	"""
	pass
class ContentTooShortError(Exception):
	"""Content Too Short exception.

	This exception may be raised by FileDownloader objects when a file they
	download is too small for what the server announced first, indicating
	the connection was probably interrupted.
	"""
	# Both attributes are byte counts
	def __init__(self, downloaded, expected):
		self.downloaded = downloaded
		self.expected = expected
157 class FileDownloader(object):
158 """File Downloader class.
160 File downloader objects are the ones responsible of downloading the
161 actual video file and writing it to disk if the user has requested
162 it, among some other tasks. In most cases there should be one per
163 program. As, given a video URL, the downloader doesn't know how to
164 extract all the needed information, task that InfoExtractors do, it
165 has to pass the URL to one of them.
167 For this, file downloader objects have a method that allows
168 InfoExtractors to be registered in a given order. When it is passed
169 a URL, the file downloader handles it to the first InfoExtractor it
170 finds that reports being able to handle it. The InfoExtractor extracts
171 all the information about the video or videos the URL refers to, and
172 asks the FileDownloader to process the video information, possibly
173 downloading the video.
175 File downloaders accept a lot of parameters. In order not to saturate
176 the object constructor with arguments, it receives a dictionary of
177 options instead. These options are available through the params
178 attribute for the InfoExtractors to use. The FileDownloader also
179 registers itself as the downloader in charge for the InfoExtractors
180 that are added to it, so this is a "mutual registration".
184 username: Username for authentication purposes.
185 password: Password for authentication purposes.
186 usenetrc: Use netrc for authentication instead.
187 quiet: Do not print messages to stdout.
188 forceurl: Force printing final URL.
189 forcetitle: Force printing title.
190 simulate: Do not download the video files.
191 format: Video format code.
192 format_limit: Highest quality format to try.
193 outtmpl: Template for output names.
194 ignoreerrors: Do not stop on download errors.
195 ratelimit: Download speed limit, in bytes/sec.
196 nooverwrites: Prevent overwriting files.
197 retries: Number of times to retry for HTTP error 503
198 continuedl: Try to continue downloads if possible.
199 noprogress: Do not print the progress bar.
205 _download_retcode = None
206 _num_downloads = None
208 def __init__(self, params):
209 """Create a FileDownloader object with the given options."""
212 self._download_retcode = 0
213 self._num_downloads = 0
@staticmethod
def pmkdir(filename):
	"""Create directory components in filename. Similar to Unix "mkdir -p"."""
	components = filename.split(os.sep)
	# Every ancestor path of the file, each one terminated with the separator
	ancestors = [os.sep.join(components[0:x]) + os.sep for x in xrange(1, len(components))]
	for dirname in ancestors:  # renamed: original shadowed the builtin 'dir'
		if not os.path.exists(dirname):
			os.mkdir(dirname)
@staticmethod
def format_bytes(bytes):
	"""Return a human-readable string (e.g. '5.50k') for a byte count."""
	# NOTE(review): the None/zero guards are reconstructed from a partially
	# elided original; confirm against the full file.
	if bytes is None:
		return 'N/A'
	if type(bytes) is str:
		bytes = float(bytes)
	if bytes == 0.0:
		exp = 0
	else:
		exp = long(math.log(bytes, 1024.0))
	converted = float(bytes) / float(1024 ** exp)
	return '%.2f%s' % (converted, 'bkMGTPEZY'[exp])
@staticmethod
def calc_percent(byte_counter, data_len):
	"""Return download progress as a right-aligned percentage string."""
	if data_len is None:
		return '---.-%'
	percent = float(byte_counter) / float(data_len) * 100.0
	return '%6s' % ('%3.1f%%' % percent)
@staticmethod
def calc_eta(start, now, total, current):
	"""Estimate remaining download time as 'MM:SS', or '--:--' if unknown."""
	# NOTE(review): the '--:--' fallback branches are reconstructed from a
	# partially elided original; confirm against the full file.
	if total is None:
		return '--:--'
	dif = now - start
	if current == 0 or dif < 0.001: # One millisecond
		return '--:--'
	rate = float(current) / dif
	eta = long((float(total) - float(current)) / rate)
	(eta_mins, eta_secs) = divmod(eta, 60)
	if eta_mins > 99:
		return '--:--'
	return '%02d:%02d' % (eta_mins, eta_secs)
@staticmethod
def calc_speed(start, now, bytes):
	"""Format the average speed since `start` as a right-aligned string."""
	dif = now - start
	if bytes == 0 or dif < 0.001: # One millisecond
		return '%10s' % '---b/s'
	speed = FileDownloader.format_bytes(float(bytes) / dif)
	return '%10s' % ('%s/s' % speed)
@staticmethod
def best_block_size(elapsed_time, bytes):
	"""Pick the next read size from the last chunk's observed rate.

	The result is clamped between half and double the previous chunk size,
	and never exceeds 4 MB.
	"""
	new_min = max(bytes / 2.0, 1.0)
	new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
	if elapsed_time < 0.001:
		return long(new_max)
	rate = bytes / elapsed_time
	if rate > new_max:
		return long(new_max)
	if rate < new_min:
		return long(new_min)
	return long(rate)
@staticmethod
def parse_bytes(bytestr):
	"""Parse a string indicating a byte quantity into a long integer."""
	matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
	if matchobj is None:
		return None
	number = float(matchobj.group(1))
	# An empty suffix yields str.index('') == 0, i.e. a multiplier of 1
	multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
	return long(round(number * multiplier))
def add_info_extractor(self, ie):
	"""Add an InfoExtractor object to the end of the list."""
	# Mutual registration: we keep the IE, the IE keeps us as its downloader
	self._ies.append(ie)
	ie.set_downloader(self)
def add_post_processor(self, pp):
	"""Add a PostProcessor object to the end of the chain."""
	self._pps.append(pp)
	pp.set_downloader(self)
300 def to_stdout(self, message, skip_eol=False, ignore_encoding_errors=False):
301 """Print message to stdout if not in quiet mode."""
303 if not self.params.get('quiet', False):
304 print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
306 except (UnicodeEncodeError), err:
307 if not ignore_encoding_errors:
def to_stderr(self, message):
	"""Print message to stderr."""
	sys.stderr.write(message.encode(preferredencoding()) + '\n')
314 def fixed_template(self):
315 """Checks if the output template is fixed."""
316 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
def trouble(self, message=None):
	"""Determine action to take when a download problem appears.

	Depending on if the downloader has been configured to ignore
	download errors or not, this method may throw an exception or
	not when errors are found, after printing the message.
	"""
	if message is not None:
		self.to_stderr(message)
	ignore = self.params.get('ignoreerrors', False)
	if not ignore:
		raise DownloadError(message)
	# Errors are being ignored: remember a non-zero exit status instead
	self._download_retcode = 1
def slow_down(self, start_time, byte_counter):
	"""Sleep if the download speed is over the rate limit.

	Sleeps just long enough that, after waking, the average speed since
	start_time is back at the 'ratelimit' parameter (bytes/sec).
	"""
	rate_limit = self.params.get('ratelimit', None)
	if rate_limit is None or byte_counter == 0:
		return
	now = time.time()
	elapsed = now - start_time
	if elapsed <= 0.0:
		return
	speed = float(byte_counter) / elapsed
	if speed > rate_limit:
		# Reuse 'elapsed' instead of recomputing (now - start_time)
		time.sleep((byte_counter - rate_limit * elapsed) / rate_limit)
def report_destination(self, filename):
	"""Report destination filename."""
	msg = u'[download] Destination: %s' % filename
	self.to_stdout(msg, ignore_encoding_errors=True)
def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
	"""Report download progress."""
	if self.params.get('noprogress', False):
		return
	msg = u'\r[download] %s of %s at %s ETA %s' % (percent_str, data_len_str, speed_str, eta_str)
	self.to_stdout(msg, skip_eol=True)
def report_resuming_byte(self, resume_len):
	"""Report attempt to resume at given byte."""
	# (docstring typo "attemtp" fixed)
	self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
def report_retry(self, count, retries):
	"""Report retry in case of HTTP error 503"""
	msg = u'[download] Got HTTP error 503. Retrying (attempt %d of %d)...' % (count, retries)
	self.to_stdout(msg)
363 def report_file_already_downloaded(self, file_name):
364 """Report file has already been fully downloaded."""
366 self.to_stdout(u'[download] %s has already been downloaded' % file_name)
367 except (UnicodeEncodeError), err:
368 self.to_stdout(u'[download] The file has already been downloaded')
def report_unable_to_resume(self):
	"""Report it was impossible to resume download."""
	self.to_stdout(u'[download] Unable to resume')
def report_finish(self):
	"""Report download finished."""
	# NOTE(review): the else branch (blank line ending the progress display)
	# is reconstructed from a partially elided original; confirm.
	if self.params.get('noprogress', False):
		self.to_stdout(u'[download] Download completed')
	else:
		self.to_stdout(u'')
def increment_downloads(self):
	"""Increment the ordinal that assigns a number to each file."""
	self._num_downloads = self._num_downloads + 1
385 def process_info(self, info_dict):
386 """Process a single dictionary returned by an InfoExtractor."""
387 # Do nothing else if in simulate mode
388 if self.params.get('simulate', False):
390 if self.params.get('forcetitle', False):
391 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
392 if self.params.get('forceurl', False):
393 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
394 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
395 print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
396 if self.params.get('forcedescription', False) and 'description' in info_dict:
397 print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
402 template_dict = dict(info_dict)
403 template_dict['epoch'] = unicode(long(time.time()))
404 template_dict['ord'] = unicode('%05d' % self._num_downloads)
405 filename = self.params['outtmpl'] % template_dict
406 except (ValueError, KeyError), err:
407 self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
408 if self.params.get('nooverwrites', False) and os.path.exists(filename):
409 self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
413 self.pmkdir(filename)
414 except (OSError, IOError), err:
415 self.trouble('ERROR: unable to create directories: %s' % str(err))
419 success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
420 except (OSError, IOError), err:
421 raise UnavailableVideoError
422 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
423 self.trouble('ERROR: unable to download video data: %s' % str(err))
425 except (ContentTooShortError, ), err:
426 self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
431 self.post_process(filename, info_dict)
432 except (PostProcessingError), err:
433 self.trouble('ERROR: postprocessing: %s' % str(err))
436 def download(self, url_list):
437 """Download a given list of URLs."""
438 if len(url_list) > 1 and self.fixed_template():
439 raise SameFileError(self.params['outtmpl'])
442 suitable_found = False
444 # Go to next InfoExtractor if not suitable
445 if not ie.suitable(url):
448 # Suitable InfoExtractor found
449 suitable_found = True
451 # Extract information from URL and process it
454 # Suitable InfoExtractor had been found; go to next URL
457 if not suitable_found:
458 self.trouble('ERROR: no suitable InfoExtractor: %s' % url)
460 return self._download_retcode
462 def post_process(self, filename, ie_info):
463 """Run the postprocessing chain on the given file."""
465 info['filepath'] = filename
471 def _download_with_rtmpdump(self, filename, url, player_url):
472 self.report_destination(filename)
474 # Check for rtmpdump first
476 subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
477 except (OSError, IOError):
478 self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
481 # Download using rtmpdump. rtmpdump returns exit code 2 when
482 # the connection was interrumpted and resuming appears to be
483 # possible. This is part of rtmpdump's normal usage, AFAIK.
484 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', filename]
485 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
486 while retval == 2 or retval == 1:
487 prevsize = os.path.getsize(filename)
488 self.to_stdout(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
489 time.sleep(5.0) # This seems to be needed
490 retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
491 cursize = os.path.getsize(filename)
492 if prevsize == cursize and retval == 1:
495 self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
498 self.trouble('\nERROR: rtmpdump exited with code %d' % retval)
501 def _do_download(self, filename, url, player_url):
502 # Attempt to download using rtmpdump
503 if url.startswith('rtmp'):
504 return self._download_with_rtmpdump(filename, url, player_url)
508 basic_request = urllib2.Request(url, None, std_headers)
509 request = urllib2.Request(url, None, std_headers)
511 # Establish possible resume length
512 if os.path.isfile(filename):
513 resume_len = os.path.getsize(filename)
517 # Request parameters in case of being able to resume
518 if self.params.get('continuedl', False) and resume_len != 0:
519 self.report_resuming_byte(resume_len)
520 request.add_header('Range','bytes=%d-' % resume_len)
524 retries = self.params.get('retries', 0)
525 while count <= retries:
526 # Establish connection
528 data = urllib2.urlopen(request)
530 except (urllib2.HTTPError, ), err:
531 if err.code != 503 and err.code != 416:
532 # Unexpected HTTP error
534 elif err.code == 416:
535 # Unable to resume (requested range not satisfiable)
537 # Open the connection again without the range header
538 data = urllib2.urlopen(basic_request)
539 content_length = data.info()['Content-Length']
540 except (urllib2.HTTPError, ), err:
544 # Examine the reported length
545 if content_length is not None and long(content_length) == resume_len:
546 # The file had already been fully downloaded
547 self.report_file_already_downloaded(filename)
550 # The length does not match, we start the download over
551 self.report_unable_to_resume()
557 self.report_retry(count, retries)
560 self.trouble(u'ERROR: giving up after %s retries' % retries)
563 data_len = data.info().get('Content-length', None)
564 data_len_str = self.format_bytes(data_len)
571 data_block = data.read(block_size)
573 data_block_len = len(data_block)
574 if data_block_len == 0:
576 byte_counter += data_block_len
578 # Open file just in time
581 (stream, filename) = sanitize_open(filename, open_mode)
582 self.report_destination(filename)
583 except (OSError, IOError), err:
584 self.trouble('ERROR: unable to open for writing: %s' % str(err))
587 stream.write(data_block)
588 except (IOError, OSError), err:
589 self.trouble('\nERROR: unable to write data: %s' % str(err))
590 block_size = self.best_block_size(after - before, data_block_len)
593 percent_str = self.calc_percent(byte_counter, data_len)
594 eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
595 speed_str = self.calc_speed(start, time.time(), byte_counter)
596 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
599 self.slow_down(start, byte_counter)
602 if data_len is not None and str(byte_counter) != data_len:
603 raise ContentTooShortError(byte_counter, long(data_len))
606 class InfoExtractor(object):
607 """Information Extractor class.
609 Information extractors are the classes that, given a URL, extract
610 information from the video (or videos) the URL refers to. This
611 information includes the real video URL, the video title and simplified
612 title, author and others. The information is stored in a dictionary
613 which is then passed to the FileDownloader. The FileDownloader
614 processes this information possibly downloading the video to the file
615 system, among other possible outcomes. The dictionaries must include
616 the following fields:
618 id: Video identifier.
619 url: Final video URL.
620 uploader: Nickname of the video uploader.
621 title: Literal title.
622 stitle: Simplified title.
623 ext: Video filename extension.
624 format: Video format.
625 player_url: SWF Player URL (may be None).
627 The following fields are optional. Their primary purpose is to allow
628 youtube-dl to serve as the backend for a video search function, such
629 as the one in youtube2mp3. They are only used when their respective
630 forced printing functions are called:
632 thumbnail: Full URL to a video thumbnail image.
633 description: One-line video description.
635 Subclasses of this one should re-define the _real_initialize() and
636 _real_extract() methods, as well as the suitable() static method.
637 Probably, they should also be instantiated and added to the main
644 def __init__(self, downloader=None):
645 """Constructor. Receives an optional downloader."""
647 self.set_downloader(downloader)
651 """Receives a URL and returns True if suitable for this IE."""
654 def initialize(self):
655 """Initializes an instance (authentication, etc)."""
657 self._real_initialize()
660 def extract(self, url):
661 """Extracts URL information and returns it in list of dicts."""
663 return self._real_extract(url)
665 def set_downloader(self, downloader):
666 """Sets the downloader for this IE."""
667 self._downloader = downloader
669 def _real_initialize(self):
670 """Real initialization process. Redefine in subclasses."""
673 def _real_extract(self, url):
674 """Real extraction process. Redefine in subclasses."""
677 class YoutubeIE(InfoExtractor):
678 """Information extractor for youtube.com."""
680 _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?[\?#](?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$'
681 _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
682 _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
683 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
684 _NETRC_MACHINE = 'youtube'
685 # Listed in order of quality
686 _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
687 _video_extensions = {
693 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
700 return (re.match(YoutubeIE._VALID_URL, url) is not None)
702 def report_lang(self):
703 """Report attempt to set language."""
704 self._downloader.to_stdout(u'[youtube] Setting language')
706 def report_login(self):
707 """Report attempt to log in."""
708 self._downloader.to_stdout(u'[youtube] Logging in')
710 def report_age_confirmation(self):
711 """Report attempt to confirm age."""
712 self._downloader.to_stdout(u'[youtube] Confirming age')
714 def report_video_webpage_download(self, video_id):
715 """Report attempt to download video webpage."""
716 self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id)
718 def report_video_info_webpage_download(self, video_id):
719 """Report attempt to download video info webpage."""
720 self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
722 def report_information_extraction(self, video_id):
723 """Report attempt to extract video information."""
724 self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
726 def report_unavailable_format(self, video_id, format):
727 """Report extracted video URL."""
728 self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
730 def report_rtmp_download(self):
731 """Indicate the download will use the RTMP protocol."""
732 self._downloader.to_stdout(u'[youtube] RTMP download detected')
734 def _real_initialize(self):
735 if self._downloader is None:
740 downloader_params = self._downloader.params
742 # Attempt to use provided username and password or .netrc data
743 if downloader_params.get('username', None) is not None:
744 username = downloader_params['username']
745 password = downloader_params['password']
746 elif downloader_params.get('usenetrc', False):
748 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
753 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
754 except (IOError, netrc.NetrcParseError), err:
755 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
759 request = urllib2.Request(self._LANG_URL, None, std_headers)
762 urllib2.urlopen(request).read()
763 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
764 self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
767 # No authentication to be performed
773 'current_form': 'loginForm',
775 'action_login': 'Log In',
776 'username': username,
777 'password': password,
779 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
782 login_results = urllib2.urlopen(request).read()
783 if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
784 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
786 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
787 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
793 'action_confirm': 'Confirm',
795 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
797 self.report_age_confirmation()
798 age_results = urllib2.urlopen(request).read()
799 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
800 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
803 def _real_extract(self, url):
804 # Extract video id from URL
805 mobj = re.match(self._VALID_URL, url)
807 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
809 video_id = mobj.group(2)
812 self.report_video_webpage_download(video_id)
813 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers)
815 video_webpage = urllib2.urlopen(request).read()
816 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
817 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
820 # Attempt to extract SWF player URL
821 mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage)
823 player_url = mobj.group(1)
828 self.report_video_info_webpage_download(video_id)
829 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
830 video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
831 % (video_id, el_type))
832 request = urllib2.Request(video_info_url, None, std_headers)
834 video_info_webpage = urllib2.urlopen(request).read()
835 video_info = parse_qs(video_info_webpage)
836 if 'token' in video_info:
838 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
839 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
841 if 'token' not in video_info:
842 if 'reason' in video_info:
843 self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0])
845 self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
848 # Start extracting information
849 self.report_information_extraction(video_id)
852 if 'author' not in video_info:
853 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
855 video_uploader = urllib.unquote_plus(video_info['author'][0])
858 if 'title' not in video_info:
859 self._downloader.trouble(u'ERROR: unable to extract video title')
861 video_title = urllib.unquote_plus(video_info['title'][0])
862 video_title = video_title.decode('utf-8')
863 video_title = sanitize_title(video_title)
866 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
867 simple_title = simple_title.strip(ur'_')
870 if 'thumbnail_url' not in video_info:
871 self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
873 else: # don't panic if we can't find it
874 video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
877 video_description = 'No description available.'
878 if self._downloader.params.get('forcedescription', False):
879 mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
881 video_description = mobj.group(1)
884 video_token = urllib.unquote_plus(video_info['token'][0])
886 # Decide which formats to download
887 requested_format = self._downloader.params.get('format', None)
888 get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token)
890 if 'fmt_url_map' in video_info:
891 url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
892 format_limit = self._downloader.params.get('format_limit', None)
893 if format_limit is not None and format_limit in self._available_formats:
894 format_list = self._available_formats[self._available_formats.index(format_limit):]
896 format_list = self._available_formats
897 existing_formats = [x for x in format_list if x in url_map]
898 if len(existing_formats) == 0:
899 self._downloader.trouble(u'ERROR: no known formats available for video')
901 if requested_format is None:
902 video_url_list = [(existing_formats[0], get_video_template % existing_formats[0])] # Best quality
903 elif requested_format == '-1':
904 video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats
906 video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format
908 elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
909 self.report_rtmp_download()
910 video_url_list = [(None, video_info['conn'][0])]
913 self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
916 for format_param, video_real_url in video_url_list:
917 # At this point we have a new video
918 self._downloader.increment_downloads()
921 video_extension = self._video_extensions.get(format_param, 'flv')
923 # Find the video URL in fmt_url_map or conn paramters
925 # Process video information
926 self._downloader.process_info({
927 'id': video_id.decode('utf-8'),
928 'url': video_real_url.decode('utf-8'),
929 'uploader': video_uploader.decode('utf-8'),
930 'title': video_title,
931 'stitle': simple_title,
932 'ext': video_extension.decode('utf-8'),
933 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
934 'thumbnail': video_thumbnail.decode('utf-8'),
935 'description': video_description.decode('utf-8'),
936 'player_url': player_url,
938 except UnavailableVideoError, err:
939 self._downloader.trouble(u'ERROR: unable to download video (format may not be available)')
942 class MetacafeIE(InfoExtractor):
943 """Information Extractor for metacafe.com."""
945 _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
946 _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
947 _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
950 def __init__(self, youtube_ie, downloader=None):
951 InfoExtractor.__init__(self, downloader)
952 self._youtube_ie = youtube_ie
956 return (re.match(MetacafeIE._VALID_URL, url) is not None)
958 def report_disclaimer(self):
959 """Report disclaimer retrieval."""
960 self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
962 def report_age_confirmation(self):
963 """Report attempt to confirm age."""
964 self._downloader.to_stdout(u'[metacafe] Confirming age')
966 def report_download_webpage(self, video_id):
967 """Report webpage download."""
968 self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
970 def report_extraction(self, video_id):
971 """Report information extraction."""
972 self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
974 def _real_initialize(self):
975 # Retrieve disclaimer
976 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
978 self.report_disclaimer()
979 disclaimer = urllib2.urlopen(request).read()
980 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
981 self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
987 'submit': "Continue - I'm over 18",
989 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
991 self.report_age_confirmation()
992 disclaimer = urllib2.urlopen(request).read()
993 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
994 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
# Extract a Metacafe video: parse id + simplified title from the URL,
# delegate "yt-<id>" videos to the YouTube extractor, otherwise download the
# watch page, scrape mediaURL/title/uploader, and hand the resulting info
# dict to the downloader. (Listing gaps: 'if mobj is None:'/'return' guard
# lines and the 'try:' lines are omitted from this view.)
997 def _real_extract(self, url):
998 # Extract id and simplified title from URL
999 mobj = re.match(self._VALID_URL, url)
1001 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1004 video_id = mobj.group(1)
1006 # Check if video comes from YouTube
1007 mobj2 = re.match(r'^yt-(.*)$', video_id)
1008 if mobj2 is not None:
1009 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1012 # At this point we have a new video
1013 self._downloader.increment_downloads()
1015 simple_title = mobj.group(2).decode('utf-8')
1016 video_extension = 'flv'
1018 # Retrieve video webpage to extract further information
1019 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1021 self.report_download_webpage(video_id)
1022 webpage = urllib2.urlopen(request).read()
1023 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
# NOTE(review): message below reads "unable retrieve" -- missing "to".
1024 self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1027 # Extract URL, uploader and title from webpage
1028 self.report_extraction(video_id)
1029 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
1031 self._downloader.trouble(u'ERROR: unable to extract media URL')
1033 mediaURL = urllib.unquote(mobj.group(1))
# Dead code kept by the original author: the gdaKey query parameter is no
# longer appended to the media URL.
1035 #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1037 # self._downloader.trouble(u'ERROR: unable to extract gdaKey')
1039 #gdaKey = mobj.group(1)
1041 #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
1043 video_url = mediaURL
1045 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
1047 self._downloader.trouble(u'ERROR: unable to extract title')
1049 video_title = mobj.group(1).decode('utf-8')
1050 video_title = sanitize_title(video_title)
1052 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
1054 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1056 video_uploader = mobj.group(1)
1059 # Process video information
1060 self._downloader.process_info({
1061 'id': video_id.decode('utf-8'),
1062 'url': video_url.decode('utf-8'),
1063 'uploader': video_uploader.decode('utf-8'),
1064 'title': video_title,
1065 'stitle': simple_title,
1066 'ext': video_extension.decode('utf-8'),
1070 except UnavailableVideoError:
1071 self._downloader.trouble(u'ERROR: unable to download video')
# Extractor for dailymotion.com /video/<id>_<slug> pages: scrapes the flash
# "video" variable for the media URL plus title and uploader from the HTML.
# (Listing gaps: suitable()/try:/return guard lines are omitted from view.)
1074 class DailymotionIE(InfoExtractor):
1075 """Information Extractor for Dailymotion"""
1077 _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1079 def __init__(self, downloader=None):
1080 InfoExtractor.__init__(self, downloader)
1084 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1086 def report_download_webpage(self, video_id):
1087 """Report webpage download."""
1088 self._downloader.to_stdout(u'[dailymotion] %s: Downloading webpage' % video_id)
1090 def report_extraction(self, video_id):
1091 """Report information extraction."""
1092 self._downloader.to_stdout(u'[dailymotion] %s: Extracting information' % video_id)
# No site-wide initialization needed for Dailymotion.
1094 def _real_initialize(self):
1097 def _real_extract(self, url):
1098 # Extract id and simplified title from URL
1099 mobj = re.match(self._VALID_URL, url)
1101 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1104 # At this point we have a new video
1105 self._downloader.increment_downloads()
1106 video_id = mobj.group(1)
1108 simple_title = mobj.group(2).decode('utf-8')
1109 video_extension = 'flv'
1111 # Retrieve video webpage to extract further information
1112 request = urllib2.Request(url)
1114 self.report_download_webpage(video_id)
1115 webpage = urllib2.urlopen(request).read()
1116 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
# NOTE(review): message below reads "unable retrieve" -- missing "to".
1117 self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1120 # Extract URL, uploader and title from webpage
1121 self.report_extraction(video_id)
1122 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1124 self._downloader.trouble(u'ERROR: unable to extract media URL')
1126 mediaURL = urllib.unquote(mobj.group(1))
# NOTE(review): the relative-URL case mentioned below is not handled here.
1128 # if needed add http://www.dailymotion.com/ if relative URL
1130 video_url = mediaURL
1132 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1133 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1135 self._downloader.trouble(u'ERROR: unable to extract title')
1137 video_title = mobj.group(1).decode('utf-8')
1138 video_title = sanitize_title(video_title)
1140 mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a></div>', webpage)
1142 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1144 video_uploader = mobj.group(1)
1147 # Process video information
1148 self._downloader.process_info({
1149 'id': video_id.decode('utf-8'),
1150 'url': video_url.decode('utf-8'),
1151 'uploader': video_uploader.decode('utf-8'),
1152 'title': video_title,
1153 'stitle': simple_title,
1154 'ext': video_extension.decode('utf-8'),
1158 except UnavailableVideoError:
1159 self._downloader.trouble(u'ERROR: unable to download video')
# Extractor for Google Video (video.google.com) across its country TLDs.
# Prefers the mp4 download_url; falls back to the escaped flv videoUrl.
# (Listing gaps: try:/return and 'if mobj is None:' guard lines omitted.)
1161 class GoogleIE(InfoExtractor):
1162 """Information extractor for video.google.com."""
1164 _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1166 def __init__(self, downloader=None):
1167 InfoExtractor.__init__(self, downloader)
1171 return (re.match(GoogleIE._VALID_URL, url) is not None)
1173 def report_download_webpage(self, video_id):
1174 """Report webpage download."""
1175 self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id)
1177 def report_extraction(self, video_id):
1178 """Report information extraction."""
1179 self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id)
# No site-wide initialization needed.
1181 def _real_initialize(self):
1184 def _real_extract(self, url):
1185 # Extract id from URL
1186 mobj = re.match(self._VALID_URL, url)
1188 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1191 # At this point we have a new video
1192 self._downloader.increment_downloads()
1193 video_id = mobj.group(1)
1195 video_extension = 'mp4'
1197 # Retrieve video webpage to extract further information
1198 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1200 self.report_download_webpage(video_id)
1201 webpage = urllib2.urlopen(request).read()
1202 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1203 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1206 # Extract URL, uploader, and title from webpage
1207 self.report_extraction(video_id)
# First try the direct mp4 download_url embedded in the page; if absent,
# fall back to the flv videoUrl, which is \xNN-escaped in the page source.
1208 mobj = re.search(r"download_url:'([^']+)'", webpage)
1210 video_extension = 'flv'
1211 mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1213 self._downloader.trouble(u'ERROR: unable to extract media URL')
1215 mediaURL = urllib.unquote(mobj.group(1))
# Undo the page's literal "\x3d"/"\x26" escapes ('=' and '&').
1216 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1217 mediaURL = mediaURL.replace('\\x26', '\x26')
1219 video_url = mediaURL
1221 mobj = re.search(r'<title>(.*)</title>', webpage)
1223 self._downloader.trouble(u'ERROR: unable to extract title')
1225 video_title = mobj.group(1).decode('utf-8')
1226 video_title = sanitize_title(video_title)
1227 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1229 # Extract video description
1230 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1232 self._downloader.trouble(u'ERROR: unable to extract video description')
1234 video_description = mobj.group(1).decode('utf-8')
1235 if not video_description:
1236 video_description = 'No description available.'
# Thumbnail requires a second page fetch, so it is only done on demand.
1238 # Extract video thumbnail
1239 if self._downloader.params.get('forcethumbnail', False):
1240 request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1242 webpage = urllib2.urlopen(request).read()
1243 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1244 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1246 mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1248 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1250 video_thumbnail = mobj.group(1)
1251 else: # we need something to pass to process_info
1252 video_thumbnail = ''
1256 # Process video information
1257 self._downloader.process_info({
1258 'id': video_id.decode('utf-8'),
1259 'url': video_url.decode('utf-8'),
1261 'title': video_title,
1262 'stitle': simple_title,
1263 'ext': video_extension.decode('utf-8'),
1267 except UnavailableVideoError:
1268 self._downloader.trouble(u'ERROR: unable to download video')
# Extractor for photobucket.com flv links referenced via a "current=" query
# parameter; title and uploader both come from the page <title>.
# (Listing gaps: try:/return and guard lines omitted from view.)
1271 class PhotobucketIE(InfoExtractor):
1272 """Information extractor for photobucket.com."""
1274 _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1276 def __init__(self, downloader=None):
1277 InfoExtractor.__init__(self, downloader)
1281 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1283 def report_download_webpage(self, video_id):
1284 """Report webpage download."""
1285 self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id)
1287 def report_extraction(self, video_id):
1288 """Report information extraction."""
1289 self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id)
# No site-wide initialization needed.
1291 def _real_initialize(self):
1294 def _real_extract(self, url):
1295 # Extract id from URL
1296 mobj = re.match(self._VALID_URL, url)
1298 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1301 # At this point we have a new video
1302 self._downloader.increment_downloads()
1303 video_id = mobj.group(1)
1305 video_extension = 'flv'
1307 # Retrieve video webpage to extract further information
1308 request = urllib2.Request(url)
1310 self.report_download_webpage(video_id)
1311 webpage = urllib2.urlopen(request).read()
1312 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1313 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1316 # Extract URL, uploader, and title from webpage
1317 self.report_extraction(video_id)
1318 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1320 self._downloader.trouble(u'ERROR: unable to extract media URL')
1322 mediaURL = urllib.unquote(mobj.group(1))
1324 video_url = mediaURL
# The <title> encodes both the video title (group 1) and uploader (group 2).
1326 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1328 self._downloader.trouble(u'ERROR: unable to extract title')
1330 video_title = mobj.group(1).decode('utf-8')
1331 video_title = sanitize_title(video_title)
1332 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1334 video_uploader = mobj.group(2).decode('utf-8')
1337 # Process video information
1338 self._downloader.process_info({
1339 'id': video_id.decode('utf-8'),
1340 'url': video_url.decode('utf-8'),
1341 'uploader': video_uploader,
1342 'title': video_title,
1343 'stitle': simple_title,
1344 'ext': video_extension.decode('utf-8'),
1348 except UnavailableVideoError:
1349 self._downloader.trouble(u'ERROR: unable to download video')
# Extractor for Yahoo! Video. Non-/watch/ URLs are first rewritten to the
# canonical /watch/<vid>/<id> form (via a recursive _real_extract call),
# then metadata is scraped from the watch page and the media URL is taken
# from the playlist XML service. (Listing gaps: try:/return/guard lines
# omitted from this view.)
1352 class YahooIE(InfoExtractor):
1353 """Information extractor for video.yahoo.com."""
1355 # _VALID_URL matches all Yahoo! Video URLs
1356 # _VPAGE_URL matches only the extractable '/watch/' URLs
1357 _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1358 _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1360 def __init__(self, downloader=None):
1361 InfoExtractor.__init__(self, downloader)
1365 return (re.match(YahooIE._VALID_URL, url) is not None)
1367 def report_download_webpage(self, video_id):
1368 """Report webpage download."""
1369 self._downloader.to_stdout(u'[video.yahoo] %s: Downloading webpage' % video_id)
1371 def report_extraction(self, video_id):
1372 """Report information extraction."""
1373 self._downloader.to_stdout(u'[video.yahoo] %s: Extracting information' % video_id)
# No site-wide initialization needed.
1375 def _real_initialize(self):
# new_video=False marks the recursive rewrite call so downloads are not
# double-counted.
1378 def _real_extract(self, url, new_video=True):
1379 # Extract ID from URL
1380 mobj = re.match(self._VALID_URL, url)
1382 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1385 # At this point we have a new video
1386 self._downloader.increment_downloads()
1387 video_id = mobj.group(2)
1388 video_extension = 'flv'
1390 # Rewrite valid but non-extractable URLs as
1391 # extractable English language /watch/ URLs
1392 if re.match(self._VPAGE_URL, url) is None:
1393 request = urllib2.Request(url)
1395 webpage = urllib2.urlopen(request).read()
1396 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1397 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1400 mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1402 self._downloader.trouble(u'ERROR: Unable to extract id field')
1404 yahoo_id = mobj.group(1)
1406 mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1408 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1410 yahoo_vid = mobj.group(1)
1412 url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1413 return self._real_extract(url, new_video=False)
1415 # Retrieve video webpage to extract further information
1416 request = urllib2.Request(url)
1418 self.report_download_webpage(video_id)
1419 webpage = urllib2.urlopen(request).read()
1420 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1421 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1424 # Extract uploader and title from webpage
1425 self.report_extraction(video_id)
1426 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1428 self._downloader.trouble(u'ERROR: unable to extract video title')
1430 video_title = mobj.group(1).decode('utf-8')
1431 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1433 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1435 self._downloader.trouble(u'ERROR: unable to extract video uploader')
# NOTE(review): group(1) of the regex above is the literal '(people|profile)'
# path segment, not the uploader name -- the display name is group(2).
# This looks like a bug (uploader comes out as 'people'/'profile'); confirm
# against the live page format before changing.
1437 video_uploader = mobj.group(1).decode('utf-8')
1439 # Extract video thumbnail
1440 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1442 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1444 video_thumbnail = mobj.group(1).decode('utf-8')
1446 # Extract video description
1447 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1449 self._downloader.trouble(u'ERROR: unable to extract video description')
1451 video_description = mobj.group(1).decode('utf-8')
1452 if not video_description: video_description = 'No description available.'
1454 # Extract video height and width
1455 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1457 self._downloader.trouble(u'ERROR: unable to extract video height')
1459 yv_video_height = mobj.group(1)
1461 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1463 self._downloader.trouble(u'ERROR: unable to extract video width')
1465 yv_video_width = mobj.group(1)
1467 # Retrieve video playlist to extract media URL
1468 # I'm not completely sure what all these options are, but we
1469 # seem to need most of them, otherwise the server sends a 401.
1470 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
1471 yv_bitrate = '700' # according to Wikipedia this is hard-coded
1472 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1473 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1474 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1476 self.report_download_webpage(video_id)
1477 webpage = urllib2.urlopen(request).read()
1478 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1479 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1482 # Extract media URL from playlist XML
1483 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1485 self._downloader.trouble(u'ERROR: Unable to extract media URL')
1487 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
# Decode any residual HTML entities in the playlist URL.
1488 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1491 # Process video information
1492 self._downloader.process_info({
1493 'id': video_id.decode('utf-8'),
1495 'uploader': video_uploader,
1496 'title': video_title,
1497 'stitle': simple_title,
1498 'ext': video_extension.decode('utf-8'),
# NOTE(review): 'thumbnail' and 'description' are given twice in this dict
# literal; the later (un-decoded) entries silently overwrite the earlier
# .decode('utf-8') ones. Likely a merge artifact -- one pair should go.
1499 'thumbnail': video_thumbnail.decode('utf-8'),
1500 'description': video_description,
1501 'thumbnail': video_thumbnail,
1502 'description': video_description,
1505 except UnavailableVideoError:
1506 self._downloader.trouble(u'ERROR: unable to download video')
# Last-resort extractor: fetches an arbitrary page and looks for a direct
# media URL via JW-Player-style 'file='/'source=' patterns. The uploader is
# approximated by the domain name. (Listing gaps: try:/return/guard lines
# omitted from this view.)
1509 class GenericIE(InfoExtractor):
1510 """Generic last-resort information extractor."""
1512 def __init__(self, downloader=None):
1513 InfoExtractor.__init__(self, downloader)
1519 def report_download_webpage(self, video_id):
1520 """Report webpage download."""
# Warn first: reaching this extractor means no site-specific IE matched.
1521 self._downloader.to_stdout(u'WARNING: Falling back on generic information extractor.')
1522 self._downloader.to_stdout(u'[generic] %s: Downloading webpage' % video_id)
1524 def report_extraction(self, video_id):
1525 """Report information extraction."""
1526 self._downloader.to_stdout(u'[generic] %s: Extracting information' % video_id)
# No site-wide initialization needed.
1528 def _real_initialize(self):
1531 def _real_extract(self, url):
1532 # At this point we have a new video
1533 self._downloader.increment_downloads()
1535 video_id = url.split('/')[-1]
1536 request = urllib2.Request(url)
1538 self.report_download_webpage(video_id)
1539 webpage = urllib2.urlopen(request).read()
1540 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1541 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1543 except ValueError, err:
1544 # since this is the last-resort InfoExtractor, if
1545 # this error is thrown, it'll be thrown here
1546 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1549 # Start with something easy: JW Player in SWFObject
1550 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1552 # Broaden the search a little bit
1553 mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1555 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1558 # It's possible that one of the regexes
1559 # matched, but returned an empty group:
1560 if mobj.group(1) is None:
1561 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1564 video_url = urllib.unquote(mobj.group(1))
1565 video_id = os.path.basename(video_url)
# Split "name.ext" into extension (without the dot) and bare id.
1567 # here's a fun little line of code for you:
1568 video_extension = os.path.splitext(video_id)[1][1:]
1569 video_id = os.path.splitext(video_id)[0]
1571 # it's tempting to parse this further, but you would
1572 # have to take into account all the variations like
1573 # Video Title - Site Name
1574 # Site Name | Video Title
1575 # Video Title - Tagline | Site Name
1576 # and so on and so forth; it's just not practical
1577 mobj = re.search(r'<title>(.*)</title>', webpage)
1579 self._downloader.trouble(u'ERROR: unable to extract title')
1581 video_title = mobj.group(1).decode('utf-8')
1582 video_title = sanitize_title(video_title)
1583 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1585 # video uploader is domain name
1586 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
# NOTE(review): this failure path is about the uploader/domain, but the
# message below says "unable to extract title" -- copy-paste error.
1588 self._downloader.trouble(u'ERROR: unable to extract title')
1590 video_uploader = mobj.group(1).decode('utf-8')
1593 # Process video information
1594 self._downloader.process_info({
1595 'id': video_id.decode('utf-8'),
1596 'url': video_url.decode('utf-8'),
1597 'uploader': video_uploader,
1598 'title': video_title,
1599 'stitle': simple_title,
1600 'ext': video_extension.decode('utf-8'),
1604 except UnavailableVideoError, err:
1605 self._downloader.trouble(u'ERROR: unable to download video')
# Handles "ytsearch[N|all]:<query>" pseudo-URLs: pages through YouTube
# result pages, collects up to N video ids, and delegates each to the
# YouTube extractor. (Listing gaps: return/guard lines and the pagenum/
# video_ids initializers are omitted from this view.)
1608 class YoutubeSearchIE(InfoExtractor):
1609 """Information Extractor for YouTube search queries."""
1610 _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1611 _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1612 _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1613 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
# Hard cap on results; 'ytsearchall' is clamped to this.
1615 _max_youtube_results = 1000
1617 def __init__(self, youtube_ie, downloader=None):
1618 InfoExtractor.__init__(self, downloader)
1619 self._youtube_ie = youtube_ie
1623 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1625 def report_download_page(self, query, pagenum):
1626 """Report attempt to download playlist page with given number."""
1627 query = query.decode(preferredencoding())
1628 self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1630 def _real_initialize(self):
1631 self._youtube_ie.initialize()
1633 def _real_extract(self, query):
1634 mobj = re.match(self._VALID_QUERY, query)
1636 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
# Prefix is '', a number, or 'all'; it selects how many results to fetch.
1639 prefix, query = query.split(':')
1641 query = query.encode('utf-8')
1643 self._download_n_results(query, 1)
1645 elif prefix == 'all':
1646 self._download_n_results(query, self._max_youtube_results)
1652 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1654 elif n > self._max_youtube_results:
1655 self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
1656 n = self._max_youtube_results
1657 self._download_n_results(query, n)
1659 except ValueError: # parsing prefix as integer fails
1660 self._download_n_results(query, 1)
1663 def _download_n_results(self, query, n):
1664 """Downloads a specified number of results for a query"""
1667 already_seen = set()
1671 self.report_download_page(query, pagenum)
1672 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1673 request = urllib2.Request(result_url, None, std_headers)
1675 page = urllib2.urlopen(request).read()
1676 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1677 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1680 # Extract video identifiers
1681 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
# NOTE(review): id parsing below slices the raw href match
# ('href="/watch?v=<id>"') on '=' and drops the trailing quote;
# fragile -- any extra query parameter in the href would break it.
1682 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1683 if video_id not in already_seen:
1684 video_ids.append(video_id)
1685 already_seen.add(video_id)
1686 if len(video_ids) == n:
1687 # Specified n videos reached
1688 for id in video_ids:
1689 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
# No "Next" link means the last results page; flush what we collected.
1692 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1693 for id in video_ids:
1694 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1697 pagenum = pagenum + 1
# Handles "gvsearch[N|all]:<query>" pseudo-URLs for Google Video search;
# same page-walking structure as YoutubeSearchIE, delegating to GoogleIE.
# (Listing gaps: return/guard lines and loop initializers omitted.)
1699 class GoogleSearchIE(InfoExtractor):
1700 """Information Extractor for Google Video search queries."""
1701 _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1702 _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1703 _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1704 _MORE_PAGES_INDICATOR = r'<span>Next</span>'
# Hard cap on results; 'gvsearchall' is clamped to this.
1706 _max_google_results = 1000
1708 def __init__(self, google_ie, downloader=None):
1709 InfoExtractor.__init__(self, downloader)
1710 self._google_ie = google_ie
1714 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1716 def report_download_page(self, query, pagenum):
1717 """Report attempt to download playlist page with given number."""
1718 query = query.decode(preferredencoding())
1719 self._downloader.to_stdout(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1721 def _real_initialize(self):
1722 self._google_ie.initialize()
1724 def _real_extract(self, query):
1725 mobj = re.match(self._VALID_QUERY, query)
1727 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1730 prefix, query = query.split(':')
1732 query = query.encode('utf-8')
1734 self._download_n_results(query, 1)
1736 elif prefix == 'all':
1737 self._download_n_results(query, self._max_google_results)
1743 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1745 elif n > self._max_google_results:
1746 self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
1747 n = self._max_google_results
1748 self._download_n_results(query, n)
1750 except ValueError: # parsing prefix as integer fails
1751 self._download_n_results(query, 1)
1754 def _download_n_results(self, query, n):
1755 """Downloads a specified number of results for a query"""
1758 already_seen = set()
1762 self.report_download_page(query, pagenum)
# Note: this template's second slot is a 'start=' result offset, not a
# page number as in the YouTube search template.
1763 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1764 request = urllib2.Request(result_url, None, std_headers)
1766 page = urllib2.urlopen(request).read()
1767 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1768 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1771 # Extract video identifiers
1772 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1773 video_id = mobj.group(1)
1774 if video_id not in already_seen:
1775 video_ids.append(video_id)
1776 already_seen.add(video_id)
1777 if len(video_ids) == n:
1778 # Specified n videos reached
1779 for id in video_ids:
1780 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
# No "Next" marker means the final page; flush collected ids.
1783 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1784 for id in video_ids:
1785 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1788 pagenum = pagenum + 1
# Handles "yvsearch[N|all]:<query>" pseudo-URLs for Yahoo! Video search;
# same page-walking structure as the other *SearchIE classes, delegating
# each hit to YahooIE. (Listing gaps: return/guard lines and loop
# initializers omitted from this view.)
1790 class YahooSearchIE(InfoExtractor):
1791 """Information Extractor for Yahoo! Video search queries."""
1792 _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
1793 _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
1794 _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
1795 _MORE_PAGES_INDICATOR = r'\s*Next'
# Hard cap on results; 'yvsearchall' is clamped to this.
1797 _max_yahoo_results = 1000
1799 def __init__(self, yahoo_ie, downloader=None):
1800 InfoExtractor.__init__(self, downloader)
1801 self._yahoo_ie = yahoo_ie
1805 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
1807 def report_download_page(self, query, pagenum):
1808 """Report attempt to download playlist page with given number."""
1809 query = query.decode(preferredencoding())
1810 self._downloader.to_stdout(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
1812 def _real_initialize(self):
1813 self._yahoo_ie.initialize()
1815 def _real_extract(self, query):
1816 mobj = re.match(self._VALID_QUERY, query)
1818 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1821 prefix, query = query.split(':')
1823 query = query.encode('utf-8')
1825 self._download_n_results(query, 1)
1827 elif prefix == 'all':
1828 self._download_n_results(query, self._max_yahoo_results)
1834 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1836 elif n > self._max_yahoo_results:
1837 self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
1838 n = self._max_yahoo_results
1839 self._download_n_results(query, n)
1841 except ValueError: # parsing prefix as integer fails
1842 self._download_n_results(query, 1)
1845 def _download_n_results(self, query, n):
1846 """Downloads a specified number of results for a query"""
1849 already_seen = set()
1853 self.report_download_page(query, pagenum)
1854 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1855 request = urllib2.Request(result_url, None, std_headers)
1857 page = urllib2.urlopen(request).read()
1858 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1859 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1862 # Extract video identifiers
1863 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
# The captured id is the '<group>/<vid>' pair from the /watch/ href.
1864 video_id = mobj.group(1)
1865 if video_id not in already_seen:
1866 video_ids.append(video_id)
1867 already_seen.add(video_id)
1868 if len(video_ids) == n:
1869 # Specified n videos reached
1870 for id in video_ids:
1871 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
# No "Next" marker means the final page; flush collected ids.
1874 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1875 for id in video_ids:
1876 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1879 pagenum = pagenum + 1
# Extractor for YouTube playlist / my_playlists / user-list URLs: walks the
# view_play_list pages until no "Next" link remains, collecting video ids,
# then delegates each to the YouTube extractor. (Listing gaps: return/guard
# lines and the pagenum/video_ids/ids_in_page initializers are omitted.)
1881 class YoutubePlaylistIE(InfoExtractor):
1882 """Information Extractor for YouTube playlists."""
1884 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
1885 _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1886 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1887 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1890 def __init__(self, youtube_ie, downloader=None):
1891 InfoExtractor.__init__(self, downloader)
1892 self._youtube_ie = youtube_ie
1896 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1898 def report_download_page(self, playlist_id, pagenum):
1899 """Report attempt to download playlist page with given number."""
1900 self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1902 def _real_initialize(self):
1903 self._youtube_ie.initialize()
1905 def _real_extract(self, url):
1906 # Extract playlist id
1907 mobj = re.match(self._VALID_URL, url)
1909 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1912 # Download playlist pages
1913 playlist_id = mobj.group(1)
1918 self.report_download_page(playlist_id, pagenum)
1919 request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1921 page = urllib2.urlopen(request).read()
1922 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1923 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1926 # Extract video identifiers
# ids_in_page de-duplicates within one page; video_ids accumulates
# across pages (both initialized on lines not visible in this listing).
1928 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1929 if mobj.group(1) not in ids_in_page:
1930 ids_in_page.append(mobj.group(1))
1931 video_ids.extend(ids_in_page)
# Stop paging once the playlist has no "Next" link.
1933 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1935 pagenum = pagenum + 1
1937 for id in video_ids:
1938 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
# Extractor for youtube.com/user/<name> pages via the GData user feed;
# collects the video ids from the feed and delegates each to the YouTube
# extractor. Note: a single feed fetch, no pagination -- the original
# author flagged _VIDEO_INDICATOR with "XXX Fix this." below. (Listing
# gaps: return/guard lines and the ids_in_page/video_ids initializers
# are omitted from this view.)
1941 class YoutubeUserIE(InfoExtractor):
1942 """Information Extractor for YouTube users."""
1944 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
1945 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1946 _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
1949 def __init__(self, youtube_ie, downloader=None):
1950 InfoExtractor.__init__(self, downloader)
1951 self._youtube_ie = youtube_ie
1955 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
1957 def report_download_page(self, username):
1958 """Report attempt to download user page."""
1959 self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))
1961 def _real_initialize(self):
1962 self._youtube_ie.initialize()
1964 def _real_extract(self, url):
1966 mobj = re.match(self._VALID_URL, url)
1968 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1971 # Download user page
1972 username = mobj.group(1)
1976 self.report_download_page(username)
1977 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
1979 page = urllib2.urlopen(request).read()
1980 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1981 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1984 # Extract video identifiers
1987 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1988 if mobj.group(1) not in ids_in_page:
1989 ids_in_page.append(mobj.group(1))
1990 video_ids.extend(ids_in_page)
1992 for id in video_ids:
1993 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
# Base class for post-download processing steps; concrete subclasses
# override run(). The default run() passes the info dict through unchanged.
1996 class PostProcessor(object):
1997 """Post Processor class.
1999 PostProcessor objects can be added to downloaders with their
2000 add_post_processor() method. When the downloader has finished a
2001 successful download, it will take its internal chain of PostProcessors
2002 and start calling the run() method on each one of them, first with
2003 an initial argument and then with the returned value of the previous
2006 The chain will be stopped if one of them ever returns None or the end
2007 of the chain is reached.
2009 PostProcessor objects follow a "mutual registration" process similar
2010 to InfoExtractor objects.
2015 def __init__(self, downloader=None):
2016 self._downloader = downloader
2018 def set_downloader(self, downloader):
2019 """Sets the downloader for this PP."""
2020 self._downloader = downloader
2022 def run(self, information):
2023 """Run the PostProcessor.
2025 The "information" argument is a dictionary like the ones
2026 composed by InfoExtractors. The only difference is that this
2027 one has an extra field called "filepath" that points to the
2030 When this method returns None, the postprocessing chain is
2031 stopped. However, this method may return an information
2032 dictionary that will be passed to the next postprocessing
2033 object in the chain. It can be the one it received after
2034 changing some fields.
2036 In addition, this method may raise a PostProcessingError
2037 exception that will be taken into account by the downloader
2040 return information # by default, do nothing
2042 ### MAIN PROGRAM ###
# Everything below only runs when the file is executed as a script, not
# when it is imported as a module.
2043 if __name__ == '__main__':
2045 # Modules needed only when running the main program
2049 # Function to update the program file with the latest version from bitbucket.org
def update_self(downloader, filename):
	"""Overwrite this program's file with the latest stable version.

	downloader is used only for its output options (to_stdout);
	filename is the path of the running script to replace. Exits the
	process via sys.exit() if the file is not writable.
	"""
	# Note: downloader only used for options
	if not os.access(filename, os.W_OK):
		sys.exit('ERROR: no write permissions on %s' % filename)

	downloader.to_stdout('Updating to latest stable version...')
	latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
	latest_version = urllib.urlopen(latest_url).read().strip()
	prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
	newcontent = urllib.urlopen(prog_url).read()
	# Close the file even if the write fails part-way, so the handle is
	# not leaked and whatever was written is flushed to disk.
	stream = open(filename, 'w')
	try:
		stream.write(newcontent)
	finally:
		stream.close()
	downloader.to_stdout('Updated to version %s' % latest_version)
# General configuration
# Build ONE global opener carrying both handlers: consecutive
# urllib2.install_opener() calls each replace the previously installed
# opener, so installing a ProxyHandler opener and then a separate cookie
# opener leaves only the cookie opener active. Passing both handlers to
# a single build_opener() keeps proxy AND cookie support installed.
urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), urllib2.HTTPCookieProcessor()))
socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
2070 # Parse command line
# Command-line interface definition.
# NOTE(review): the closing ")" of this OptionParser(...) call is among
# the lines elided from this dump.
2071 parser = optparse.OptionParser(
2072 usage='Usage: %prog [options] url...',
2073 version='2010.07.24',
2074 conflict_handler='resolve',
# General options.
2077 parser.add_option('-h', '--help',
2078 action='help', help='print this help text and exit')
2079 parser.add_option('-v', '--version',
2080 action='version', help='print program version and exit')
2081 parser.add_option('-U', '--update',
2082 action='store_true', dest='update_self', help='update this program to latest stable version')
2083 parser.add_option('-i', '--ignore-errors',
2084 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
2085 parser.add_option('-r', '--rate-limit',
2086 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
2087 parser.add_option('-R', '--retries',
2088 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
# Account/login options.
2090 authentication = optparse.OptionGroup(parser, 'Authentication Options')
2091 authentication.add_option('-u', '--username',
2092 dest='username', metavar='USERNAME', help='account username')
2093 authentication.add_option('-p', '--password',
2094 dest='password', metavar='PASSWORD', help='account password')
2095 authentication.add_option('-n', '--netrc',
2096 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
2097 parser.add_option_group(authentication)
# Format selection: -m and --all-formats are aliases that store fixed
# values into the same dest ('format') via store_const.
2099 video_format = optparse.OptionGroup(parser, 'Video Format Options')
2100 video_format.add_option('-f', '--format',
2101 action='store', dest='format', metavar='FORMAT', help='video format code')
2102 video_format.add_option('-m', '--mobile-version',
2103 action='store_const', dest='format', help='alias for -f 17', const='17')
2104 video_format.add_option('--all-formats',
2105 action='store_const', dest='format', help='download all available video formats', const='-1')
2106 video_format.add_option('--max-quality',
2107 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
2108 parser.add_option_group(video_format)
# Quiet/simulation options; each --get-* flag implies quiet + simulate
# (enforced later when the FileDownloader configuration is built).
2110 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
2111 verbosity.add_option('-q', '--quiet',
2112 action='store_true', dest='quiet', help='activates quiet mode', default=False)
2113 verbosity.add_option('-s', '--simulate',
2114 action='store_true', dest='simulate', help='do not download video', default=False)
2115 verbosity.add_option('-g', '--get-url',
2116 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
2117 verbosity.add_option('-e', '--get-title',
2118 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
2119 verbosity.add_option('--get-thumbnail',
2120 action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
2121 verbosity.add_option('--get-description',
2122 action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
2123 verbosity.add_option('--no-progress',
2124 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
2125 parser.add_option_group(verbosity)
# Output naming / batch input / overwrite / resume options.
2127 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
2128 filesystem.add_option('-t', '--title',
2129 action='store_true', dest='usetitle', help='use title in file name', default=False)
2130 filesystem.add_option('-l', '--literal',
2131 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
2132 filesystem.add_option('-o', '--output',
2133 dest='outtmpl', metavar='TEMPLATE', help='output filename template')
2134 filesystem.add_option('-a', '--batch-file',
2135 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
2136 filesystem.add_option('-w', '--no-overwrites',
2137 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
2138 filesystem.add_option('-c', '--continue',
2139 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
2140 parser.add_option_group(filesystem)
2142 (opts, args) = parser.parse_args()
2144 # Batch file verification
# Read extra URLs from the batch file (or stdin when the name is '-').
# NOTE(review): the stdin branch, the initial batchurls assignment, the
# try:, and the except that reaches the sys.exit below are all elided
# from this dump — confirm upstream.
2146 if opts.batchfile is not None:
2148 if opts.batchfile == '-':
2151 batchfd = open(opts.batchfile, 'r')
2152 batchurls = batchfd.readlines()
# Strip surrounding whitespace and drop empty lines.
2153 batchurls = [x.strip() for x in batchurls]
2154 batchurls = [x for x in batchurls if len(x) > 0]
2156 sys.exit(u'ERROR: batch file could not be read')
# Batch-file URLs come first, then positional command-line arguments.
2157 all_urls = batchurls + args
2159 # Conflicting, missing and erroneous options
2160 if opts.usenetrc and (opts.username is not None or opts.password is not None):
2161 parser.error(u'using .netrc conflicts with giving username/password')
2162 if opts.password is not None and opts.username is None:
2163 parser.error(u'account username missing')
2164 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
2165 parser.error(u'using output template conflicts with using title or literal title')
2166 if opts.usetitle and opts.useliteral:
2167 parser.error(u'using title conflicts with using literal title')
# Prompt interactively (no echo) when a username was given but no
# password.
2168 if opts.username is not None and opts.password is None:
2169 opts.password = getpass.getpass(u'Type account password and press return:')
# Convert the human-friendly rate limit (e.g. "50k") to a numeric value.
2170 if opts.ratelimit is not None:
2171 numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
2172 if numeric_limit is None:
2173 parser.error(u'invalid rate limit specified')
2174 opts.ratelimit = numeric_limit
# Normalize --retries to an integer.
# NOTE(review): the try: matching this except is elided from this dump.
2175 if opts.retries is not None:
2177 opts.retries = long(opts.retries)
2178 except (TypeError, ValueError), err:
2179 parser.error(u'invalid retry count specified')
2181 # Information extractors
# Instantiate one extractor per supported site. The playlist/user/search
# extractors wrap a plain site extractor and delegate individual videos
# to it (hence the constructor arguments).
2182 youtube_ie = YoutubeIE()
2183 metacafe_ie = MetacafeIE(youtube_ie)
2184 dailymotion_ie = DailymotionIE()
2185 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
2186 youtube_user_ie = YoutubeUserIE(youtube_ie)
2187 youtube_search_ie = YoutubeSearchIE(youtube_ie)
2188 google_ie = GoogleIE()
2189 google_search_ie = GoogleSearchIE(google_ie)
2190 photobucket_ie = PhotobucketIE()
2191 yahoo_ie = YahooIE()
2192 yahoo_search_ie = YahooSearchIE(yahoo_ie)
2193 generic_ie = GenericIE()
# File downloader: translate the parsed options into the FileDownloader
# configuration dictionary.
# NOTE(review): the closing "})" of this call is among the lines elided
# from this dump.
2196 fd = FileDownloader({
2197 'usenetrc': opts.usenetrc,
2198 'username': opts.username,
2199 'password': opts.password,
# Any --get-* flag implies quiet mode...
2200 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
2201 'forceurl': opts.geturl,
2202 'forcetitle': opts.gettitle,
2203 'forcethumbnail': opts.getthumbnail,
2204 'forcedescription': opts.getdescription,
# ...and also implies simulation (no actual download).
2205 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
2206 'format': opts.format,
2207 'format_limit': opts.format_limit,
# Output template: an explicit -o wins; otherwise the and/or chain picks
# the first default template whose conditions are all truthy — including
# the format code when downloading all formats ('-1'), and a title-based
# name when -t/-l was given (%(stitle)s presumably being the sanitized
# title — see sanitize_title at the top of the file).
2208 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
2209 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
2210 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
2211 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
2212 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
2213 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
2214 or u'%(id)s.%(ext)s'),
2215 'ignoreerrors': opts.ignoreerrors,
2216 'ratelimit': opts.ratelimit,
2217 'nooverwrites': opts.nooverwrites,
2218 'retries': opts.retries,
2219 'continuedl': opts.continue_dl,
2220 'noprogress': opts.noprogress,
# Register the extractors with the downloader. The search/playlist/user
# wrappers are registered before the plain site extractors they delegate
# to; registration order matters — the generic fallback must come last
# (see the comment below).
2222 fd.add_info_extractor(youtube_search_ie)
2223 fd.add_info_extractor(youtube_pl_ie)
2224 fd.add_info_extractor(youtube_user_ie)
2225 fd.add_info_extractor(metacafe_ie)
2226 fd.add_info_extractor(dailymotion_ie)
2227 fd.add_info_extractor(youtube_ie)
2228 fd.add_info_extractor(google_ie)
2229 fd.add_info_extractor(google_search_ie)
2230 fd.add_info_extractor(photobucket_ie)
2231 fd.add_info_extractor(yahoo_ie)
2232 fd.add_info_extractor(yahoo_search_ie)
2234 # This must come last since it's the
2235 # fallback if none of the others work
2236 fd.add_info_extractor(generic_ie)
# Self-update when requested with -U/--update.
2239 if opts.update_self:
2240 update_self(fd, sys.argv[0])
# Having no URLs is only an error when we weren't asked to self-update.
2243 if len(all_urls) < 1:
2244 if not opts.update_self:
2245 parser.error(u'you must provide at least one URL')
# Run the downloads and exit with an appropriate status.
# NOTE(review): the try: matching these excepts, the handler body under
# DownloadError, and the final exit with retcode are elided from this
# dump — confirm upstream.
2248 retcode = fd.download(all_urls)
2251 except DownloadError:
2253 except SameFileError:
2254 sys.exit(u'ERROR: fixed output name but more than one file to download')
2255 except KeyboardInterrupt:
2256 sys.exit(u'\nERROR: Interrupted by user')