2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # License: Public domain code
23 # parse_qs was moved from the cgi module to the urlparse module recently.
25 from urlparse import parse_qs
27 from cgi import parse_qs
30 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.8) Gecko/20100723 Firefox/3.6.8',
31 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
32 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
33 'Accept-Language': 'en-us,en;q=0.5',
# Characters considered "safe" for simplified titles: ASCII letters and digits,
# as a unicode string (Python 2 str.decode('ascii') yields unicode).
36 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
# Determine the character encoding to use when writing to the console.
38 def preferredencoding():
39 """Get preferred encoding.
41 Returns the best encoding scheme for the system, based on
42 locale.getpreferredencoding() and some further tweaks.
# Inner generator: probe the locale once, then yield the cached answer forever.
# NOTE(review): the embedded numbering shows lines elided here (the fallback
# path when the locale probe fails) -- verify against upstream before editing.
44 def yield_preferredencoding():
46 pref = locale.getpreferredencoding()
# .next() on the generator returns the first (and only distinct) value.
52 return yield_preferredencoding().next()
# re.sub() callback: replace one HTML entity match with its unicode character.
54 def htmlentity_transform(matchobj):
55 """Transforms an HTML entity to a Unicode character.
57 This function receives a match object and is intended to be used with
58 the re.sub() function.
60 entity = matchobj.group(1)
62 # Known non-numeric HTML entity
63 if entity in htmlentitydefs.name2codepoint:
64 return unichr(htmlentitydefs.name2codepoint[entity])
# Numeric character reference: decimal (#NNN) or hexadecimal (#xNNN).
67 mobj = re.match(ur'(?u)#(x?\d+)', entity)
69 numstr = mobj.group(1)
70 if numstr.startswith(u'x'):
# Prefixing '0' turns 'x1F4' into '0x1F4' so long(numstr, 16) can parse it.
# NOTE(review): the branches setting `base` (16 vs 10) are elided in this view.
72 numstr = u'0%s' % numstr
75 return unichr(long(numstr, base))
77 # Unknown entity in name, return its literal representation
78 return (u'&%s;' % entity)
80 def sanitize_title(utitle):
81 """Sanitizes a video title so it could be used as part of a filename."""
82 utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
83 return utitle.replace(unicode(os.sep), u'%')
# Open a file for writing, degrading the filename if the OS rejects it.
85 def sanitize_open(filename, open_mode):
86 """Try to open the given filename, and slightly tweak it if this fails.
88 Attempts to open the given filename. If this fails, it tries to change
89 the filename slightly, step by step, until it's either able to open it
90 or it fails and raises a final exception, like the standard open()
93 It returns the tuple (stream, definitive_file_name).
# NOTE(review): elided lines above this point presumably test for the special
# '-' filename meaning stdout -- confirm against upstream.
97 return (sys.stdout, filename)
98 stream = open(filename, open_mode)
99 return (stream, filename)
100 except (IOError, OSError), err:
101 # In case of error, try to remove win32 forbidden chars
102 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
104 # An exception here should be caught in the caller
105 stream = open(filename, open_mode)
106 return (stream, filename)
# Fatal download failure; raised by FileDownloader.trouble() unless
# the 'ignoreerrors' option is set.
109 class DownloadError(Exception):
110 """Download Error exception.
112 This exception may be thrown by FileDownloader objects if they are not
113 configured to continue on errors. They will contain the appropriate
# Raised when a fixed output template would make several URLs collide
# on one output file (see FileDownloader.download / fixed_template).
118 class SameFileError(Exception):
119 """Same File exception.
121 This exception will be thrown by FileDownloader objects if they detect
122 multiple files would have to be downloaded to the same file on disk.
# Raised by a PostProcessor's .run() to signal a postprocessing failure.
126 class PostProcessingError(Exception):
127 """Post Processing exception.
129 This exception may be raised by PostProcessor's .run() method to
130 indicate an error in the postprocessing task.
# Raised when the requested format does not exist for the video.
134 class UnavailableVideoError(Exception):
135 """Unavailable Format exception.
137 This exception will be thrown when a video is requested
138 in a format that is not available for that video.
# Raised when fewer bytes arrive than the server's Content-Length promised.
142 class ContentTooShortError(Exception):
143 """Content Too Short exception.
145 This exception may be raised by FileDownloader objects when a file they
146 download is too small for what the server announced first, indicating
147 the connection was probably interrupted.
# Stores both byte counts so the caller can report the mismatch.
153 def __init__(self, downloaded, expected):
154 self.downloaded = downloaded
155 self.expected = expected
# Core downloader: receives URLs, dispatches them to registered
# InfoExtractors, downloads the resulting media and runs postprocessors.
157 class FileDownloader(object):
158 """File Downloader class.
160 File downloader objects are the ones responsible of downloading the
161 actual video file and writing it to disk if the user has requested
162 it, among some other tasks. In most cases there should be one per
163 program. As, given a video URL, the downloader doesn't know how to
164 extract all the needed information, task that InfoExtractors do, it
165 has to pass the URL to one of them.
167 For this, file downloader objects have a method that allows
168 InfoExtractors to be registered in a given order. When it is passed
169 a URL, the file downloader handles it to the first InfoExtractor it
170 finds that reports being able to handle it. The InfoExtractor extracts
171 all the information about the video or videos the URL refers to, and
172 asks the FileDownloader to process the video information, possibly
173 downloading the video.
175 File downloaders accept a lot of parameters. In order not to saturate
176 the object constructor with arguments, it receives a dictionary of
177 options instead. These options are available through the params
178 attribute for the InfoExtractors to use. The FileDownloader also
179 registers itself as the downloader in charge for the InfoExtractors
180 that are added to it, so this is a "mutual registration".
184 username: Username for authentication purposes.
185 password: Password for authentication purposes.
186 usenetrc: Use netrc for authentication instead.
187 quiet: Do not print messages to stdout.
188 forceurl: Force printing final URL.
189 forcetitle: Force printing title.
190 simulate: Do not download the video files.
191 format: Video format code.
192 format_limit: Highest quality format to try.
193 outtmpl: Template for output names.
194 ignoreerrors: Do not stop on download errors.
195 ratelimit: Download speed limit, in bytes/sec.
196 nooverwrites: Prevent overwriting files.
197 retries: Number of times to retry for HTTP error 503
198 continuedl: Try to continue downloads if possible.
199 noprogress: Do not print the progress bar.
# Process exit status accumulated across downloads (set to 1 by trouble()).
205 _download_retcode = None
# Ordinal of the current download; feeds the %(ord)s output-template key.
206 _num_downloads = None
208 def __init__(self, params):
209 """Create a FileDownloader object with the given options."""
212 self._download_retcode = 0
213 self._num_downloads = 0
# Recursively create the directory components of `filename` (mkdir -p).
217 def pmkdir(filename):
218 """Create directory components in filename. Similar to Unix "mkdir -p"."""
219 components = filename.split(os.sep)
# Build the list of progressively longer path prefixes to create in order.
220 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
221 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
222 for dir in aggregate:
223 if not os.path.exists(dir):
# Format a byte count as a human-readable string with a one-letter suffix.
227 def format_bytes(bytes):
230 if type(bytes) is str:
235 exponent = long(math.log(bytes, 1024.0))
236 suffix = 'bkMGTPEZY'[exponent]
237 converted = float(bytes) / float(1024**exponent)
238 return '%.2f%s' % (converted, suffix)
# Percentage of the download completed, right-aligned in 6 columns.
241 def calc_percent(byte_counter, data_len):
244 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
# Estimated time of arrival as MM:SS, based on the average rate so far.
247 def calc_eta(start, now, total, current):
251 if current == 0 or dif < 0.001: # One millisecond
253 rate = float(current) / dif
254 eta = long((float(total) - float(current)) / rate)
255 (eta_mins, eta_secs) = divmod(eta, 60)
258 return '%02d:%02d' % (eta_mins, eta_secs)
# Average download speed formatted as '<bytes>/s', 10 columns wide.
261 def calc_speed(start, now, bytes):
263 if bytes == 0 or dif < 0.001: # One millisecond
264 return '%10s' % '---b/s'
265 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
# Adapt the read block size to the measured throughput, clamped to 4 MB.
268 def best_block_size(elapsed_time, bytes):
269 new_min = max(bytes / 2.0, 1.0)
270 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
271 if elapsed_time < 0.001:
273 rate = bytes / elapsed_time
# Parse strings like '50k' or '4.2M' into a byte count (long).
281 def parse_bytes(bytestr):
282 """Parse a string indicating a byte quantity into a long integer."""
283 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
286 number = float(matchobj.group(1))
# Empty suffix group indexes 'b' (exponent 0), i.e. plain bytes.
287 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
288 return long(round(number * multiplier))
# Register an InfoExtractor; "mutual registration" -- the IE learns about us.
290 def add_info_extractor(self, ie):
291 """Add an InfoExtractor object to the end of the list."""
293 ie.set_downloader(self)
# Register a PostProcessor at the end of the processing chain.
295 def add_post_processor(self, pp):
296 """Add a PostProcessor object to the end of the chain."""
298 pp.set_downloader(self)
# Console output in the system's preferred encoding; honors 'quiet'.
300 def to_stdout(self, message, skip_eol=False, ignore_encoding_errors=False):
301 """Print message to stdout if not in quiet mode."""
303 if not self.params.get('quiet', False):
# Trailing comma on the print statement suppresses Python 2's newline;
# [u'\n', u''][skip_eol] picks the line terminator.
304 print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
306 except (UnicodeEncodeError), err:
307 if not ignore_encoding_errors:
310 def to_stderr(self, message):
311 """Print message to stderr."""
312 print >>sys.stderr, message.encode(preferredencoding())
# True when outtmpl contains no %(...)s placeholders, i.e. is a fixed name.
314 def fixed_template(self):
315 """Checks if the output template is fixed."""
316 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
# Central error handler: either raise DownloadError or record retcode 1.
318 def trouble(self, message=None):
319 """Determine action to take when a download problem appears.
321 Depending on if the downloader has been configured to ignore
322 download errors or not, this method may throw an exception or
323 not when errors are found, after printing the message.
325 if message is not None:
326 self.to_stderr(message)
327 if not self.params.get('ignoreerrors', False):
328 raise DownloadError(message)
329 self._download_retcode = 1
# Throttle: sleep long enough to bring average speed under 'ratelimit'.
331 def slow_down(self, start_time, byte_counter):
332 """Sleep if the download speed is over the rate limit."""
333 rate_limit = self.params.get('ratelimit', None)
334 if rate_limit is None or byte_counter == 0:
337 elapsed = now - start_time
340 speed = float(byte_counter) / elapsed
341 if speed > rate_limit:
# Sleep time = excess bytes over what the limit allows, divided by the limit.
342 time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
# User-facing progress/status messages, all funneled through to_stdout().
344 def report_destination(self, filename):
345 """Report destination filename."""
346 self.to_stdout(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
# '\r' + skip_eol redraws the progress line in place.
348 def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
349 """Report download progress."""
350 if self.params.get('noprogress', False):
352 self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
353 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
355 def report_resuming_byte(self, resume_len):
356 """Report attempt to resume at given byte."""
357 self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
359 def report_retry(self, count, retries):
360 """Report retry in case of HTTP error 503"""
361 self.to_stdout(u'[download] Got HTTP error 503. Retrying (attempt %d of %d)...' % (count, retries))
363 def report_file_already_downloaded(self, file_name):
364 """Report file has already been fully downloaded."""
366 self.to_stdout(u'[download] %s has already been downloaded' % file_name)
# Fall back to a generic message if the filename is unprintable.
367 except (UnicodeEncodeError), err:
368 self.to_stdout(u'[download] The file has already been downloaded')
370 def report_unable_to_resume(self):
371 """Report it was impossible to resume download."""
372 self.to_stdout(u'[download] Unable to resume')
374 def report_finish(self):
375 """Report download finished."""
376 if self.params.get('noprogress', False):
377 self.to_stdout(u'[download] Download completed')
# Bump the per-file ordinal used by the %(ord)s template key.
381 def increment_downloads(self):
382 """Increment the ordinal that assigns a number to each file."""
383 self._num_downloads += 1
# Turn one info dictionary (from an InfoExtractor) into an actual download.
385 def process_info(self, info_dict):
"""Process a single dictionary returned by an InfoExtractor."""
386 """Process a single dictionary returned by an InfoExtractor."""
387 # Do nothing else if in simulate mode
388 if self.params.get('simulate', False):
# Forced printing modes: emit the requested fields and (elided) return early.
390 if self.params.get('forcetitle', False):
391 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
392 if self.params.get('forceurl', False):
393 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
394 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
395 print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
396 if self.params.get('forcedescription', False) and 'description' in info_dict:
397 print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
# Expand the output template with the info dict plus epoch/ord extras.
402 template_dict = dict(info_dict)
403 template_dict['epoch'] = unicode(long(time.time()))
404 template_dict['ord'] = unicode('%05d' % self._num_downloads)
405 filename = self.params['outtmpl'] % template_dict
406 except (ValueError, KeyError), err:
407 self.trouble(u'ERROR: invalid system charset or erroneous output template')
409 if self.params.get('nooverwrites', False) and os.path.exists(filename):
410 self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
# Make sure the target directory tree exists before downloading.
414 self.pmkdir(filename)
415 except (OSError, IOError), err:
416 self.trouble(u'ERROR: unable to create directories: %s' % str(err))
420 success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
# Local filesystem errors mean the format itself was unusable.
421 except (OSError, IOError), err:
422 raise UnavailableVideoError
423 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
424 self.trouble(u'ERROR: unable to download video data: %s' % str(err))
426 except (ContentTooShortError, ), err:
427 self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
432 self.post_process(filename, info_dict)
433 except (PostProcessingError), err:
434 self.trouble(u'ERROR: postprocessing: %s' % str(err))
# Hand each URL to the first InfoExtractor that claims it.
437 def download(self, url_list):
438 """Download a given list of URLs."""
# A fixed (placeholder-free) template cannot hold more than one file.
439 if len(url_list) > 1 and self.fixed_template():
440 raise SameFileError(self.params['outtmpl'])
443 suitable_found = False
445 # Go to next InfoExtractor if not suitable
446 if not ie.suitable(url):
449 # Suitable InfoExtractor found
450 suitable_found = True
452 # Extract information from URL and process it
455 # Suitable InfoExtractor had been found; go to next URL
458 if not suitable_found:
459 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
461 return self._download_retcode
# Run every registered PostProcessor over the downloaded file in order.
463 def post_process(self, filename, ie_info):
464 """Run the postprocessing chain on the given file."""
466 info['filepath'] = filename
# RTMP streams are delegated to the external 'rtmpdump' binary.
472 def _download_with_rtmpdump(self, filename, url, player_url):
473 self.report_destination(filename)
475 # Check for rtmpdump first
477 subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
478 except (OSError, IOError):
479 self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
482 # Download using rtmpdump. rtmpdump returns exit code 2 when
483 # the connection was interrumpted and resuming appears to be
484 # possible. This is part of rtmpdump's normal usage, AFAIK.
# [[], ['-W', player_url]][cond] is a py2 conditional: add -W only if set.
485 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', filename]
486 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
487 while retval == 2 or retval == 1:
488 prevsize = os.path.getsize(filename)
489 self.to_stdout(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
490 time.sleep(5.0) # This seems to be needed
491 retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
492 cursize = os.path.getsize(filename)
# No progress between retries with exit code 1 => treat as final.
493 if prevsize == cursize and retval == 1:
496 self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
499 self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
# HTTP download path with resume, retry-on-503 and adaptive block size.
502 def _do_download(self, filename, url, player_url):
503 # Attempt to download using rtmpdump
504 if url.startswith('rtmp'):
505 return self._download_with_rtmpdump(filename, url, player_url)
# basic_request has no Range header; used to probe full length on 416.
509 basic_request = urllib2.Request(url, None, std_headers)
510 request = urllib2.Request(url, None, std_headers)
512 # Establish possible resume length
513 if os.path.isfile(filename):
514 resume_len = os.path.getsize(filename)
518 # Request parameters in case of being able to resume
519 if self.params.get('continuedl', False) and resume_len != 0:
520 self.report_resuming_byte(resume_len)
521 request.add_header('Range','bytes=%d-' % resume_len)
525 retries = self.params.get('retries', 0)
526 while count <= retries:
527 # Establish connection
529 data = urllib2.urlopen(request)
531 except (urllib2.HTTPError, ), err:
532 if err.code != 503 and err.code != 416:
533 # Unexpected HTTP error
535 elif err.code == 416:
536 # Unable to resume (requested range not satisfiable)
538 # Open the connection again without the range header
539 data = urllib2.urlopen(basic_request)
540 content_length = data.info()['Content-Length']
541 except (urllib2.HTTPError, ), err:
545 # Examine the reported length
546 if (content_length is not None and
547 (resume_len - 100 < long(content_length) < resume_len + 100)):
548 # The file had already been fully downloaded.
549 # Explanation to the above condition: in issue #175 it was revealed that
550 # YouTube sometimes adds or removes a few bytes from the end of the file,
551 # changing the file size slightly and causing problems for some users. So
552 # I decided to implement a suggested change and consider the file
553 # completely downloaded if the file size differs less than 100 bytes from
554 # the one in the hard drive.
555 self.report_file_already_downloaded(filename)
558 # The length does not match, we start the download over
559 self.report_unable_to_resume()
565 self.report_retry(count, retries)
568 self.trouble(u'ERROR: giving up after %s retries' % retries)
# Main read loop: stream in blocks, opening the output file lazily.
571 data_len = data.info().get('Content-length', None)
572 data_len_str = self.format_bytes(data_len)
579 data_block = data.read(block_size)
581 data_block_len = len(data_block)
582 if data_block_len == 0:
584 byte_counter += data_block_len
586 # Open file just in time
589 (stream, filename) = sanitize_open(filename, open_mode)
590 self.report_destination(filename)
591 except (OSError, IOError), err:
592 self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
595 stream.write(data_block)
596 except (IOError, OSError), err:
597 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
599 block_size = self.best_block_size(after - before, data_block_len)
602 percent_str = self.calc_percent(byte_counter, data_len)
603 eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
604 speed_str = self.calc_speed(start, time.time(), byte_counter)
605 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
608 self.slow_down(start, byte_counter)
# data_len is the raw header string, hence the str() on byte_counter.
611 if data_len is not None and str(byte_counter) != data_len:
612 raise ContentTooShortError(byte_counter, long(data_len))
# Abstract base for all site-specific extractors (YoutubeIE, MetacafeIE, ...).
615 class InfoExtractor(object):
616 """Information Extractor class.
618 Information extractors are the classes that, given a URL, extract
619 information from the video (or videos) the URL refers to. This
620 information includes the real video URL, the video title and simplified
621 title, author and others. The information is stored in a dictionary
622 which is then passed to the FileDownloader. The FileDownloader
623 processes this information possibly downloading the video to the file
624 system, among other possible outcomes. The dictionaries must include
625 the following fields:
627 id: Video identifier.
628 url: Final video URL.
629 uploader: Nickname of the video uploader.
630 title: Literal title.
631 stitle: Simplified title.
632 ext: Video filename extension.
633 format: Video format.
634 player_url: SWF Player URL (may be None).
636 The following fields are optional. Their primary purpose is to allow
637 youtube-dl to serve as the backend for a video search function, such
638 as the one in youtube2mp3. They are only used when their respective
639 forced printing functions are called:
641 thumbnail: Full URL to a video thumbnail image.
642 description: One-line video description.
644 Subclasses of this one should re-define the _real_initialize() and
645 _real_extract() methods, as well as the suitable() static method.
646 Probably, they should also be instantiated and added to the main
653 def __init__(self, downloader=None):
654 """Constructor. Receives an optional downloader."""
656 self.set_downloader(downloader)
# Static: subclasses decide whether they can handle a given URL.
660 """Receives a URL and returns True if suitable for this IE."""
# Lazy, once-only initialization; the guard flag lines are elided here.
663 def initialize(self):
664 """Initializes an instance (authentication, etc)."""
666 self._real_initialize()
669 def extract(self, url):
670 """Extracts URL information and returns it in list of dicts."""
672 return self._real_extract(url)
# Completes the "mutual registration" started by FileDownloader.
674 def set_downloader(self, downloader):
675 """Sets the downloader for this IE."""
676 self._downloader = downloader
# Template methods: subclasses override these two.
678 def _real_initialize(self):
679 """Real initialization process. Redefine in subclasses."""
682 def _real_extract(self, url):
683 """Real extraction process. Redefine in subclasses."""
# Extractor for youtube.com watch/embed/short URLs.
686 class YoutubeIE(InfoExtractor):
687 """Information extractor for youtube.com."""
# Group 2 of this regex captures the video id; group 1 the URL prefix.
689 _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?[\?#](?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$'
690 _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
691 _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
692 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
693 _NETRC_MACHINE = 'youtube'
694 # Listed in order of quality
695 _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
# Maps format codes to filename extensions; unlisted codes default to 'flv'.
696 _video_extensions = {
702 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
# Body of the (elided) static suitable() method: match against _VALID_URL.
709 return (re.match(YoutubeIE._VALID_URL, url) is not None)
# Status-reporting helpers; each just prints a tagged progress line.
711 def report_lang(self):
712 """Report attempt to set language."""
713 self._downloader.to_stdout(u'[youtube] Setting language')
715 def report_login(self):
716 """Report attempt to log in."""
717 self._downloader.to_stdout(u'[youtube] Logging in')
719 def report_age_confirmation(self):
720 """Report attempt to confirm age."""
721 self._downloader.to_stdout(u'[youtube] Confirming age')
723 def report_video_webpage_download(self, video_id):
724 """Report attempt to download video webpage."""
725 self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id)
727 def report_video_info_webpage_download(self, video_id):
728 """Report attempt to download video info webpage."""
729 self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
731 def report_information_extraction(self, video_id):
732 """Report attempt to extract video information."""
733 self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
735 def report_unavailable_format(self, video_id, format):
736 """Report extracted video URL."""
737 self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
739 def report_rtmp_download(self):
740 """Indicate the download will use the RTMP protocol."""
741 self._downloader.to_stdout(u'[youtube] RTMP download detected')
# One-time setup: set language to English, optionally log in, confirm age.
743 def _real_initialize(self):
744 if self._downloader is None:
749 downloader_params = self._downloader.params
751 # Attempt to use provided username and password or .netrc data
752 if downloader_params.get('username', None) is not None:
753 username = downloader_params['username']
754 password = downloader_params['password']
755 elif downloader_params.get('usenetrc', False):
757 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
762 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
# netrc problems are a warning only; extraction proceeds anonymously.
763 except (IOError, netrc.NetrcParseError), err:
764 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
# Force the English interface so the regexes below match reliably.
768 request = urllib2.Request(self._LANG_URL, None, std_headers)
771 urllib2.urlopen(request).read()
772 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
773 self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
776 # No authentication to be performed
782 'current_form': 'loginForm',
784 'action_login': 'Log In',
785 'username': username,
786 'password': password,
788 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
791 login_results = urllib2.urlopen(request).read()
# If the login form is still present in the response, login failed.
792 if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
793 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
795 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
796 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
# Confirm age so age-restricted videos are reachable.
802 'action_confirm': 'Confirm',
804 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
806 self.report_age_confirmation()
807 age_results = urllib2.urlopen(request).read()
808 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
809 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
# Main YouTube extraction: id -> webpage -> get_video_info -> format URLs.
812 def _real_extract(self, url):
813 # Extract video id from URL
814 mobj = re.match(self._VALID_URL, url)
816 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
818 video_id = mobj.group(2)
821 self.report_video_webpage_download(video_id)
822 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers)
824 video_webpage = urllib2.urlopen(request).read()
825 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
826 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
829 # Attempt to extract SWF player URL
830 mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage)
832 player_url = mobj.group(1)
# Try several 'el' variants of get_video_info until one yields a token.
837 self.report_video_info_webpage_download(video_id)
838 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
839 video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
840 % (video_id, el_type))
841 request = urllib2.Request(video_info_url, None, std_headers)
843 video_info_webpage = urllib2.urlopen(request).read()
844 video_info = parse_qs(video_info_webpage)
845 if 'token' in video_info:
847 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
848 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
850 if 'token' not in video_info:
851 if 'reason' in video_info:
852 self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0])
854 self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
857 # Start extracting information
858 self.report_information_extraction(video_id)
861 if 'author' not in video_info:
862 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
864 video_uploader = urllib.unquote_plus(video_info['author'][0])
867 if 'title' not in video_info:
868 self._downloader.trouble(u'ERROR: unable to extract video title')
870 video_title = urllib.unquote_plus(video_info['title'][0])
871 video_title = video_title.decode('utf-8')
872 video_title = sanitize_title(video_title)
# Simplified title: collapse any run of non-safe chars to one underscore.
875 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
876 simple_title = simple_title.strip(ur'_')
879 if 'thumbnail_url' not in video_info:
880 self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
882 else: # don't panic if we can't find it
883 video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
# Description is only scraped when the caller asked to print it.
886 video_description = 'No description available.'
887 if self._downloader.params.get('forcedescription', False):
888 mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
890 video_description = mobj.group(1)
893 video_token = urllib.unquote_plus(video_info['token'][0])
895 # Decide which formats to download
896 requested_format = self._downloader.params.get('format', None)
897 get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token)
899 if 'fmt_url_map' in video_info:
# fmt_url_map is 'code|url,code|url,...'; build a {code: url} dict.
900 url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
901 format_limit = self._downloader.params.get('format_limit', None)
902 if format_limit is not None and format_limit in self._available_formats:
903 format_list = self._available_formats[self._available_formats.index(format_limit):]
905 format_list = self._available_formats
906 existing_formats = [x for x in format_list if x in url_map]
907 if len(existing_formats) == 0:
908 self._downloader.trouble(u'ERROR: no known formats available for video')
910 if requested_format is None:
911 video_url_list = [(existing_formats[0], get_video_template % existing_formats[0])] # Best quality
912 elif requested_format == '-1':
913 video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats
915 video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format
917 elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
918 self.report_rtmp_download()
919 video_url_list = [(None, video_info['conn'][0])]
922 self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
925 for format_param, video_real_url in video_url_list:
926 # At this point we have a new video
927 self._downloader.increment_downloads()
930 video_extension = self._video_extensions.get(format_param, 'flv')
932 # Find the video URL in fmt_url_map or conn paramters
934 # Process video information
935 self._downloader.process_info({
936 'id': video_id.decode('utf-8'),
937 'url': video_real_url.decode('utf-8'),
938 'uploader': video_uploader.decode('utf-8'),
939 'title': video_title,
940 'stitle': simple_title,
941 'ext': video_extension.decode('utf-8'),
# py2 'and/or' conditional: u'NA' when format_param is None.
942 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
943 'thumbnail': video_thumbnail.decode('utf-8'),
944 'description': video_description.decode('utf-8'),
945 'player_url': player_url,
947 except UnavailableVideoError, err:
948 self._downloader.trouble(u'ERROR: unable to download video (format may not be available)')
# Extractor for metacafe.com; delegates yt-prefixed ids to YoutubeIE.
951 class MetacafeIE(InfoExtractor):
952 """Information Extractor for metacafe.com."""
# Group 1: video id; group 2: simplified title taken from the URL path.
954 _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
955 _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
956 _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
# Keeps a YoutubeIE instance so YouTube-hosted metacafe ids can be forwarded.
959 def __init__(self, youtube_ie, downloader=None):
960 InfoExtractor.__init__(self, downloader)
961 self._youtube_ie = youtube_ie
# Body of the (elided) static suitable() method.
965 return (re.match(MetacafeIE._VALID_URL, url) is not None)
967 def report_disclaimer(self):
968 """Report disclaimer retrieval."""
969 self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
971 def report_age_confirmation(self):
972 """Report attempt to confirm age."""
973 self._downloader.to_stdout(u'[metacafe] Confirming age')
975 def report_download_webpage(self, video_id):
976 """Report webpage download."""
977 self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
979 def report_extraction(self, video_id):
980 """Report information extraction."""
981 self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
# Fetch the family-filter disclaimer page, then POST past the age filter
# so mature content pages become reachable.
983 def _real_initialize(self):
984 # Retrieve disclaimer
985 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
987 self.report_disclaimer()
988 disclaimer = urllib2.urlopen(request).read()
989 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
990 self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
# Form fields other than 'submit' are elided in this view.
996 'submit': "Continue - I'm over 18",
998 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
1000 self.report_age_confirmation()
1001 disclaimer = urllib2.urlopen(request).read()
1002 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1003 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
1006 def _real_extract(self, url):
1007 # Extract id and simplified title from URL
1008 mobj = re.match(self._VALID_URL, url)
1010 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1013 video_id = mobj.group(1)
1015 # Check if video comes from YouTube
1016 mobj2 = re.match(r'^yt-(.*)$', video_id)
1017 if mobj2 is not None:
1018 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1021 # At this point we have a new video
1022 self._downloader.increment_downloads()
1024 simple_title = mobj.group(2).decode('utf-8')
1025 video_extension = 'flv'
1027 # Retrieve video webpage to extract further information
1028 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1030 self.report_download_webpage(video_id)
1031 webpage = urllib2.urlopen(request).read()
1032 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1033 self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1036 # Extract URL, uploader and title from webpage
1037 self.report_extraction(video_id)
1038 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
1039 if mobj is not None:
1040 mediaURL = urllib.unquote(mobj.group(1))
1042 # Extract gdaKey if available
1043 mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1045 video_url = mediaURL
1047 gdaKey = mobj.group(1)
1048 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
1050 mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
1052 self._downloader.trouble(u'ERROR: unable to extract media URL')
1054 vardict = parse_qs(mobj.group(1))
1055 if 'mediaData' not in vardict:
1056 self._downloader.trouble(u'ERROR: unable to extract media URL')
1058 mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
1060 self._downloader.trouble(u'ERROR: unable to extract media URL')
1062 video_url = '%s?__gda__=%s' % (mobj.group(1).replace('\\/', '/'), mobj.group(2))
1064 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
1066 self._downloader.trouble(u'ERROR: unable to extract title')
1068 video_title = mobj.group(1).decode('utf-8')
1069 video_title = sanitize_title(video_title)
1071 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
1073 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1075 video_uploader = mobj.group(1)
1078 # Process video information
1079 self._downloader.process_info({
1080 'id': video_id.decode('utf-8'),
1081 'url': video_url.decode('utf-8'),
1082 'uploader': video_uploader.decode('utf-8'),
1083 'title': video_title,
1084 'stitle': simple_title,
1085 'ext': video_extension.decode('utf-8'),
1089 except UnavailableVideoError:
1090 self._downloader.trouble(u'ERROR: unable to download video')
1093 class DailymotionIE(InfoExtractor):
1094 """Information Extractor for Dailymotion"""
1096 _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1098 def __init__(self, downloader=None):
1099 InfoExtractor.__init__(self, downloader)
1103 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1105 def report_download_webpage(self, video_id):
1106 """Report webpage download."""
1107 self._downloader.to_stdout(u'[dailymotion] %s: Downloading webpage' % video_id)
1109 def report_extraction(self, video_id):
1110 """Report information extraction."""
1111 self._downloader.to_stdout(u'[dailymotion] %s: Extracting information' % video_id)
1113 def _real_initialize(self):
1116 def _real_extract(self, url):
1117 # Extract id and simplified title from URL
1118 mobj = re.match(self._VALID_URL, url)
1120 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1123 # At this point we have a new video
1124 self._downloader.increment_downloads()
1125 video_id = mobj.group(1)
1127 simple_title = mobj.group(2).decode('utf-8')
1128 video_extension = 'flv'
1130 # Retrieve video webpage to extract further information
1131 request = urllib2.Request(url)
1133 self.report_download_webpage(video_id)
1134 webpage = urllib2.urlopen(request).read()
1135 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1136 self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1139 # Extract URL, uploader and title from webpage
1140 self.report_extraction(video_id)
1141 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1143 self._downloader.trouble(u'ERROR: unable to extract media URL')
1145 mediaURL = urllib.unquote(mobj.group(1))
1147 # if needed add http://www.dailymotion.com/ if relative URL
1149 video_url = mediaURL
1151 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1152 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1154 self._downloader.trouble(u'ERROR: unable to extract title')
1156 video_title = mobj.group(1).decode('utf-8')
1157 video_title = sanitize_title(video_title)
1159 mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a></div>', webpage)
1161 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1163 video_uploader = mobj.group(1)
1166 # Process video information
1167 self._downloader.process_info({
1168 'id': video_id.decode('utf-8'),
1169 'url': video_url.decode('utf-8'),
1170 'uploader': video_uploader.decode('utf-8'),
1171 'title': video_title,
1172 'stitle': simple_title,
1173 'ext': video_extension.decode('utf-8'),
1177 except UnavailableVideoError:
1178 self._downloader.trouble(u'ERROR: unable to download video')
1180 class GoogleIE(InfoExtractor):
1181 """Information extractor for video.google.com."""
1183 _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1185 def __init__(self, downloader=None):
1186 InfoExtractor.__init__(self, downloader)
1190 return (re.match(GoogleIE._VALID_URL, url) is not None)
1192 def report_download_webpage(self, video_id):
1193 """Report webpage download."""
1194 self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id)
1196 def report_extraction(self, video_id):
1197 """Report information extraction."""
1198 self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id)
1200 def _real_initialize(self):
1203 def _real_extract(self, url):
1204 # Extract id from URL
1205 mobj = re.match(self._VALID_URL, url)
1207 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1210 # At this point we have a new video
1211 self._downloader.increment_downloads()
1212 video_id = mobj.group(1)
1214 video_extension = 'mp4'
1216 # Retrieve video webpage to extract further information
1217 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1219 self.report_download_webpage(video_id)
1220 webpage = urllib2.urlopen(request).read()
1221 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1222 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1225 # Extract URL, uploader, and title from webpage
1226 self.report_extraction(video_id)
1227 mobj = re.search(r"download_url:'([^']+)'", webpage)
1229 video_extension = 'flv'
1230 mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1232 self._downloader.trouble(u'ERROR: unable to extract media URL')
1234 mediaURL = urllib.unquote(mobj.group(1))
1235 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1236 mediaURL = mediaURL.replace('\\x26', '\x26')
1238 video_url = mediaURL
1240 mobj = re.search(r'<title>(.*)</title>', webpage)
1242 self._downloader.trouble(u'ERROR: unable to extract title')
1244 video_title = mobj.group(1).decode('utf-8')
1245 video_title = sanitize_title(video_title)
1246 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1248 # Extract video description
1249 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1251 self._downloader.trouble(u'ERROR: unable to extract video description')
1253 video_description = mobj.group(1).decode('utf-8')
1254 if not video_description:
1255 video_description = 'No description available.'
1257 # Extract video thumbnail
1258 if self._downloader.params.get('forcethumbnail', False):
1259 request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1261 webpage = urllib2.urlopen(request).read()
1262 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1263 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1265 mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1267 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1269 video_thumbnail = mobj.group(1)
1270 else: # we need something to pass to process_info
1271 video_thumbnail = ''
1275 # Process video information
1276 self._downloader.process_info({
1277 'id': video_id.decode('utf-8'),
1278 'url': video_url.decode('utf-8'),
1280 'title': video_title,
1281 'stitle': simple_title,
1282 'ext': video_extension.decode('utf-8'),
1286 except UnavailableVideoError:
1287 self._downloader.trouble(u'ERROR: unable to download video')
1290 class PhotobucketIE(InfoExtractor):
1291 """Information extractor for photobucket.com."""
1293 _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1295 def __init__(self, downloader=None):
1296 InfoExtractor.__init__(self, downloader)
1300 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1302 def report_download_webpage(self, video_id):
1303 """Report webpage download."""
1304 self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id)
1306 def report_extraction(self, video_id):
1307 """Report information extraction."""
1308 self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id)
1310 def _real_initialize(self):
1313 def _real_extract(self, url):
1314 # Extract id from URL
1315 mobj = re.match(self._VALID_URL, url)
1317 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1320 # At this point we have a new video
1321 self._downloader.increment_downloads()
1322 video_id = mobj.group(1)
1324 video_extension = 'flv'
1326 # Retrieve video webpage to extract further information
1327 request = urllib2.Request(url)
1329 self.report_download_webpage(video_id)
1330 webpage = urllib2.urlopen(request).read()
1331 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1332 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1335 # Extract URL, uploader, and title from webpage
1336 self.report_extraction(video_id)
1337 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1339 self._downloader.trouble(u'ERROR: unable to extract media URL')
1341 mediaURL = urllib.unquote(mobj.group(1))
1343 video_url = mediaURL
1345 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1347 self._downloader.trouble(u'ERROR: unable to extract title')
1349 video_title = mobj.group(1).decode('utf-8')
1350 video_title = sanitize_title(video_title)
1351 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1353 video_uploader = mobj.group(2).decode('utf-8')
1356 # Process video information
1357 self._downloader.process_info({
1358 'id': video_id.decode('utf-8'),
1359 'url': video_url.decode('utf-8'),
1360 'uploader': video_uploader,
1361 'title': video_title,
1362 'stitle': simple_title,
1363 'ext': video_extension.decode('utf-8'),
1367 except UnavailableVideoError:
1368 self._downloader.trouble(u'ERROR: unable to download video')
1371 class YahooIE(InfoExtractor):
1372 """Information extractor for video.yahoo.com."""
1374 # _VALID_URL matches all Yahoo! Video URLs
1375 # _VPAGE_URL matches only the extractable '/watch/' URLs
1376 _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1377 _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1379 def __init__(self, downloader=None):
1380 InfoExtractor.__init__(self, downloader)
1384 return (re.match(YahooIE._VALID_URL, url) is not None)
1386 def report_download_webpage(self, video_id):
1387 """Report webpage download."""
1388 self._downloader.to_stdout(u'[video.yahoo] %s: Downloading webpage' % video_id)
1390 def report_extraction(self, video_id):
1391 """Report information extraction."""
1392 self._downloader.to_stdout(u'[video.yahoo] %s: Extracting information' % video_id)
1394 def _real_initialize(self):
1397 def _real_extract(self, url, new_video=True):
1398 # Extract ID from URL
1399 mobj = re.match(self._VALID_URL, url)
1401 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1404 # At this point we have a new video
1405 self._downloader.increment_downloads()
1406 video_id = mobj.group(2)
1407 video_extension = 'flv'
1409 # Rewrite valid but non-extractable URLs as
1410 # extractable English language /watch/ URLs
1411 if re.match(self._VPAGE_URL, url) is None:
1412 request = urllib2.Request(url)
1414 webpage = urllib2.urlopen(request).read()
1415 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1416 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1419 mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1421 self._downloader.trouble(u'ERROR: Unable to extract id field')
1423 yahoo_id = mobj.group(1)
1425 mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1427 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1429 yahoo_vid = mobj.group(1)
1431 url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1432 return self._real_extract(url, new_video=False)
1434 # Retrieve video webpage to extract further information
1435 request = urllib2.Request(url)
1437 self.report_download_webpage(video_id)
1438 webpage = urllib2.urlopen(request).read()
1439 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1440 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1443 # Extract uploader and title from webpage
1444 self.report_extraction(video_id)
1445 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1447 self._downloader.trouble(u'ERROR: unable to extract video title')
1449 video_title = mobj.group(1).decode('utf-8')
1450 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1452 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1454 self._downloader.trouble(u'ERROR: unable to extract video uploader')
1456 video_uploader = mobj.group(1).decode('utf-8')
1458 # Extract video thumbnail
1459 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1461 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1463 video_thumbnail = mobj.group(1).decode('utf-8')
1465 # Extract video description
1466 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1468 self._downloader.trouble(u'ERROR: unable to extract video description')
1470 video_description = mobj.group(1).decode('utf-8')
1471 if not video_description: video_description = 'No description available.'
1473 # Extract video height and width
1474 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1476 self._downloader.trouble(u'ERROR: unable to extract video height')
1478 yv_video_height = mobj.group(1)
1480 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1482 self._downloader.trouble(u'ERROR: unable to extract video width')
1484 yv_video_width = mobj.group(1)
1486 # Retrieve video playlist to extract media URL
1487 # I'm not completely sure what all these options are, but we
1488 # seem to need most of them, otherwise the server sends a 401.
1489 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
1490 yv_bitrate = '700' # according to Wikipedia this is hard-coded
1491 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1492 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1493 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1495 self.report_download_webpage(video_id)
1496 webpage = urllib2.urlopen(request).read()
1497 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1498 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1501 # Extract media URL from playlist XML
1502 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1504 self._downloader.trouble(u'ERROR: Unable to extract media URL')
1506 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1507 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1510 # Process video information
1511 self._downloader.process_info({
1512 'id': video_id.decode('utf-8'),
1514 'uploader': video_uploader,
1515 'title': video_title,
1516 'stitle': simple_title,
1517 'ext': video_extension.decode('utf-8'),
1518 'thumbnail': video_thumbnail.decode('utf-8'),
1519 'description': video_description,
1520 'thumbnail': video_thumbnail,
1521 'description': video_description,
1524 except UnavailableVideoError:
1525 self._downloader.trouble(u'ERROR: unable to download video')
1528 class GenericIE(InfoExtractor):
1529 """Generic last-resort information extractor."""
1531 def __init__(self, downloader=None):
1532 InfoExtractor.__init__(self, downloader)
1538 def report_download_webpage(self, video_id):
1539 """Report webpage download."""
1540 self._downloader.to_stdout(u'WARNING: Falling back on generic information extractor.')
1541 self._downloader.to_stdout(u'[generic] %s: Downloading webpage' % video_id)
1543 def report_extraction(self, video_id):
1544 """Report information extraction."""
1545 self._downloader.to_stdout(u'[generic] %s: Extracting information' % video_id)
1547 def _real_initialize(self):
1550 def _real_extract(self, url):
1551 # At this point we have a new video
1552 self._downloader.increment_downloads()
1554 video_id = url.split('/')[-1]
1555 request = urllib2.Request(url)
1557 self.report_download_webpage(video_id)
1558 webpage = urllib2.urlopen(request).read()
1559 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1560 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1562 except ValueError, err:
1563 # since this is the last-resort InfoExtractor, if
1564 # this error is thrown, it'll be thrown here
1565 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1568 # Start with something easy: JW Player in SWFObject
1569 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1571 # Broaden the search a little bit
1572 mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1574 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1577 # It's possible that one of the regexes
1578 # matched, but returned an empty group:
1579 if mobj.group(1) is None:
1580 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1583 video_url = urllib.unquote(mobj.group(1))
1584 video_id = os.path.basename(video_url)
1586 # here's a fun little line of code for you:
1587 video_extension = os.path.splitext(video_id)[1][1:]
1588 video_id = os.path.splitext(video_id)[0]
1590 # it's tempting to parse this further, but you would
1591 # have to take into account all the variations like
1592 # Video Title - Site Name
1593 # Site Name | Video Title
1594 # Video Title - Tagline | Site Name
1595 # and so on and so forth; it's just not practical
1596 mobj = re.search(r'<title>(.*)</title>', webpage)
1598 self._downloader.trouble(u'ERROR: unable to extract title')
1600 video_title = mobj.group(1).decode('utf-8')
1601 video_title = sanitize_title(video_title)
1602 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1604 # video uploader is domain name
1605 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1607 self._downloader.trouble(u'ERROR: unable to extract title')
1609 video_uploader = mobj.group(1).decode('utf-8')
1612 # Process video information
1613 self._downloader.process_info({
1614 'id': video_id.decode('utf-8'),
1615 'url': video_url.decode('utf-8'),
1616 'uploader': video_uploader,
1617 'title': video_title,
1618 'stitle': simple_title,
1619 'ext': video_extension.decode('utf-8'),
1623 except UnavailableVideoError, err:
1624 self._downloader.trouble(u'ERROR: unable to download video')
1627 class YoutubeSearchIE(InfoExtractor):
1628 """Information Extractor for YouTube search queries."""
1629 _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1630 _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1631 _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1632 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1634 _max_youtube_results = 1000
1636 def __init__(self, youtube_ie, downloader=None):
1637 InfoExtractor.__init__(self, downloader)
1638 self._youtube_ie = youtube_ie
1642 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1644 def report_download_page(self, query, pagenum):
1645 """Report attempt to download playlist page with given number."""
1646 query = query.decode(preferredencoding())
1647 self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1649 def _real_initialize(self):
1650 self._youtube_ie.initialize()
1652 def _real_extract(self, query):
1653 mobj = re.match(self._VALID_QUERY, query)
1655 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1658 prefix, query = query.split(':')
1660 query = query.encode('utf-8')
1662 self._download_n_results(query, 1)
1664 elif prefix == 'all':
1665 self._download_n_results(query, self._max_youtube_results)
1671 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1673 elif n > self._max_youtube_results:
1674 self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
1675 n = self._max_youtube_results
1676 self._download_n_results(query, n)
1678 except ValueError: # parsing prefix as integer fails
1679 self._download_n_results(query, 1)
1682 def _download_n_results(self, query, n):
1683 """Downloads a specified number of results for a query"""
1686 already_seen = set()
1690 self.report_download_page(query, pagenum)
1691 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1692 request = urllib2.Request(result_url, None, std_headers)
1694 page = urllib2.urlopen(request).read()
1695 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1696 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1699 # Extract video identifiers
1700 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1701 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1702 if video_id not in already_seen:
1703 video_ids.append(video_id)
1704 already_seen.add(video_id)
1705 if len(video_ids) == n:
1706 # Specified n videos reached
1707 for id in video_ids:
1708 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1711 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1712 for id in video_ids:
1713 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1716 pagenum = pagenum + 1
1718 class GoogleSearchIE(InfoExtractor):
1719 """Information Extractor for Google Video search queries."""
1720 _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1721 _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1722 _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1723 _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1725 _max_google_results = 1000
1727 def __init__(self, google_ie, downloader=None):
1728 InfoExtractor.__init__(self, downloader)
1729 self._google_ie = google_ie
1733 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1735 def report_download_page(self, query, pagenum):
1736 """Report attempt to download playlist page with given number."""
1737 query = query.decode(preferredencoding())
1738 self._downloader.to_stdout(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1740 def _real_initialize(self):
1741 self._google_ie.initialize()
1743 def _real_extract(self, query):
1744 mobj = re.match(self._VALID_QUERY, query)
1746 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1749 prefix, query = query.split(':')
1751 query = query.encode('utf-8')
1753 self._download_n_results(query, 1)
1755 elif prefix == 'all':
1756 self._download_n_results(query, self._max_google_results)
1762 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1764 elif n > self._max_google_results:
1765 self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
1766 n = self._max_google_results
1767 self._download_n_results(query, n)
1769 except ValueError: # parsing prefix as integer fails
1770 self._download_n_results(query, 1)
1773 def _download_n_results(self, query, n):
1774 """Downloads a specified number of results for a query"""
1777 already_seen = set()
1781 self.report_download_page(query, pagenum)
1782 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1783 request = urllib2.Request(result_url, None, std_headers)
1785 page = urllib2.urlopen(request).read()
1786 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1787 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1790 # Extract video identifiers
1791 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1792 video_id = mobj.group(1)
1793 if video_id not in already_seen:
1794 video_ids.append(video_id)
1795 already_seen.add(video_id)
1796 if len(video_ids) == n:
1797 # Specified n videos reached
1798 for id in video_ids:
1799 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1802 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1803 for id in video_ids:
1804 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1807 pagenum = pagenum + 1
1809 class YahooSearchIE(InfoExtractor):
1810 """Information Extractor for Yahoo! Video search queries."""
1811 _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
1812 _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
1813 _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
1814 _MORE_PAGES_INDICATOR = r'\s*Next'
1816 _max_yahoo_results = 1000
1818 def __init__(self, yahoo_ie, downloader=None):
1819 InfoExtractor.__init__(self, downloader)
1820 self._yahoo_ie = yahoo_ie
1824 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
1826 def report_download_page(self, query, pagenum):
1827 """Report attempt to download playlist page with given number."""
1828 query = query.decode(preferredencoding())
1829 self._downloader.to_stdout(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
1831 def _real_initialize(self):
1832 self._yahoo_ie.initialize()
1834 def _real_extract(self, query):
1835 mobj = re.match(self._VALID_QUERY, query)
1837 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1840 prefix, query = query.split(':')
1842 query = query.encode('utf-8')
1844 self._download_n_results(query, 1)
1846 elif prefix == 'all':
1847 self._download_n_results(query, self._max_yahoo_results)
1853 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1855 elif n > self._max_yahoo_results:
1856 self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
1857 n = self._max_yahoo_results
1858 self._download_n_results(query, n)
1860 except ValueError: # parsing prefix as integer fails
1861 self._download_n_results(query, 1)
1864 def _download_n_results(self, query, n):
1865 """Downloads a specified number of results for a query"""
1868 already_seen = set()
1872 self.report_download_page(query, pagenum)
1873 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1874 request = urllib2.Request(result_url, None, std_headers)
1876 page = urllib2.urlopen(request).read()
1877 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1878 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1881 # Extract video identifiers
1882 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1883 video_id = mobj.group(1)
1884 if video_id not in already_seen:
1885 video_ids.append(video_id)
1886 already_seen.add(video_id)
1887 if len(video_ids) == n:
1888 # Specified n videos reached
1889 for id in video_ids:
1890 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1893 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1894 for id in video_ids:
1895 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1898 pagenum = pagenum + 1
1900 class YoutubePlaylistIE(InfoExtractor):
1901 """Information Extractor for YouTube playlists."""
1903 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
1904 _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1905 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1906 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1909 def __init__(self, youtube_ie, downloader=None):
1910 InfoExtractor.__init__(self, downloader)
1911 self._youtube_ie = youtube_ie
1915 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1917 def report_download_page(self, playlist_id, pagenum):
1918 """Report attempt to download playlist page with given number."""
1919 self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1921 def _real_initialize(self):
1922 self._youtube_ie.initialize()
1924 def _real_extract(self, url):
1925 # Extract playlist id
1926 mobj = re.match(self._VALID_URL, url)
1928 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1931 # Download playlist pages
1932 playlist_id = mobj.group(1)
1937 self.report_download_page(playlist_id, pagenum)
1938 request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1940 page = urllib2.urlopen(request).read()
1941 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1942 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1945 # Extract video identifiers
1947 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1948 if mobj.group(1) not in ids_in_page:
1949 ids_in_page.append(mobj.group(1))
1950 video_ids.extend(ids_in_page)
1952 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1954 pagenum = pagenum + 1
1956 playliststart = self._downloader.params.get('playliststart', 1)
1957 playliststart -= 1 #our arrays are zero-based but the playlist is 1-based
1958 if playliststart > 0:
1959 video_ids = video_ids[playliststart:]
1961 for id in video_ids:
1962 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1965 class YoutubeUserIE(InfoExtractor):
1966 """Information Extractor for YouTube users."""
1968 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
1969 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1970 _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
1973 def __init__(self, youtube_ie, downloader=None):
1974 InfoExtractor.__init__(self, downloader)
1975 self._youtube_ie = youtube_ie
1979 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
1981 def report_download_page(self, username):
1982 """Report attempt to download user page."""
1983 self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))
1985 def _real_initialize(self):
1986 self._youtube_ie.initialize()
1988 def _real_extract(self, url):
1990 mobj = re.match(self._VALID_URL, url)
1992 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1995 # Download user page
1996 username = mobj.group(1)
2000 self.report_download_page(username)
2001 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
2003 page = urllib2.urlopen(request).read()
2004 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2005 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2008 # Extract video identifiers
2011 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2012 if mobj.group(1) not in ids_in_page:
2013 ids_in_page.append(mobj.group(1))
2014 video_ids.extend(ids_in_page)
2016 playliststart = self._downloader.params.get('playliststart', 1)
2017 playliststart = playliststart-1 #our arrays are zero-based but the playlist is 1-based
2018 if playliststart > 0:
2019 video_ids = video_ids[playliststart:]
2021 for id in video_ids:
2022 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
class PostProcessor(object):
    """Base class for post-processing hooks.

    Instances are registered on a downloader through its
    add_post_processor() method. After each successful download the
    downloader walks its chain of PostProcessors, calling run() on every
    one -- first with an initial argument, then with whatever the
    previous processor returned.

    The chain stops as soon as a processor returns None, or once its end
    is reached.

    Like InfoExtractor objects, PostProcessors take part in a "mutual
    registration" handshake with their downloader.
    """

    def __init__(self, downloader=None):
        self._downloader = downloader

    def set_downloader(self, downloader):
        """Attach the given downloader to this PP."""
        self._downloader = downloader

    def run(self, information):
        """Execute this post-processing step.

        "information" is a dictionary shaped like the ones built by
        InfoExtractors, extended with one extra key, "filepath", naming
        the downloaded file.

        Returning None halts the post-processing chain. Returning an
        information dictionary (possibly the received one with some
        fields changed) passes it along to the next processor in the
        chain. A PostProcessingError may also be raised; the downloader
        takes it into account.
        """
        return information # by default, do nothing
### MAIN PROGRAM ###
if __name__ == '__main__':
    # Modules needed only when running the main program
    # NOTE(review): the import statements themselves (getpass, optparse are
    # used below) and the try: matching the DownloadError/SameFileError/
    # KeyboardInterrupt handlers at the bottom of the file appear to be
    # missing from this copy -- reconcile against the canonical source.

    # Function to update the program file with the latest version from bitbucket.org
2079 def update_self(downloader, filename):
2080 # Note: downloader only used for options
2081 if not os.access (filename, os.W_OK):
2082 sys.exit('ERROR: no write permissions on %s' % filename)
2084 downloader.to_stdout('Updating to latest stable version...')
2085 latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
2086 latest_version = urllib.urlopen(latest_url).read().strip()
2087 prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
2088 newcontent = urllib.urlopen(prog_url).read()
2089 stream = open(filename, 'w')
2090 stream.write(newcontent)
2092 downloader.to_stdout('Updated to version %s' % latest_version)
    # General configuration
    # NOTE(review): several structural lines in the rest of this block (try:
    # statements matched by the except clauses below, some branch bodies,
    # closing brackets, batchurls initialization, and the final
    # sys.exit(retcode)) appear to be missing from this copy -- reconcile
    # against the canonical source before shipping.
    urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
    # NOTE(review): install_opener() replaces the global opener, so the
    # ProxyHandler opener installed above is discarded by the next line;
    # this looks unintentional -- confirm.
    urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
    socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

    # Parse command line
    parser = optparse.OptionParser(
        usage='Usage: %prog [options] url...',
        version='2010.08.04',
        conflict_handler='resolve',

    # General options (help/version/update/error and rate handling)
    parser.add_option('-h', '--help',
        action='help', help='print this help text and exit')
    parser.add_option('-v', '--version',
        action='version', help='print program version and exit')
    parser.add_option('-U', '--update',
        action='store_true', dest='update_self', help='update this program to latest stable version')
    parser.add_option('-i', '--ignore-errors',
        action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
    parser.add_option('-r', '--rate-limit',
        dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
    parser.add_option('-R', '--retries',
        dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
    parser.add_option('--playlist-start',
        dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)

    authentication = optparse.OptionGroup(parser, 'Authentication Options')
    authentication.add_option('-u', '--username',
        dest='username', metavar='USERNAME', help='account username')
    authentication.add_option('-p', '--password',
        dest='password', metavar='PASSWORD', help='account password')
    authentication.add_option('-n', '--netrc',
        action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
    parser.add_option_group(authentication)

    video_format = optparse.OptionGroup(parser, 'Video Format Options')
    video_format.add_option('-f', '--format',
        action='store', dest='format', metavar='FORMAT', help='video format code')
    video_format.add_option('-m', '--mobile-version',
        action='store_const', dest='format', help='alias for -f 17', const='17')
    video_format.add_option('--all-formats',
        action='store_const', dest='format', help='download all available video formats', const='-1')
    video_format.add_option('--max-quality',
        action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
    video_format.add_option('-b', '--best-quality',
        action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)')
    parser.add_option_group(video_format)

    verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
    verbosity.add_option('-q', '--quiet',
        action='store_true', dest='quiet', help='activates quiet mode', default=False)
    verbosity.add_option('-s', '--simulate',
        action='store_true', dest='simulate', help='do not download video', default=False)
    verbosity.add_option('-g', '--get-url',
        action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
    verbosity.add_option('-e', '--get-title',
        action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
    verbosity.add_option('--get-thumbnail',
        action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
    verbosity.add_option('--get-description',
        action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
    verbosity.add_option('--no-progress',
        action='store_true', dest='noprogress', help='do not print progress bar', default=False)
    parser.add_option_group(verbosity)

    filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
    filesystem.add_option('-t', '--title',
        action='store_true', dest='usetitle', help='use title in file name', default=False)
    filesystem.add_option('-l', '--literal',
        action='store_true', dest='useliteral', help='use literal title in file name', default=False)
    filesystem.add_option('-o', '--output',
        dest='outtmpl', metavar='TEMPLATE', help='output filename template')
    filesystem.add_option('-a', '--batch-file',
        dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
    filesystem.add_option('-w', '--no-overwrites',
        action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
    filesystem.add_option('-c', '--continue',
        action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
    parser.add_option_group(filesystem)

    (opts, args) = parser.parse_args()

    # Batch file verification
    # NOTE(review): the try/except IOError wrapper around this section and
    # the stdin ('-') branch body appear missing from this copy -- confirm.
    if opts.batchfile is not None:
        if opts.batchfile == '-':
            batchfd = open(opts.batchfile, 'r')
            batchurls = batchfd.readlines()
            batchurls = [x.strip() for x in batchurls]
            # Drop blank lines so empty entries never reach the downloader
            batchurls = [x for x in batchurls if len(x) > 0]
        sys.exit(u'ERROR: batch file could not be read')
    all_urls = batchurls + args

    # Conflicting, missing and erroneous options
    if opts.bestquality:
        print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n'
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
        parser.error(u'using .netrc conflicts with giving username/password')
    if opts.password is not None and opts.username is None:
        parser.error(u'account username missing')
    if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
        parser.error(u'using output template conflicts with using title or literal title')
    if opts.usetitle and opts.useliteral:
        parser.error(u'using title conflicts with using literal title')
    if opts.username is not None and opts.password is None:
        # Username given without password: prompt interactively
        opts.password = getpass.getpass(u'Type account password and press return:')
    if opts.ratelimit is not None:
        # Convert human-readable limit (e.g. 50k) to a byte count
        numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
        if numeric_limit is None:
            parser.error(u'invalid rate limit specified')
        opts.ratelimit = numeric_limit
    # NOTE(review): the try: lines matching the two except clauses below
    # appear missing from this copy.
    if opts.retries is not None:
        opts.retries = long(opts.retries)
        except (TypeError, ValueError), err:
            parser.error(u'invalid retry count specified')
    if opts.playliststart is not None:
        opts.playliststart = long(opts.playliststart)
        except (TypeError, ValueError), err:
            parser.error(u'invalid playlist page specified')

    # Information extractors
    youtube_ie = YoutubeIE()
    metacafe_ie = MetacafeIE(youtube_ie)
    dailymotion_ie = DailymotionIE()
    youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
    youtube_user_ie = YoutubeUserIE(youtube_ie)
    youtube_search_ie = YoutubeSearchIE(youtube_ie)
    google_ie = GoogleIE()
    google_search_ie = GoogleSearchIE(google_ie)
    photobucket_ie = PhotobucketIE()
    yahoo_ie = YahooIE()
    yahoo_search_ie = YahooSearchIE(yahoo_ie)
    generic_ie = GenericIE()

    # File downloader
    # NOTE(review): the closing "})" of this call appears missing from this
    # copy. 'quiet' and 'simulate' are forced on whenever any of the
    # --get-* print-only flags is given.
    fd = FileDownloader({
        'usenetrc': opts.usenetrc,
        'username': opts.username,
        'password': opts.password,
        'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forcethumbnail': opts.getthumbnail,
        'forcedescription': opts.getdescription,
        'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
        'format': opts.format,
        'format_limit': opts.format_limit,
        # Output template: explicit -o wins; otherwise pick a default that
        # encodes title/literal-title and (for --all-formats) the format code
        'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
            or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
            or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
            or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
            or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
            or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
            or u'%(id)s.%(ext)s'),
        'ignoreerrors': opts.ignoreerrors,
        'ratelimit': opts.ratelimit,
        'nooverwrites': opts.nooverwrites,
        'retries': opts.retries,
        'continuedl': opts.continue_dl,
        'noprogress': opts.noprogress,
        'playliststart': opts.playliststart,
    fd.add_info_extractor(youtube_search_ie)
    fd.add_info_extractor(youtube_pl_ie)
    fd.add_info_extractor(youtube_user_ie)
    fd.add_info_extractor(metacafe_ie)
    fd.add_info_extractor(dailymotion_ie)
    fd.add_info_extractor(youtube_ie)
    fd.add_info_extractor(google_ie)
    fd.add_info_extractor(google_search_ie)
    fd.add_info_extractor(photobucket_ie)
    fd.add_info_extractor(yahoo_ie)
    fd.add_info_extractor(yahoo_search_ie)

    # This must come last since it's the
    # fallback if none of the others work
    fd.add_info_extractor(generic_ie)

    # Update version
    if opts.update_self:
        update_self(fd, sys.argv[0])

    # With no URLs, -U alone is a valid invocation; anything else is an error
    if len(all_urls) < 1:
        if not opts.update_self:
            parser.error(u'you must provide at least one URL')

    retcode = fd.download(all_urls)

    # NOTE(review): the try: matching these handlers, the DownloadError
    # handler body, and the final sys.exit(retcode) appear missing from
    # this copy.
    except DownloadError:
    except SameFileError:
        sys.exit(u'ERROR: fixed output name but more than one file to download')
    except KeyboardInterrupt:
        sys.exit(u'\nERROR: Interrupted by user')