2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # License: Public domain code
24 # parse_qs was moved from the cgi module to the urlparse module recently.
26 from urlparse import parse_qs
28 from cgi import parse_qs
31 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.8) Gecko/20100723 Firefox/3.6.8',
32 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
33 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
34 'Accept-Language': 'en-us,en;q=0.5',
37 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
39 def preferredencoding():
40 """Get preferred encoding.
42 Returns the best encoding scheme for the system, based on
43 locale.getpreferredencoding() and some further tweaks.
45 def yield_preferredencoding():
47 pref = locale.getpreferredencoding()
53 return yield_preferredencoding().next()
55 def htmlentity_transform(matchobj):
56 """Transforms an HTML entity to a Unicode character.
58 This function receives a match object and is intended to be used with
59 the re.sub() function.
61 entity = matchobj.group(1)
63 # Known non-numeric HTML entity
64 if entity in htmlentitydefs.name2codepoint:
65 return unichr(htmlentitydefs.name2codepoint[entity])
68 mobj = re.match(ur'(?u)#(x?\d+)', entity)
70 numstr = mobj.group(1)
71 if numstr.startswith(u'x'):
73 numstr = u'0%s' % numstr
76 return unichr(long(numstr, base))
78 # Unknown entity in name, return its literal representation
79 return (u'&%s;' % entity)
81 def sanitize_title(utitle):
82 """Sanitizes a video title so it could be used as part of a filename."""
83 utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
84 return utitle.replace(unicode(os.sep), u'%')
86 def sanitize_open(filename, open_mode):
87 """Try to open the given filename, and slightly tweak it if this fails.
89 Attempts to open the given filename. If this fails, it tries to change
90 the filename slightly, step by step, until it's either able to open it
91 or it fails and raises a final exception, like the standard open()
94 It returns the tuple (stream, definitive_file_name).
98 if sys.platform == 'win32':
100 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
101 return (sys.stdout, filename)
102 stream = open(filename, open_mode)
103 return (stream, filename)
104 except (IOError, OSError), err:
105 # In case of error, try to remove win32 forbidden chars
106 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
108 # An exception here should be caught in the caller
109 stream = open(filename, open_mode)
110 return (stream, filename)
113 class DownloadError(Exception):
114 """Download Error exception.
116 This exception may be thrown by FileDownloader objects if they are not
117 configured to continue on errors. They will contain the appropriate
122 class SameFileError(Exception):
123 """Same File exception.
125 This exception will be thrown by FileDownloader objects if they detect
126 multiple files would have to be downloaded to the same file on disk.
130 class PostProcessingError(Exception):
131 """Post Processing exception.
133 This exception may be raised by PostProcessor's .run() method to
134 indicate an error in the postprocessing task.
138 class UnavailableVideoError(Exception):
139 """Unavailable Format exception.
141 This exception will be thrown when a video is requested
142 in a format that is not available for that video.
146 class ContentTooShortError(Exception):
147 """Content Too Short exception.
149 This exception may be raised by FileDownloader objects when a file they
150 download is too small for what the server announced first, indicating
151 the connection was probably interrupted.
157 def __init__(self, downloaded, expected):
158 self.downloaded = downloaded
159 self.expected = expected
161 class FileDownloader(object):
162 """File Downloader class.
164 File downloader objects are the ones responsible of downloading the
165 actual video file and writing it to disk if the user has requested
166 it, among some other tasks. In most cases there should be one per
167 program. As, given a video URL, the downloader doesn't know how to
168 extract all the needed information, task that InfoExtractors do, it
169 has to pass the URL to one of them.
171 For this, file downloader objects have a method that allows
172 InfoExtractors to be registered in a given order. When it is passed
173 a URL, the file downloader handles it to the first InfoExtractor it
174 finds that reports being able to handle it. The InfoExtractor extracts
175 all the information about the video or videos the URL refers to, and
176 asks the FileDownloader to process the video information, possibly
177 downloading the video.
179 File downloaders accept a lot of parameters. In order not to saturate
180 the object constructor with arguments, it receives a dictionary of
181 options instead. These options are available through the params
182 attribute for the InfoExtractors to use. The FileDownloader also
183 registers itself as the downloader in charge for the InfoExtractors
184 that are added to it, so this is a "mutual registration".
188 username: Username for authentication purposes.
189 password: Password for authentication purposes.
190 usenetrc: Use netrc for authentication instead.
191 quiet: Do not print messages to stdout.
192 forceurl: Force printing final URL.
193 forcetitle: Force printing title.
194 forcethumbnail: Force printing thumbnail URL.
195 forcedescription: Force printing description.
196 simulate: Do not download the video files.
197 format: Video format code.
198 format_limit: Highest quality format to try.
199 outtmpl: Template for output names.
200 ignoreerrors: Do not stop on download errors.
201 ratelimit: Download speed limit, in bytes/sec.
202 nooverwrites: Prevent overwriting files.
203 retries: Number of times to retry for HTTP error 5xx
204 continuedl: Try to continue downloads if possible.
205 noprogress: Do not print the progress bar.
206 playliststart: Playlist item to start at.
212 _download_retcode = None
213 _num_downloads = None
215 def __init__(self, params):
216 """Create a FileDownloader object with the given options."""
219 self._download_retcode = 0
220 self._num_downloads = 0
224 def pmkdir(filename):
225 """Create directory components in filename. Similar to Unix "mkdir -p"."""
226 components = filename.split(os.sep)
227 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
228 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
229 for dir in aggregate:
230 if not os.path.exists(dir):
234 def format_bytes(bytes):
237 if type(bytes) is str:
242 exponent = long(math.log(bytes, 1024.0))
243 suffix = 'bkMGTPEZY'[exponent]
244 converted = float(bytes) / float(1024**exponent)
245 return '%.2f%s' % (converted, suffix)
248 def calc_percent(byte_counter, data_len):
251 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
254 def calc_eta(start, now, total, current):
258 if current == 0 or dif < 0.001: # One millisecond
260 rate = float(current) / dif
261 eta = long((float(total) - float(current)) / rate)
262 (eta_mins, eta_secs) = divmod(eta, 60)
265 return '%02d:%02d' % (eta_mins, eta_secs)
268 def calc_speed(start, now, bytes):
270 if bytes == 0 or dif < 0.001: # One millisecond
271 return '%10s' % '---b/s'
272 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
275 def best_block_size(elapsed_time, bytes):
276 new_min = max(bytes / 2.0, 1.0)
277 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
278 if elapsed_time < 0.001:
280 rate = bytes / elapsed_time
288 def parse_bytes(bytestr):
289 """Parse a string indicating a byte quantity into a long integer."""
290 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
293 number = float(matchobj.group(1))
294 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
295 return long(round(number * multiplier))
297 def add_info_extractor(self, ie):
298 """Add an InfoExtractor object to the end of the list."""
300 ie.set_downloader(self)
302 def add_post_processor(self, pp):
303 """Add a PostProcessor object to the end of the chain."""
305 pp.set_downloader(self)
307 def to_stdout(self, message, skip_eol=False, ignore_encoding_errors=False):
308 """Print message to stdout if not in quiet mode."""
310 if not self.params.get('quiet', False):
311 print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
313 except (UnicodeEncodeError), err:
314 if not ignore_encoding_errors:
317 def to_stderr(self, message):
318 """Print message to stderr."""
319 print >>sys.stderr, message.encode(preferredencoding())
321 def fixed_template(self):
322 """Checks if the output template is fixed."""
323 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
325 def trouble(self, message=None):
326 """Determine action to take when a download problem appears.
328 Depending on if the downloader has been configured to ignore
329 download errors or not, this method may throw an exception or
330 not when errors are found, after printing the message.
332 if message is not None:
333 self.to_stderr(message)
334 if not self.params.get('ignoreerrors', False):
335 raise DownloadError(message)
336 self._download_retcode = 1
338 def slow_down(self, start_time, byte_counter):
339 """Sleep if the download speed is over the rate limit."""
340 rate_limit = self.params.get('ratelimit', None)
341 if rate_limit is None or byte_counter == 0:
344 elapsed = now - start_time
347 speed = float(byte_counter) / elapsed
348 if speed > rate_limit:
349 time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
351 def report_destination(self, filename):
352 """Report destination filename."""
353 self.to_stdout(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
355 def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
356 """Report download progress."""
357 if self.params.get('noprogress', False):
359 self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
360 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
362 def report_resuming_byte(self, resume_len):
363 """Report attempt to resume at given byte."""
364 self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
366 def report_retry(self, count, retries):
367 """Report retry in case of HTTP error 5xx"""
368 self.to_stdout(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
370 def report_file_already_downloaded(self, file_name):
371 """Report file has already been fully downloaded."""
373 self.to_stdout(u'[download] %s has already been downloaded' % file_name)
374 except (UnicodeEncodeError), err:
375 self.to_stdout(u'[download] The file has already been downloaded')
377 def report_unable_to_resume(self):
378 """Report it was impossible to resume download."""
379 self.to_stdout(u'[download] Unable to resume')
381 def report_finish(self):
382 """Report download finished."""
383 if self.params.get('noprogress', False):
384 self.to_stdout(u'[download] Download completed')
388 def increment_downloads(self):
389 """Increment the ordinal that assigns a number to each file."""
390 self._num_downloads += 1
392 def process_info(self, info_dict):
393 """Process a single dictionary returned by an InfoExtractor."""
394 # Do nothing else if in simulate mode
395 if self.params.get('simulate', False):
397 if self.params.get('forcetitle', False):
398 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
399 if self.params.get('forceurl', False):
400 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
401 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
402 print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
403 if self.params.get('forcedescription', False) and 'description' in info_dict:
404 print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
409 template_dict = dict(info_dict)
410 template_dict['epoch'] = unicode(long(time.time()))
411 template_dict['ord'] = unicode('%05d' % self._num_downloads)
412 filename = self.params['outtmpl'] % template_dict
413 except (ValueError, KeyError), err:
414 self.trouble(u'ERROR: invalid system charset or erroneous output template')
416 if self.params.get('nooverwrites', False) and os.path.exists(filename):
417 self.to_stderr(u'WARNING: file exists and will be skipped')
421 self.pmkdir(filename)
422 except (OSError, IOError), err:
423 self.trouble(u'ERROR: unable to create directories: %s' % str(err))
427 success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
428 except (OSError, IOError), err:
429 raise UnavailableVideoError
430 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
431 self.trouble(u'ERROR: unable to download video data: %s' % str(err))
433 except (ContentTooShortError, ), err:
434 self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
439 self.post_process(filename, info_dict)
440 except (PostProcessingError), err:
441 self.trouble(u'ERROR: postprocessing: %s' % str(err))
444 def download(self, url_list):
445 """Download a given list of URLs."""
446 if len(url_list) > 1 and self.fixed_template():
447 raise SameFileError(self.params['outtmpl'])
450 suitable_found = False
452 # Go to next InfoExtractor if not suitable
453 if not ie.suitable(url):
456 # Suitable InfoExtractor found
457 suitable_found = True
459 # Extract information from URL and process it
462 # Suitable InfoExtractor had been found; go to next URL
465 if not suitable_found:
466 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
468 return self._download_retcode
470 def post_process(self, filename, ie_info):
471 """Run the postprocessing chain on the given file."""
473 info['filepath'] = filename
479 def _download_with_rtmpdump(self, filename, url, player_url):
480 self.report_destination(filename)
482 # Check for rtmpdump first
484 subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
485 except (OSError, IOError):
486 self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
489 # Download using rtmpdump. rtmpdump returns exit code 2 when
490 # the connection was interrumpted and resuming appears to be
491 # possible. This is part of rtmpdump's normal usage, AFAIK.
492 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', filename]
493 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
494 while retval == 2 or retval == 1:
495 prevsize = os.path.getsize(filename)
496 self.to_stdout(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
497 time.sleep(5.0) # This seems to be needed
498 retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
499 cursize = os.path.getsize(filename)
500 if prevsize == cursize and retval == 1:
503 self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
506 self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
509 def _do_download(self, filename, url, player_url):
510 # Attempt to download using rtmpdump
511 if url.startswith('rtmp'):
512 return self._download_with_rtmpdump(filename, url, player_url)
516 basic_request = urllib2.Request(url, None, std_headers)
517 request = urllib2.Request(url, None, std_headers)
519 # Establish possible resume length
520 if os.path.isfile(filename):
521 resume_len = os.path.getsize(filename)
525 # Request parameters in case of being able to resume
526 if self.params.get('continuedl', False) and resume_len != 0:
527 self.report_resuming_byte(resume_len)
528 request.add_header('Range','bytes=%d-' % resume_len)
532 retries = self.params.get('retries', 0)
533 while count <= retries:
534 # Establish connection
536 data = urllib2.urlopen(request)
538 except (urllib2.HTTPError, ), err:
539 if (err.code < 500 or err.code >= 600) and err.code != 416:
540 # Unexpected HTTP error
542 elif err.code == 416:
543 # Unable to resume (requested range not satisfiable)
545 # Open the connection again without the range header
546 data = urllib2.urlopen(basic_request)
547 content_length = data.info()['Content-Length']
548 except (urllib2.HTTPError, ), err:
549 if err.code < 500 or err.code >= 600:
552 # Examine the reported length
553 if (content_length is not None and
554 (resume_len - 100 < long(content_length) < resume_len + 100)):
555 # The file had already been fully downloaded.
556 # Explanation to the above condition: in issue #175 it was revealed that
557 # YouTube sometimes adds or removes a few bytes from the end of the file,
558 # changing the file size slightly and causing problems for some users. So
559 # I decided to implement a suggested change and consider the file
560 # completely downloaded if the file size differs less than 100 bytes from
561 # the one in the hard drive.
562 self.report_file_already_downloaded(filename)
565 # The length does not match, we start the download over
566 self.report_unable_to_resume()
572 self.report_retry(count, retries)
575 self.trouble(u'ERROR: giving up after %s retries' % retries)
578 data_len = data.info().get('Content-length', None)
579 data_len_str = self.format_bytes(data_len)
586 data_block = data.read(block_size)
588 data_block_len = len(data_block)
589 if data_block_len == 0:
591 byte_counter += data_block_len
593 # Open file just in time
596 (stream, filename) = sanitize_open(filename, open_mode)
597 self.report_destination(filename)
598 except (OSError, IOError), err:
599 self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
602 stream.write(data_block)
603 except (IOError, OSError), err:
604 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
606 block_size = self.best_block_size(after - before, data_block_len)
609 percent_str = self.calc_percent(byte_counter, data_len)
610 eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
611 speed_str = self.calc_speed(start, time.time(), byte_counter)
612 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
615 self.slow_down(start, byte_counter)
618 if data_len is not None and str(byte_counter) != data_len:
619 raise ContentTooShortError(byte_counter, long(data_len))
622 class InfoExtractor(object):
623 """Information Extractor class.
625 Information extractors are the classes that, given a URL, extract
626 information from the video (or videos) the URL refers to. This
627 information includes the real video URL, the video title and simplified
628 title, author and others. The information is stored in a dictionary
629 which is then passed to the FileDownloader. The FileDownloader
630 processes this information possibly downloading the video to the file
631 system, among other possible outcomes. The dictionaries must include
632 the following fields:
634 id: Video identifier.
635 url: Final video URL.
636 uploader: Nickname of the video uploader.
637 title: Literal title.
638 stitle: Simplified title.
639 ext: Video filename extension.
640 format: Video format.
641 player_url: SWF Player URL (may be None).
643 The following fields are optional. Their primary purpose is to allow
644 youtube-dl to serve as the backend for a video search function, such
645 as the one in youtube2mp3. They are only used when their respective
646 forced printing functions are called:
648 thumbnail: Full URL to a video thumbnail image.
649 description: One-line video description.
651 Subclasses of this one should re-define the _real_initialize() and
652 _real_extract() methods, as well as the suitable() static method.
653 Probably, they should also be instantiated and added to the main
660 def __init__(self, downloader=None):
661 """Constructor. Receives an optional downloader."""
663 self.set_downloader(downloader)
667 """Receives a URL and returns True if suitable for this IE."""
670 def initialize(self):
671 """Initializes an instance (authentication, etc)."""
673 self._real_initialize()
676 def extract(self, url):
677 """Extracts URL information and returns it in list of dicts."""
679 return self._real_extract(url)
681 def set_downloader(self, downloader):
682 """Sets the downloader for this IE."""
683 self._downloader = downloader
685 def _real_initialize(self):
686 """Real initialization process. Redefine in subclasses."""
689 def _real_extract(self, url):
690 """Real extraction process. Redefine in subclasses."""
693 class YoutubeIE(InfoExtractor):
694 """Information extractor for youtube.com."""
696 _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$'
697 _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
698 _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
699 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
700 _NETRC_MACHINE = 'youtube'
701 # Listed in order of quality
702 _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
703 _video_extensions = {
709 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
716 return (re.match(YoutubeIE._VALID_URL, url) is not None)
718 def report_lang(self):
719 """Report attempt to set language."""
720 self._downloader.to_stdout(u'[youtube] Setting language')
722 def report_login(self):
723 """Report attempt to log in."""
724 self._downloader.to_stdout(u'[youtube] Logging in')
726 def report_age_confirmation(self):
727 """Report attempt to confirm age."""
728 self._downloader.to_stdout(u'[youtube] Confirming age')
730 def report_video_webpage_download(self, video_id):
731 """Report attempt to download video webpage."""
732 self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id)
734 def report_video_info_webpage_download(self, video_id):
735 """Report attempt to download video info webpage."""
736 self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
738 def report_information_extraction(self, video_id):
739 """Report attempt to extract video information."""
740 self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
742 def report_unavailable_format(self, video_id, format):
743 """Report extracted video URL."""
744 self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
746 def report_rtmp_download(self):
747 """Indicate the download will use the RTMP protocol."""
748 self._downloader.to_stdout(u'[youtube] RTMP download detected')
750 def _real_initialize(self):
751 if self._downloader is None:
756 downloader_params = self._downloader.params
758 # Attempt to use provided username and password or .netrc data
759 if downloader_params.get('username', None) is not None:
760 username = downloader_params['username']
761 password = downloader_params['password']
762 elif downloader_params.get('usenetrc', False):
764 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
769 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
770 except (IOError, netrc.NetrcParseError), err:
771 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
775 request = urllib2.Request(self._LANG_URL, None, std_headers)
778 urllib2.urlopen(request).read()
779 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
780 self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
783 # No authentication to be performed
789 'current_form': 'loginForm',
791 'action_login': 'Log In',
792 'username': username,
793 'password': password,
795 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
798 login_results = urllib2.urlopen(request).read()
799 if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
800 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
802 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
803 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
809 'action_confirm': 'Confirm',
811 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
813 self.report_age_confirmation()
814 age_results = urllib2.urlopen(request).read()
815 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
816 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
819 def _real_extract(self, url):
820 # Extract video id from URL
821 mobj = re.match(self._VALID_URL, url)
823 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
825 video_id = mobj.group(2)
828 self.report_video_webpage_download(video_id)
829 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers)
831 video_webpage = urllib2.urlopen(request).read()
832 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
833 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
836 # Attempt to extract SWF player URL
837 mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage)
839 player_url = mobj.group(1)
844 self.report_video_info_webpage_download(video_id)
845 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
846 video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
847 % (video_id, el_type))
848 request = urllib2.Request(video_info_url, None, std_headers)
850 video_info_webpage = urllib2.urlopen(request).read()
851 video_info = parse_qs(video_info_webpage)
852 if 'token' in video_info:
854 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
855 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
857 if 'token' not in video_info:
858 if 'reason' in video_info:
859 self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
861 self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
864 # Start extracting information
865 self.report_information_extraction(video_id)
868 if 'author' not in video_info:
869 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
871 video_uploader = urllib.unquote_plus(video_info['author'][0])
874 if 'title' not in video_info:
875 self._downloader.trouble(u'ERROR: unable to extract video title')
877 video_title = urllib.unquote_plus(video_info['title'][0])
878 video_title = video_title.decode('utf-8')
879 video_title = sanitize_title(video_title)
882 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
883 simple_title = simple_title.strip(ur'_')
886 if 'thumbnail_url' not in video_info:
887 self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
889 else: # don't panic if we can't find it
890 video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
893 video_description = 'No description available.'
894 if self._downloader.params.get('forcedescription', False):
895 mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
897 video_description = mobj.group(1)
900 video_token = urllib.unquote_plus(video_info['token'][0])
902 # Decide which formats to download
903 requested_format = self._downloader.params.get('format', None)
904 get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token)
906 if 'fmt_url_map' in video_info:
907 url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
908 format_limit = self._downloader.params.get('format_limit', None)
909 if format_limit is not None and format_limit in self._available_formats:
910 format_list = self._available_formats[self._available_formats.index(format_limit):]
912 format_list = self._available_formats
913 existing_formats = [x for x in format_list if x in url_map]
914 if len(existing_formats) == 0:
915 self._downloader.trouble(u'ERROR: no known formats available for video')
917 if requested_format is None:
918 video_url_list = [(existing_formats[0], get_video_template % existing_formats[0])] # Best quality
919 elif requested_format == '-1':
920 video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats
922 video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format
924 elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
925 self.report_rtmp_download()
926 video_url_list = [(None, video_info['conn'][0])]
929 self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
932 for format_param, video_real_url in video_url_list:
933 # At this point we have a new video
934 self._downloader.increment_downloads()
937 video_extension = self._video_extensions.get(format_param, 'flv')
939 # Find the video URL in fmt_url_map or conn paramters
941 # Process video information
942 self._downloader.process_info({
943 'id': video_id.decode('utf-8'),
944 'url': video_real_url.decode('utf-8'),
945 'uploader': video_uploader.decode('utf-8'),
946 'title': video_title,
947 'stitle': simple_title,
948 'ext': video_extension.decode('utf-8'),
949 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
950 'thumbnail': video_thumbnail.decode('utf-8'),
951 'description': video_description.decode('utf-8'),
952 'player_url': player_url,
954 except UnavailableVideoError, err:
955 self._downloader.trouble(u'ERROR: unable to download video (format may not be available)')
958 class MetacafeIE(InfoExtractor):
959 """Information Extractor for metacafe.com."""
961 _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
962 _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
963 _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
966 def __init__(self, youtube_ie, downloader=None):
967 InfoExtractor.__init__(self, downloader)
968 self._youtube_ie = youtube_ie
972 return (re.match(MetacafeIE._VALID_URL, url) is not None)
974 def report_disclaimer(self):
975 """Report disclaimer retrieval."""
976 self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
978 def report_age_confirmation(self):
979 """Report attempt to confirm age."""
980 self._downloader.to_stdout(u'[metacafe] Confirming age')
982 def report_download_webpage(self, video_id):
983 """Report webpage download."""
984 self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
986 def report_extraction(self, video_id):
987 """Report information extraction."""
988 self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
990 def _real_initialize(self):
991 # Retrieve disclaimer
992 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
994 self.report_disclaimer()
995 disclaimer = urllib2.urlopen(request).read()
996 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
997 self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
1003 'submit': "Continue - I'm over 18",
1005 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
1007 self.report_age_confirmation()
1008 disclaimer = urllib2.urlopen(request).read()
1009 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1010 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
# Extract a Metacafe video: parse id/title from the URL, delegate 'yt-' ids
# to the YouTube extractor, scrape the watch page for the media URL (two
# scraping strategies), then hand the info dict to the downloader.
# NOTE(review): this listing elides the `try:` lines, `return` statements
# after trouble() calls, and the else/if-None guards around several regex
# matches — do not assume the visible lines are contiguous.
1013 def _real_extract(self, url):
1014 # Extract id and simplified title from URL
1015 mobj = re.match(self._VALID_URL, url)
1017 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1020 video_id = mobj.group(1)
1022 # Check if video comes from YouTube
1023 mobj2 = re.match(r'^yt-(.*)$', video_id)
1024 if mobj2 is not None:
# Delegate re-hosted YouTube videos to the YouTube extractor.
1025 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1028 # At this point we have a new video
1029 self._downloader.increment_downloads()
1031 simple_title = mobj.group(2).decode('utf-8')
1033 # Retrieve video webpage to extract further information
1034 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1036 self.report_download_webpage(video_id)
1037 webpage = urllib2.urlopen(request).read()
1038 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
# NOTE(review): message typo — should read "unable to retrieve".
1039 self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1042 # Extract URL, uploader and title from webpage
1043 self.report_extraction(video_id)
# Strategy 1: a plain &mediaURL= query parameter embedded in the page.
1044 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
1045 if mobj is not None:
1046 mediaURL = urllib.unquote(mobj.group(1))
# Assumes a three-letter extension (flv/mp4) — TODO confirm.
1047 video_extension = mediaURL[-3:]
1049 # Extract gdaKey if available
1050 mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1052 video_url = mediaURL
1054 gdaKey = mobj.group(1)
1055 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
# Strategy 2: parse the flashvars query string for JSON-ish mediaData.
1057 mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
1059 self._downloader.trouble(u'ERROR: unable to extract media URL')
1061 vardict = parse_qs(mobj.group(1))
1062 if 'mediaData' not in vardict:
1063 self._downloader.trouble(u'ERROR: unable to extract media URL')
1065 mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
1067 self._downloader.trouble(u'ERROR: unable to extract media URL')
# Unescape JSON-escaped slashes in the media URL.
1069 mediaURL = mobj.group(1).replace('\\/', '/')
1070 video_extension = mediaURL[-3:]
1071 video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
1073 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
1075 self._downloader.trouble(u'ERROR: unable to extract title')
1077 video_title = mobj.group(1).decode('utf-8')
1078 video_title = sanitize_title(video_title)
1080 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
1082 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1084 video_uploader = mobj.group(1)
1087 # Process video information
1088 self._downloader.process_info({
1089 'id': video_id.decode('utf-8'),
1090 'url': video_url.decode('utf-8'),
1091 'uploader': video_uploader.decode('utf-8'),
1092 'title': video_title,
1093 'stitle': simple_title,
1094 'ext': video_extension.decode('utf-8'),
1098 except UnavailableVideoError:
1099 self._downloader.trouble(u'ERROR: unable to download video')
# NOTE(review): this listing elides `try:` lines, `return` statements after
# trouble() calls, and the `if mobj is None:` guards — the visible lines are
# not contiguous in the original file.
1102 class DailymotionIE(InfoExtractor):
1103 """Information Extractor for Dailymotion"""
# URL groups: (1) video id, (2) URL-slug used as the simplified title.
1105 _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1107 def __init__(self, downloader=None):
1108 InfoExtractor.__init__(self, downloader)
# Tail of suitable(url): the def line is elided in this listing.
1112 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1114 def report_download_webpage(self, video_id):
1115 """Report webpage download."""
1116 self._downloader.to_stdout(u'[dailymotion] %s: Downloading webpage' % video_id)
1118 def report_extraction(self, video_id):
1119 """Report information extraction."""
1120 self._downloader.to_stdout(u'[dailymotion] %s: Extracting information' % video_id)
# No initialization needed for Dailymotion (body elided here).
1122 def _real_initialize(self):
1125 def _real_extract(self, url):
1126 # Extract id and simplified title from URL
1127 mobj = re.match(self._VALID_URL, url)
1129 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1132 # At this point we have a new video
1133 self._downloader.increment_downloads()
1134 video_id = mobj.group(1)
1136 simple_title = mobj.group(2).decode('utf-8')
# Dailymotion streams are assumed to be FLV.
1137 video_extension = 'flv'
1139 # Retrieve video webpage to extract further information
1140 request = urllib2.Request(url)
1142 self.report_download_webpage(video_id)
1143 webpage = urllib2.urlopen(request).read()
1144 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
# NOTE(review): message typo — should read "unable to retrieve".
1145 self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1148 # Extract URL, uploader and title from webpage
1149 self.report_extraction(video_id)
# Media URL comes from the addVariable("video", "...") flash call.
1150 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1152 self._downloader.trouble(u'ERROR: unable to extract media URL')
1154 mediaURL = urllib.unquote(mobj.group(1))
1156 # if needed add http://www.dailymotion.com/ if relative URL
1158 video_url = mediaURL
1160 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1161 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1163 self._downloader.trouble(u'ERROR: unable to extract title')
1165 video_title = mobj.group(1).decode('utf-8')
1166 video_title = sanitize_title(video_title)
1168 mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a>', webpage)
1170 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1172 video_uploader = mobj.group(1)
1175 # Process video information
1176 self._downloader.process_info({
1177 'id': video_id.decode('utf-8'),
1178 'url': video_url.decode('utf-8'),
1179 'uploader': video_uploader.decode('utf-8'),
1180 'title': video_title,
1181 'stitle': simple_title,
1182 'ext': video_extension.decode('utf-8'),
1186 except UnavailableVideoError:
1187 self._downloader.trouble(u'ERROR: unable to download video')
# NOTE(review): this listing elides `try:` lines, `return` statements after
# trouble() calls, and `if mobj is None:` guards throughout this class.
1189 class GoogleIE(InfoExtractor):
1190 """Information extractor for video.google.com."""
# Matches videoplay URLs on all known Google Video country domains;
# group 1 is the docid.
1192 _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1194 def __init__(self, downloader=None):
1195 InfoExtractor.__init__(self, downloader)
# Tail of suitable(url): the def line is elided in this listing.
1199 return (re.match(GoogleIE._VALID_URL, url) is not None)
1201 def report_download_webpage(self, video_id):
1202 """Report webpage download."""
1203 self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id)
1205 def report_extraction(self, video_id):
1206 """Report information extraction."""
1207 self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id)
# No initialization needed (body elided here).
1209 def _real_initialize(self):
1212 def _real_extract(self, url):
1213 # Extract id from URL
1214 mobj = re.match(self._VALID_URL, url)
1216 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1219 # At this point we have a new video
1220 self._downloader.increment_downloads()
1221 video_id = mobj.group(1)
# Default: downloadable MP4; falls back to FLV below when no download_url.
1223 video_extension = 'mp4'
1225 # Retrieve video webpage to extract further information
1226 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1228 self.report_download_webpage(video_id)
1229 webpage = urllib2.urlopen(request).read()
1230 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1231 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1234 # Extract URL, uploader, and title from webpage
1235 self.report_extraction(video_id)
1236 mobj = re.search(r"download_url:'([^']+)'", webpage)
# No direct download link: fall back to the flash videoUrl (FLV).
1238 video_extension = 'flv'
1239 mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1241 self._downloader.trouble(u'ERROR: unable to extract media URL')
1243 mediaURL = urllib.unquote(mobj.group(1))
# Decode the JS hex escapes for '=' and '&' embedded in the URL.
1244 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1245 mediaURL = mediaURL.replace('\\x26', '\x26')
1247 video_url = mediaURL
1249 mobj = re.search(r'<title>(.*)</title>', webpage)
1251 self._downloader.trouble(u'ERROR: unable to extract title')
1253 video_title = mobj.group(1).decode('utf-8')
1254 video_title = sanitize_title(video_title)
1255 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1257 # Extract video description
1258 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1260 self._downloader.trouble(u'ERROR: unable to extract video description')
1262 video_description = mobj.group(1).decode('utf-8')
1263 if not video_description:
1264 video_description = 'No description available.'
1266 # Extract video thumbnail
1267 if self._downloader.params.get('forcethumbnail', False):
# The thumbnail is only available on the search page, so issue a search
# for this specific video id.
1268 request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1270 webpage = urllib2.urlopen(request).read()
1271 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1272 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1274 mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1276 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1278 video_thumbnail = mobj.group(1)
1279 else: # we need something to pass to process_info
1280 video_thumbnail = ''
1284 # Process video information
1285 self._downloader.process_info({
1286 'id': video_id.decode('utf-8'),
1287 'url': video_url.decode('utf-8'),
1289 'title': video_title,
1290 'stitle': simple_title,
1291 'ext': video_extension.decode('utf-8'),
1295 except UnavailableVideoError:
1296 self._downloader.trouble(u'ERROR: unable to download video')
# NOTE(review): this listing elides `try:` lines, `return` statements after
# trouble() calls, and `if mobj is None:` guards throughout this class.
1299 class PhotobucketIE(InfoExtractor):
1300 """Information extractor for photobucket.com."""
# Group 1 is the .flv filename from the 'current=' query parameter.
1302 _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1304 def __init__(self, downloader=None):
1305 InfoExtractor.__init__(self, downloader)
# Tail of suitable(url): the def line is elided in this listing.
1309 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1311 def report_download_webpage(self, video_id):
1312 """Report webpage download."""
1313 self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id)
1315 def report_extraction(self, video_id):
1316 """Report information extraction."""
1317 self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id)
# No initialization needed (body elided here).
1319 def _real_initialize(self):
1322 def _real_extract(self, url):
1323 # Extract id from URL
1324 mobj = re.match(self._VALID_URL, url)
1326 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1329 # At this point we have a new video
1330 self._downloader.increment_downloads()
1331 video_id = mobj.group(1)
1333 video_extension = 'flv'
1335 # Retrieve video webpage to extract further information
1336 request = urllib2.Request(url)
1338 self.report_download_webpage(video_id)
1339 webpage = urllib2.urlopen(request).read()
1340 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1341 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1344 # Extract URL, uploader, and title from webpage
1345 self.report_extraction(video_id)
# Media URL is the 'file=' parameter of the video_src link tag.
1346 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1348 self._downloader.trouble(u'ERROR: unable to extract media URL')
1350 mediaURL = urllib.unquote(mobj.group(1))
1352 video_url = mediaURL
# Title and uploader both come from the page <title>.
1354 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1356 self._downloader.trouble(u'ERROR: unable to extract title')
1358 video_title = mobj.group(1).decode('utf-8')
1359 video_title = sanitize_title(video_title)
1360 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1362 video_uploader = mobj.group(2).decode('utf-8')
1365 # Process video information
1366 self._downloader.process_info({
1367 'id': video_id.decode('utf-8'),
1368 'url': video_url.decode('utf-8'),
1369 'uploader': video_uploader,
1370 'title': video_title,
1371 'stitle': simple_title,
1372 'ext': video_extension.decode('utf-8'),
1376 except UnavailableVideoError:
1377 self._downloader.trouble(u'ERROR: unable to download video')
# NOTE(review): this listing elides `try:` lines, `return` statements after
# trouble() calls, and `if mobj is None:` guards throughout this class.
1380 class YahooIE(InfoExtractor):
1381 """Information extractor for video.yahoo.com."""
1383 # _VALID_URL matches all Yahoo! Video URLs
1384 # _VPAGE_URL matches only the extractable '/watch/' URLs
1385 _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1386 _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1388 def __init__(self, downloader=None):
1389 InfoExtractor.__init__(self, downloader)
# Tail of suitable(url): the def line is elided in this listing.
1393 return (re.match(YahooIE._VALID_URL, url) is not None)
1395 def report_download_webpage(self, video_id):
1396 """Report webpage download."""
1397 self._downloader.to_stdout(u'[video.yahoo] %s: Downloading webpage' % video_id)
1399 def report_extraction(self, video_id):
1400 """Report information extraction."""
1401 self._downloader.to_stdout(u'[video.yahoo] %s: Extracting information' % video_id)
# No initialization needed (body elided here).
1403 def _real_initialize(self):
1406 def _real_extract(self, url, new_video=True):
1407 # Extract ID from URL
1408 mobj = re.match(self._VALID_URL, url)
1410 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1413 # At this point we have a new video
1414 self._downloader.increment_downloads()
1415 video_id = mobj.group(2)
1416 video_extension = 'flv'
1418 # Rewrite valid but non-extractable URLs as
1419 # extractable English language /watch/ URLs
1420 if re.match(self._VPAGE_URL, url) is None:
1421 request = urllib2.Request(url)
1423 webpage = urllib2.urlopen(request).read()
1424 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1425 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1428 mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1430 self._downloader.trouble(u'ERROR: Unable to extract id field')
1432 yahoo_id = mobj.group(1)
1434 mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1436 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1438 yahoo_vid = mobj.group(1)
# Recurse once with the canonical /watch/ URL.
1440 url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1441 return self._real_extract(url, new_video=False)
1443 # Retrieve video webpage to extract further information
1444 request = urllib2.Request(url)
1446 self.report_download_webpage(video_id)
1447 webpage = urllib2.urlopen(request).read()
1448 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1449 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1452 # Extract uploader and title from webpage
1453 self.report_extraction(video_id)
1454 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1456 self._downloader.trouble(u'ERROR: unable to extract video title')
1458 video_title = mobj.group(1).decode('utf-8')
1459 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1461 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1463 self._downloader.trouble(u'ERROR: unable to extract video uploader')
# NOTE(review): group(1) captures '(people|profile)', not the uploader
# name — the name is in group(2). Likely a bug; verify against full file.
1465 video_uploader = mobj.group(1).decode('utf-8')
1467 # Extract video thumbnail
1468 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1470 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1472 video_thumbnail = mobj.group(1).decode('utf-8')
1474 # Extract video description
1475 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1477 self._downloader.trouble(u'ERROR: unable to extract video description')
1479 video_description = mobj.group(1).decode('utf-8')
1480 if not video_description: video_description = 'No description available.'
1482 # Extract video height and width
1483 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1485 self._downloader.trouble(u'ERROR: unable to extract video height')
1487 yv_video_height = mobj.group(1)
1489 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1491 self._downloader.trouble(u'ERROR: unable to extract video width')
1493 yv_video_width = mobj.group(1)
1495 # Retrieve video playlist to extract media URL
1496 # I'm not completely sure what all these options are, but we
1497 # seem to need most of them, otherwise the server sends a 401.
1498 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
1499 yv_bitrate = '700' # according to Wikipedia this is hard-coded
1500 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1501 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1502 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1504 self.report_download_webpage(video_id)
1505 webpage = urllib2.urlopen(request).read()
1506 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1507 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1510 # Extract media URL from playlist XML
1511 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1513 self._downloader.trouble(u'ERROR: Unable to extract media URL')
1515 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1516 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1519 # Process video information
1520 self._downloader.process_info({
1521 'id': video_id.decode('utf-8'),
1523 'uploader': video_uploader,
1524 'title': video_title,
1525 'stitle': simple_title,
1526 'ext': video_extension.decode('utf-8'),
# NOTE(review): 'thumbnail' and 'description' appear twice in this dict
# literal; the later entries (lines 1529-1530) win. The duplicates should
# be removed.
1527 'thumbnail': video_thumbnail.decode('utf-8'),
1528 'description': video_description,
1529 'thumbnail': video_thumbnail,
1530 'description': video_description,
1533 except UnavailableVideoError:
1534 self._downloader.trouble(u'ERROR: unable to download video')
# NOTE(review): this listing elides `try:` lines, `return` statements after
# trouble() calls, and `if mobj is None:` guards throughout this class.
1537 class GenericIE(InfoExtractor):
1538 """Generic last-resort information extractor."""
1540 def __init__(self, downloader=None):
1541 InfoExtractor.__init__(self, downloader)
1547 def report_download_webpage(self, video_id):
1548 """Report webpage download."""
# Warn the user: generic extraction is best-effort only.
1549 self._downloader.to_stdout(u'WARNING: Falling back on generic information extractor.')
1550 self._downloader.to_stdout(u'[generic] %s: Downloading webpage' % video_id)
1552 def report_extraction(self, video_id):
1553 """Report information extraction."""
1554 self._downloader.to_stdout(u'[generic] %s: Extracting information' % video_id)
# No initialization needed (body elided here).
1556 def _real_initialize(self):
1559 def _real_extract(self, url):
1560 # At this point we have a new video
1561 self._downloader.increment_downloads()
# Provisional id from the URL; replaced below once the media URL is known.
1563 video_id = url.split('/')[-1]
1564 request = urllib2.Request(url)
1566 self.report_download_webpage(video_id)
1567 webpage = urllib2.urlopen(request).read()
1568 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1569 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1571 except ValueError, err:
1572 # since this is the last-resort InfoExtractor, if
1573 # this error is thrown, it'll be thrown here
1574 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1577 # Start with something easy: JW Player in SWFObject
1578 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1580 # Broaden the search a little bit
1581 mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1583 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1586 # It's possible that one of the regexes
1587 # matched, but returned an empty group:
1588 if mobj.group(1) is None:
1589 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1592 video_url = urllib.unquote(mobj.group(1))
1593 video_id = os.path.basename(video_url)
1595 # here's a fun little line of code for you:
# Derive extension, then strip it from the id.
1596 video_extension = os.path.splitext(video_id)[1][1:]
1597 video_id = os.path.splitext(video_id)[0]
1599 # it's tempting to parse this further, but you would
1600 # have to take into account all the variations like
1601 # Video Title - Site Name
1602 # Site Name | Video Title
1603 # Video Title - Tagline | Site Name
1604 # and so on and so forth; it's just not practical
1605 mobj = re.search(r'<title>(.*)</title>', webpage)
1607 self._downloader.trouble(u'ERROR: unable to extract title')
1609 video_title = mobj.group(1).decode('utf-8')
1610 video_title = sanitize_title(video_title)
1611 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1613 # video uploader is domain name
1614 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
# NOTE(review): error message says "title" but this is the uploader/domain.
1616 self._downloader.trouble(u'ERROR: unable to extract title')
1618 video_uploader = mobj.group(1).decode('utf-8')
1621 # Process video information
1622 self._downloader.process_info({
1623 'id': video_id.decode('utf-8'),
1624 'url': video_url.decode('utf-8'),
1625 'uploader': video_uploader,
1626 'title': video_title,
1627 'stitle': simple_title,
1628 'ext': video_extension.decode('utf-8'),
1632 except UnavailableVideoError, err:
1633 self._downloader.trouble(u'ERROR: unable to download video')
# NOTE(review): this listing elides `try:`, `return`, `if mobj is None:` and
# some loop-header lines (`while True:`, `video_ids = []`, `pagenum = 1`).
1636 class YoutubeSearchIE(InfoExtractor):
1637 """Information Extractor for YouTube search queries."""
# Queries look like 'ytsearch:foo', 'ytsearch5:foo' or 'ytsearchall:foo'.
1638 _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1639 _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1640 _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1641 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1643 _max_youtube_results = 1000
1645 def __init__(self, youtube_ie, downloader=None):
1646 InfoExtractor.__init__(self, downloader)
# Individual results are delegated to this YouTube extractor.
1647 self._youtube_ie = youtube_ie
# Tail of suitable(url): the def line is elided in this listing.
1651 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1653 def report_download_page(self, query, pagenum):
1654 """Report attempt to download playlist page with given number."""
1655 query = query.decode(preferredencoding())
1656 self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1658 def _real_initialize(self):
1659 self._youtube_ie.initialize()
1661 def _real_extract(self, query):
1662 mobj = re.match(self._VALID_QUERY, query)
1664 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
# Split 'ytsearchN' prefix from the search terms.
1667 prefix, query = query.split(':')
1669 query = query.encode('utf-8')
# Empty prefix => download a single result.
1671 self._download_n_results(query, 1)
1673 elif prefix == 'all':
1674 self._download_n_results(query, self._max_youtube_results)
# n <= 0 is rejected (the int-parsing lines are elided here).
1680 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1682 elif n > self._max_youtube_results:
1683 self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
1684 n = self._max_youtube_results
1685 self._download_n_results(query, n)
1687 except ValueError: # parsing prefix as integer fails
1688 self._download_n_results(query, 1)
1691 def _download_n_results(self, query, n):
1692 """Downloads a specified number of results for a query"""
1695 already_seen = set()
1699 self.report_download_page(query, pagenum)
1700 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1701 request = urllib2.Request(result_url, None, std_headers)
1703 page = urllib2.urlopen(request).read()
1704 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1705 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1708 # Extract video identifiers
1709 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
# NOTE(review): fragile slicing — takes the match text, splits on '=' and
# drops the trailing quote to recover the video id.
1710 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1711 if video_id not in already_seen:
1712 video_ids.append(video_id)
1713 already_seen.add(video_id)
1714 if len(video_ids) == n:
1715 # Specified n videos reached
1716 for id in video_ids:
1717 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
# No "Next" link => last results page: extract what we have.
1720 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1721 for id in video_ids:
1722 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1725 pagenum = pagenum + 1
# NOTE(review): this listing elides `try:`, `return`, `if mobj is None:` and
# some loop-header lines, exactly parallel to YoutubeSearchIE above.
1727 class GoogleSearchIE(InfoExtractor):
1728 """Information Extractor for Google Video search queries."""
1729 _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1730 _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1731 _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1732 _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1734 _max_google_results = 1000
1736 def __init__(self, google_ie, downloader=None):
1737 InfoExtractor.__init__(self, downloader)
# Individual results are delegated to this GoogleIE instance.
1738 self._google_ie = google_ie
# Tail of suitable(url): the def line is elided in this listing.
1742 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1744 def report_download_page(self, query, pagenum):
1745 """Report attempt to download playlist page with given number."""
1746 query = query.decode(preferredencoding())
1747 self._downloader.to_stdout(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1749 def _real_initialize(self):
1750 self._google_ie.initialize()
1752 def _real_extract(self, query):
1753 mobj = re.match(self._VALID_QUERY, query)
1755 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1758 prefix, query = query.split(':')
1760 query = query.encode('utf-8')
1762 self._download_n_results(query, 1)
1764 elif prefix == 'all':
1765 self._download_n_results(query, self._max_google_results)
1771 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1773 elif n > self._max_google_results:
1774 self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
1775 n = self._max_google_results
1776 self._download_n_results(query, n)
1778 except ValueError: # parsing prefix as integer fails
1779 self._download_n_results(query, 1)
1782 def _download_n_results(self, query, n):
1783 """Downloads a specified number of results for a query"""
1786 already_seen = set()
1790 self.report_download_page(query, pagenum)
# NOTE(review): 'start' is fed the raw pagenum — confirm whether Google's
# paging expects an item offset (pagenum * page_size) instead.
1791 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1792 request = urllib2.Request(result_url, None, std_headers)
1794 page = urllib2.urlopen(request).read()
1795 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1796 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1799 # Extract video identifiers
1800 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1801 video_id = mobj.group(1)
1802 if video_id not in already_seen:
1803 video_ids.append(video_id)
1804 already_seen.add(video_id)
1805 if len(video_ids) == n:
1806 # Specified n videos reached
1807 for id in video_ids:
1808 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
# No "Next" link => last results page: extract what we have.
1811 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1812 for id in video_ids:
1813 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1816 pagenum = pagenum + 1
# NOTE(review): this listing elides `try:`, `return`, `if mobj is None:` and
# some loop-header lines, exactly parallel to the other search extractors.
1818 class YahooSearchIE(InfoExtractor):
1819 """Information Extractor for Yahoo! Video search queries."""
1820 _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
1821 _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
1822 _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
1823 _MORE_PAGES_INDICATOR = r'\s*Next'
1825 _max_yahoo_results = 1000
1827 def __init__(self, yahoo_ie, downloader=None):
1828 InfoExtractor.__init__(self, downloader)
# Individual results are delegated to this YahooIE instance.
1829 self._yahoo_ie = yahoo_ie
# Tail of suitable(url): the def line is elided in this listing.
1833 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
1835 def report_download_page(self, query, pagenum):
1836 """Report attempt to download playlist page with given number."""
1837 query = query.decode(preferredencoding())
1838 self._downloader.to_stdout(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
1840 def _real_initialize(self):
1841 self._yahoo_ie.initialize()
1843 def _real_extract(self, query):
1844 mobj = re.match(self._VALID_QUERY, query)
1846 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1849 prefix, query = query.split(':')
1851 query = query.encode('utf-8')
1853 self._download_n_results(query, 1)
1855 elif prefix == 'all':
1856 self._download_n_results(query, self._max_yahoo_results)
1862 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1864 elif n > self._max_yahoo_results:
1865 self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
1866 n = self._max_yahoo_results
1867 self._download_n_results(query, n)
1869 except ValueError: # parsing prefix as integer fails
1870 self._download_n_results(query, 1)
1873 def _download_n_results(self, query, n):
1874 """Downloads a specified number of results for a query"""
1877 already_seen = set()
1881 self.report_download_page(query, pagenum)
1882 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1883 request = urllib2.Request(result_url, None, std_headers)
1885 page = urllib2.urlopen(request).read()
1886 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1887 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1890 # Extract video identifiers
1891 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1892 video_id = mobj.group(1)
1893 if video_id not in already_seen:
1894 video_ids.append(video_id)
1895 already_seen.add(video_id)
1896 if len(video_ids) == n:
1897 # Specified n videos reached
1898 for id in video_ids:
1899 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
# No "Next" link => last results page: extract what we have.
1902 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1903 for id in video_ids:
1904 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1907 pagenum = pagenum + 1
# NOTE(review): this listing elides `try:`, `return`, `if mobj is None:` and
# some loop-header lines (`while True:`, `break`, `video_ids = []`).
1909 class YoutubePlaylistIE(InfoExtractor):
1910 """Information Extractor for YouTube playlists."""
# NOTE(review): the dot in 'youtube.com' is unescaped, so it matches any
# character; harmless in practice but should be r'youtube\.com'.
1912 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
1913 _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1914 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1915 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1918 def __init__(self, youtube_ie, downloader=None):
1919 InfoExtractor.__init__(self, downloader)
# Individual playlist entries are delegated to this YouTube extractor.
1920 self._youtube_ie = youtube_ie
# Tail of suitable(url): the def line is elided in this listing.
1924 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1926 def report_download_page(self, playlist_id, pagenum):
1927 """Report attempt to download playlist page with given number."""
1928 self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1930 def _real_initialize(self):
1931 self._youtube_ie.initialize()
1933 def _real_extract(self, url):
1934 # Extract playlist id
1935 mobj = re.match(self._VALID_URL, url)
1937 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1940 # Download playlist pages
1941 playlist_id = mobj.group(1)
1946 self.report_download_page(playlist_id, pagenum)
1947 request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1949 page = urllib2.urlopen(request).read()
1950 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1951 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1954 # Extract video identifiers
1956 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
# Deduplicate ids within the page while preserving order.
1957 if mobj.group(1) not in ids_in_page:
1958 ids_in_page.append(mobj.group(1))
1959 video_ids.extend(ids_in_page)
# No "Next" link => last playlist page.
1961 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1963 pagenum = pagenum + 1
# Honor --playlist-start by skipping the leading entries.
1965 playliststart = self._downloader.params.get('playliststart', 1)
1966 playliststart -= 1 #our arrays are zero-based but the playlist is 1-based
1967 if playliststart > 0:
1968 video_ids = video_ids[playliststart:]
1970 for id in video_ids:
1971 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
# NOTE(review): this listing elides `try:`, `return`, `if mobj is None:` and
# list-initialization lines (`video_ids = []`, `ids_in_page = []`).
1974 class YoutubeUserIE(InfoExtractor):
1975 """Information Extractor for YouTube users."""
# NOTE(review): unescaped dots in 'youtube.com' — should be r'youtube\.com'.
1977 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
1978 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
# Only the first GData feed page is fetched — no pagination (see XXX).
1979 _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
1982 def __init__(self, youtube_ie, downloader=None):
1983 InfoExtractor.__init__(self, downloader)
# Individual user uploads are delegated to this YouTube extractor.
1984 self._youtube_ie = youtube_ie
# Tail of suitable(url): the def line is elided in this listing.
1988 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
1990 def report_download_page(self, username):
1991 """Report attempt to download user page."""
1992 self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))
1994 def _real_initialize(self):
1995 self._youtube_ie.initialize()
1997 def _real_extract(self, url):
1999 mobj = re.match(self._VALID_URL, url)
2001 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2004 # Download user page
2005 username = mobj.group(1)
2009 self.report_download_page(username)
2010 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
2012 page = urllib2.urlopen(request).read()
2013 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2014 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2017 # Extract video identifiers
2020 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
# Deduplicate ids within the page while preserving order.
2021 if mobj.group(1) not in ids_in_page:
2022 ids_in_page.append(mobj.group(1))
2023 video_ids.extend(ids_in_page)
# Honor --playlist-start by skipping the leading entries.
2025 playliststart = self._downloader.params.get('playliststart', 1)
2026 playliststart = playliststart-1 #our arrays are zero-based but the playlist is 1-based
2027 if playliststart > 0:
2028 video_ids = video_ids[playliststart:]
2030 for id in video_ids:
2031 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
class PostProcessor(object):
	"""Base class for post-processing steps.

	Instances are attached to a downloader through its
	add_post_processor() method.  After every successful download the
	downloader walks its chain of PostProcessors, calling run() on each
	one: the first call receives an initial information dictionary, and
	each later call receives whatever the previous PostProcessor
	returned.

	Processing stops as soon as a run() call returns None, or once the
	end of the chain is reached.

	Like InfoExtractor objects, PostProcessors take part in a "mutual
	registration" handshake with their downloader.
	"""

	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Associate a downloader object with this PostProcessor."""
		self._downloader = downloader

	def run(self, information):
		"""Execute this post-processing step.

		"information" is a dictionary like the ones InfoExtractors
		build, with one extra field: "filepath", which points at the
		downloaded file.

		Returning None stops the postprocessing chain; returning an
		information dictionary (possibly with some fields changed)
		forwards it to the next PostProcessor in the chain.  The
		method may also raise a PostProcessingError exception, which
		the downloader knows how to handle.
		"""
		# Default implementation: pass the information through untouched.
		return information
2080 ### MAIN PROGRAM ###
2081 if __name__ == '__main__':
2083 # Modules needed only when running the main program
2087 # Function to update the program file with the latest version from bitbucket.org
2088 def update_self(downloader, filename):
2089 # Note: downloader only used for options
2090 if not os.access (filename, os.W_OK):
2091 sys.exit('ERROR: no write permissions on %s' % filename)
2093 downloader.to_stdout('Updating to latest stable version...')
2094 latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
2095 latest_version = urllib.urlopen(latest_url).read().strip()
2096 prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
2097 newcontent = urllib.urlopen(prog_url).read()
2098 stream = open(filename, 'w')
2099 stream.write(newcontent)
2101 downloader.to_stdout('Updated to version %s' % latest_version)
2103 # Parse command line
2104 parser = optparse.OptionParser(
2105 usage='Usage: %prog [options] url...',
2106 version='2010.10.03',
2107 conflict_handler='resolve',
2110 parser.add_option('-h', '--help',
2111 action='help', help='print this help text and exit')
2112 parser.add_option('-v', '--version',
2113 action='version', help='print program version and exit')
2114 parser.add_option('-U', '--update',
2115 action='store_true', dest='update_self', help='update this program to latest stable version')
2116 parser.add_option('-i', '--ignore-errors',
2117 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
2118 parser.add_option('-r', '--rate-limit',
2119 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
2120 parser.add_option('-R', '--retries',
2121 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
2122 parser.add_option('--playlist-start',
2123 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
2125 authentication = optparse.OptionGroup(parser, 'Authentication Options')
2126 authentication.add_option('-u', '--username',
2127 dest='username', metavar='USERNAME', help='account username')
2128 authentication.add_option('-p', '--password',
2129 dest='password', metavar='PASSWORD', help='account password')
2130 authentication.add_option('-n', '--netrc',
2131 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
2132 parser.add_option_group(authentication)
2134 video_format = optparse.OptionGroup(parser, 'Video Format Options')
2135 video_format.add_option('-f', '--format',
2136 action='store', dest='format', metavar='FORMAT', help='video format code')
2137 video_format.add_option('-m', '--mobile-version',
2138 action='store_const', dest='format', help='alias for -f 17', const='17')
2139 video_format.add_option('--all-formats',
2140 action='store_const', dest='format', help='download all available video formats', const='-1')
2141 video_format.add_option('--max-quality',
2142 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
2143 video_format.add_option('-b', '--best-quality',
2144 action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)')
2145 parser.add_option_group(video_format)
2147 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
2148 verbosity.add_option('-q', '--quiet',
2149 action='store_true', dest='quiet', help='activates quiet mode', default=False)
2150 verbosity.add_option('-s', '--simulate',
2151 action='store_true', dest='simulate', help='do not download video', default=False)
2152 verbosity.add_option('-g', '--get-url',
2153 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
2154 verbosity.add_option('-e', '--get-title',
2155 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
2156 verbosity.add_option('--get-thumbnail',
2157 action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
2158 verbosity.add_option('--get-description',
2159 action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
2160 verbosity.add_option('--no-progress',
2161 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
2162 parser.add_option_group(verbosity)
2164 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
2165 filesystem.add_option('-t', '--title',
2166 action='store_true', dest='usetitle', help='use title in file name', default=False)
2167 filesystem.add_option('-l', '--literal',
2168 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
2169 filesystem.add_option('-o', '--output',
2170 dest='outtmpl', metavar='TEMPLATE', help='output filename template')
2171 filesystem.add_option('-a', '--batch-file',
2172 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
2173 filesystem.add_option('-w', '--no-overwrites',
2174 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
2175 filesystem.add_option('-c', '--continue',
2176 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
2177 filesystem.add_option('--cookies',
2178 dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
2179 parser.add_option_group(filesystem)
2181 (opts, args) = parser.parse_args()
2183 # Open appropriate CookieJar
2184 if opts.cookiefile is None:
2185 jar = cookielib.CookieJar()
2188 jar = cookielib.MozillaCookieJar(opts.cookiefile)
2189 except (IOError, OSError), err:
2190 sys.exit(u'ERROR: unable to open cookie file')
2192 # General configuration
2193 cookie_processor = urllib2.HTTPCookieProcessor(jar)
2194 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
2195 urllib2.install_opener(urllib2.build_opener(cookie_processor))
2196 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
2198 # Batch file verification
2200 if opts.batchfile is not None:
2202 if opts.batchfile == '-':
2205 batchfd = open(opts.batchfile, 'r')
2206 batchurls = batchfd.readlines()
2207 batchurls = [x.strip() for x in batchurls]
2208 batchurls = [x for x in batchurls if len(x) > 0]
2210 sys.exit(u'ERROR: batch file could not be read')
2211 all_urls = batchurls + args
2213 # Conflicting, missing and erroneous options
2214 if opts.bestquality:
2215 print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n'
2216 if opts.usenetrc and (opts.username is not None or opts.password is not None):
2217 parser.error(u'using .netrc conflicts with giving username/password')
2218 if opts.password is not None and opts.username is None:
2219 parser.error(u'account username missing')
2220 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
2221 parser.error(u'using output template conflicts with using title or literal title')
2222 if opts.usetitle and opts.useliteral:
2223 parser.error(u'using title conflicts with using literal title')
2224 if opts.username is not None and opts.password is None:
2225 opts.password = getpass.getpass(u'Type account password and press return:')
2226 if opts.ratelimit is not None:
2227 numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
2228 if numeric_limit is None:
2229 parser.error(u'invalid rate limit specified')
2230 opts.ratelimit = numeric_limit
2231 if opts.retries is not None:
2233 opts.retries = long(opts.retries)
2234 except (TypeError, ValueError), err:
2235 parser.error(u'invalid retry count specified')
2236 if opts.playliststart is not None:
2238 opts.playliststart = long(opts.playliststart)
2239 except (TypeError, ValueError), err:
2240 parser.error(u'invalid playlist page specified')
2242 # Information extractors
2243 youtube_ie = YoutubeIE()
2244 metacafe_ie = MetacafeIE(youtube_ie)
2245 dailymotion_ie = DailymotionIE()
2246 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
2247 youtube_user_ie = YoutubeUserIE(youtube_ie)
2248 youtube_search_ie = YoutubeSearchIE(youtube_ie)
2249 google_ie = GoogleIE()
2250 google_search_ie = GoogleSearchIE(google_ie)
2251 photobucket_ie = PhotobucketIE()
2252 yahoo_ie = YahooIE()
2253 yahoo_search_ie = YahooSearchIE(yahoo_ie)
2254 generic_ie = GenericIE()
2257 fd = FileDownloader({
2258 'usenetrc': opts.usenetrc,
2259 'username': opts.username,
2260 'password': opts.password,
2261 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
2262 'forceurl': opts.geturl,
2263 'forcetitle': opts.gettitle,
2264 'forcethumbnail': opts.getthumbnail,
2265 'forcedescription': opts.getdescription,
2266 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
2267 'format': opts.format,
2268 'format_limit': opts.format_limit,
2269 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
2270 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
2271 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
2272 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
2273 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
2274 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
2275 or u'%(id)s.%(ext)s'),
2276 'ignoreerrors': opts.ignoreerrors,
2277 'ratelimit': opts.ratelimit,
2278 'nooverwrites': opts.nooverwrites,
2279 'retries': opts.retries,
2280 'continuedl': opts.continue_dl,
2281 'noprogress': opts.noprogress,
2282 'playliststart': opts.playliststart,
2284 fd.add_info_extractor(youtube_search_ie)
2285 fd.add_info_extractor(youtube_pl_ie)
2286 fd.add_info_extractor(youtube_user_ie)
2287 fd.add_info_extractor(metacafe_ie)
2288 fd.add_info_extractor(dailymotion_ie)
2289 fd.add_info_extractor(youtube_ie)
2290 fd.add_info_extractor(google_ie)
2291 fd.add_info_extractor(google_search_ie)
2292 fd.add_info_extractor(photobucket_ie)
2293 fd.add_info_extractor(yahoo_ie)
2294 fd.add_info_extractor(yahoo_search_ie)
2296 # This must come last since it's the
2297 # fallback if none of the others work
2298 fd.add_info_extractor(generic_ie)
2301 if opts.update_self:
2302 update_self(fd, sys.argv[0])
2305 if len(all_urls) < 1:
2306 if not opts.update_self:
2307 parser.error(u'you must provide at least one URL')
2310 retcode = fd.download(all_urls)
2312 # Dump cookie jar if requested
2313 if opts.cookiefile is not None:
2316 except (IOError, OSError), err:
2317 sys.exit(u'ERROR: unable to save cookie jar')
2321 except DownloadError:
2323 except SameFileError:
2324 sys.exit(u'ERROR: fixed output name but more than one file to download')
2325 except KeyboardInterrupt:
2326 sys.exit(u'\nERROR: Interrupted by user')