youtube-dl

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 # Author: Ricardo Garcia Gonzalez
   4 # Author: Danny Colligan
   5 # Author: Benjamin Johnson
   6 # License: Public domain code
   7 import cookielib
   8 import datetime
   9 import htmlentitydefs
  10 import httplib
  11 import locale
  12 import math
  13 import netrc
  14 import os
  15 import os.path
  16 import re
  17 import socket
  18 import string
  19 import subprocess
  20 import sys
  21 import time
  22 import urllib
  23 import urllib2
  24
  25 # parse_qs was moved from the cgi module to the urlparse module recently.
  26 try:
  27         from urlparse import parse_qs
  28 except ImportError:
  29         from cgi import parse_qs
  30
# Default HTTP headers attached to the urllib2.Request objects built by
# FileDownloader._do_download().
std_headers = {
        'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101028 Firefox/3.6.12',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-us,en;q=0.5',
}

# Unicode string made of the ASCII letters followed by the ASCII digits.
# NOTE(review): presumably the characters kept in "simplified" titles; its
# use is not visible in this part of the file -- confirm.
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
  39
  40 def preferredencoding():
  41         """Get preferred encoding.
  42
  43         Returns the best encoding scheme for the system, based on
  44         locale.getpreferredencoding() and some further tweaks.
  45         """
  46         def yield_preferredencoding():
  47                 try:
  48                         pref = locale.getpreferredencoding()
  49                         u'TEST'.encode(pref)
  50                 except:
  51                         pref = 'UTF-8'
  52                 while True:
  53                         yield pref
  54         return yield_preferredencoding().next()
  55
  56 def htmlentity_transform(matchobj):
  57         """Transforms an HTML entity to a Unicode character.
  58
  59         This function receives a match object and is intended to be used with
  60         the re.sub() function.
  61         """
  62         entity = matchobj.group(1)
  63
  64         # Known non-numeric HTML entity
  65         if entity in htmlentitydefs.name2codepoint:
  66                 return unichr(htmlentitydefs.name2codepoint[entity])
  67
  68         # Unicode character
  69         mobj = re.match(ur'(?u)#(x?\d+)', entity)
  70         if mobj is not None:
  71                 numstr = mobj.group(1)
  72                 if numstr.startswith(u'x'):
  73                         base = 16
  74                         numstr = u'0%s' % numstr
  75                 else:
  76                         base = 10
  77                 return unichr(long(numstr, base))
  78
  79         # Unknown entity in name, return its literal representation
  80         return (u'&%s;' % entity)
  81
  82 def sanitize_title(utitle):
  83         """Sanitizes a video title so it could be used as part of a filename."""
  84         utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
  85         return utitle.replace(unicode(os.sep), u'%')
  86
  87 def sanitize_open(filename, open_mode):
  88         """Try to open the given filename, and slightly tweak it if this fails.
  89
  90         Attempts to open the given filename. If this fails, it tries to change
  91         the filename slightly, step by step, until it's either able to open it
  92         or it fails and raises a final exception, like the standard open()
  93         function.
  94
  95         It returns the tuple (stream, definitive_file_name).
  96         """
  97         try:
  98                 if filename == u'-':
  99                         if sys.platform == 'win32':
 100                                 import msvcrt
 101                                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 102                         return (sys.stdout, filename)
 103                 stream = open(filename, open_mode)
 104                 return (stream, filename)
 105         except (IOError, OSError), err:
 106                 # In case of error, try to remove win32 forbidden chars
 107                 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
 108
 109                 # An exception here should be caught in the caller
 110                 stream = open(filename, open_mode)
 111                 return (stream, filename)
 112
 113
 114 class DownloadError(Exception):
 115         """Download Error exception.
 116
 117         This exception may be thrown by FileDownloader objects if they are not
 118         configured to continue on errors. They will contain the appropriate
 119         error message.
 120         """
 121         pass
 122
 123 class SameFileError(Exception):
 124         """Same File exception.
 125
 126         This exception will be thrown by FileDownloader objects if they detect
 127         multiple files would have to be downloaded to the same file on disk.
 128         """
 129         pass
 130
 131 class PostProcessingError(Exception):
 132         """Post Processing exception.
 133
 134         This exception may be raised by PostProcessor's .run() method to
 135         indicate an error in the postprocessing task.
 136         """
 137         pass
 138
 139 class UnavailableVideoError(Exception):
 140         """Unavailable Format exception.
 141
 142         This exception will be thrown when a video is requested
 143         in a format that is not available for that video.
 144         """
 145         pass
 146
 147 class ContentTooShortError(Exception):
 148         """Content Too Short exception.
 149
 150         This exception may be raised by FileDownloader objects when a file they
 151         download is too small for what the server announced first, indicating
 152         the connection was probably interrupted.
 153         """
 154         # Both in bytes
 155         downloaded = None
 156         expected = None
 157
 158         def __init__(self, downloaded, expected):
 159                 self.downloaded = downloaded
 160                 self.expected = expected
 161
 162 class FileDownloader(object):
 163         """File Downloader class.
 164
        File downloader objects are the ones responsible for downloading the
        actual video file and writing it to disk if the user has requested
        it, among some other tasks. In most cases there should be one per
        program. Given a video URL, the downloader does not know how to
        extract all the needed information (that is the task of the
        InfoExtractors), so it has to pass the URL to one of them.
 171
        For this, file downloader objects have a method that allows
        InfoExtractors to be registered in a given order. When it is passed
        a URL, the file downloader hands it to the first InfoExtractor it
        finds that reports being able to handle it. The InfoExtractor extracts
        all the information about the video or videos the URL refers to, and
        asks the FileDownloader to process the video information, possibly
        downloading the video.
 179
 180         File downloaders accept a lot of parameters. In order not to saturate
 181         the object constructor with arguments, it receives a dictionary of
 182         options instead. These options are available through the params
 183         attribute for the InfoExtractors to use. The FileDownloader also
 184         registers itself as the downloader in charge for the InfoExtractors
 185         that are added to it, so this is a "mutual registration".
 186
 187         Available options:
 188
 189         username:         Username for authentication purposes.
 190         password:         Password for authentication purposes.
 191         usenetrc:         Use netrc for authentication instead.
 192         quiet:            Do not print messages to stdout.
 193         forceurl:         Force printing final URL.
 194         forcetitle:       Force printing title.
 195         forcethumbnail:   Force printing thumbnail URL.
 196         forcedescription: Force printing description.
 197         simulate:         Do not download the video files.
 198         format:           Video format code.
 199         format_limit:     Highest quality format to try.
 200         outtmpl:          Template for output names.
 201         ignoreerrors:     Do not stop on download errors.
 202         ratelimit:        Download speed limit, in bytes/sec.
 203         nooverwrites:     Prevent overwriting files.
 204         retries:          Number of times to retry for HTTP error 5xx
 205         continuedl:       Try to continue downloads if possible.
 206         noprogress:       Do not print the progress bar.
 207         playliststart:    Playlist item to start at.
 208         playlistend:      Playlist item to end at.
 209         logtostderr:      Log messages to stderr instead of stdout.
 210         """
 211
 212         params = None
 213         _ies = []
 214         _pps = []
 215         _download_retcode = None
 216         _num_downloads = None
 217         _screen_file = None
 218
 219         def __init__(self, params):
 220                 """Create a FileDownloader object with the given options."""
 221                 self._ies = []
 222                 self._pps = []
 223                 self._download_retcode = 0
 224                 self._num_downloads = 0
 225                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 226                 self.params = params
 227
 228         @staticmethod
 229         def pmkdir(filename):
 230                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
 231                 components = filename.split(os.sep)
 232                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
 233                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
 234                 for dir in aggregate:
 235                         if not os.path.exists(dir):
 236                                 os.mkdir(dir)
 237
 238         @staticmethod
 239         def temp_name(filename):
 240                 """Returns a temporary filename for the given filename."""
 241                 return filename + '.part'
 242
 243         @staticmethod
 244         def format_bytes(bytes):
 245                 if bytes is None:
 246                         return 'N/A'
 247                 if type(bytes) is str:
 248                         bytes = float(bytes)
 249                 if bytes == 0.0:
 250                         exponent = 0
 251                 else:
 252                         exponent = long(math.log(bytes, 1024.0))
 253                 suffix = 'bkMGTPEZY'[exponent]
 254                 converted = float(bytes) / float(1024**exponent)
 255                 return '%.2f%s' % (converted, suffix)
 256
 257         @staticmethod
 258         def calc_percent(byte_counter, data_len):
 259                 if data_len is None:
 260                         return '---.-%'
 261                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 262
 263         @staticmethod
 264         def calc_eta(start, now, total, current):
 265                 if total is None:
 266                         return '--:--'
 267                 dif = now - start
 268                 if current == 0 or dif < 0.001: # One millisecond
 269                         return '--:--'
 270                 rate = float(current) / dif
 271                 eta = long((float(total) - float(current)) / rate)
 272                 (eta_mins, eta_secs) = divmod(eta, 60)
 273                 if eta_mins > 99:
 274                         return '--:--'
 275                 return '%02d:%02d' % (eta_mins, eta_secs)
 276
 277         @staticmethod
 278         def calc_speed(start, now, bytes):
 279                 dif = now - start
 280                 if bytes == 0 or dif < 0.001: # One millisecond
 281                         return '%10s' % '---b/s'
 282                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 283
 284         @staticmethod
 285         def best_block_size(elapsed_time, bytes):
 286                 new_min = max(bytes / 2.0, 1.0)
 287                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 288                 if elapsed_time < 0.001:
 289                         return long(new_max)
 290                 rate = bytes / elapsed_time
 291                 if rate > new_max:
 292                         return long(new_max)
 293                 if rate < new_min:
 294                         return long(new_min)
 295                 return long(rate)
 296
 297         @staticmethod
 298         def parse_bytes(bytestr):
 299                 """Parse a string indicating a byte quantity into a long integer."""
 300                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 301                 if matchobj is None:
 302                         return None
 303                 number = float(matchobj.group(1))
 304                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 305                 return long(round(number * multiplier))
 306
 307         def add_info_extractor(self, ie):
 308                 """Add an InfoExtractor object to the end of the list."""
 309                 self._ies.append(ie)
 310                 ie.set_downloader(self)
 311
 312         def add_post_processor(self, pp):
 313                 """Add a PostProcessor object to the end of the chain."""
 314                 self._pps.append(pp)
 315                 pp.set_downloader(self)
 316
 317         def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
 318                 """Print message to stdout if not in quiet mode."""
 319                 try:
 320                         if not self.params.get('quiet', False):
 321                                 terminator = [u'\n', u''][skip_eol]
 322                                 print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
 323                         self._screen_file.flush()
 324                 except (UnicodeEncodeError), err:
 325                         if not ignore_encoding_errors:
 326                                 raise
 327
 328         def to_stderr(self, message):
 329                 """Print message to stderr."""
 330                 print >>sys.stderr, message.encode(preferredencoding())
 331
 332         def fixed_template(self):
 333                 """Checks if the output template is fixed."""
 334                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
 335
 336         def trouble(self, message=None):
 337                 """Determine action to take when a download problem appears.
 338
 339                 Depending on if the downloader has been configured to ignore
 340                 download errors or not, this method may throw an exception or
 341                 not when errors are found, after printing the message.
 342                 """
 343                 if message is not None:
 344                         self.to_stderr(message)
 345                 if not self.params.get('ignoreerrors', False):
 346                         raise DownloadError(message)
 347                 self._download_retcode = 1
 348
 349         def slow_down(self, start_time, byte_counter):
 350                 """Sleep if the download speed is over the rate limit."""
 351                 rate_limit = self.params.get('ratelimit', None)
 352                 if rate_limit is None or byte_counter == 0:
 353                         return
 354                 now = time.time()
 355                 elapsed = now - start_time
 356                 if elapsed <= 0.0:
 357                         return
 358                 speed = float(byte_counter) / elapsed
 359                 if speed > rate_limit:
 360                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 361
 362         def try_rename(self, old_filename, new_filename):
 363                 try:
 364                         os.rename(old_filename, new_filename)
 365                 except (IOError, OSError), err:
 366                         self.trouble(u'ERROR: unable to rename file')
 367
 368         def report_destination(self, filename):
 369                 """Report destination filename."""
 370                 self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
 371
 372         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 373                 """Report download progress."""
 374                 if self.params.get('noprogress', False):
 375                         return
 376                 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
 377                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 378
 379         def report_resuming_byte(self, resume_len):
 380                 """Report attempt to resume at given byte."""
 381                 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 382
 383         def report_retry(self, count, retries):
 384                 """Report retry in case of HTTP error 5xx"""
 385                 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 386
 387         def report_file_already_downloaded(self, file_name):
 388                 """Report file has already been fully downloaded."""
 389                 try:
 390                         self.to_screen(u'[download] %s has already been downloaded' % file_name)
 391                 except (UnicodeEncodeError), err:
 392                         self.to_screen(u'[download] The file has already been downloaded')
 393
 394         def report_unable_to_resume(self):
 395                 """Report it was impossible to resume download."""
 396                 self.to_screen(u'[download] Unable to resume')
 397
 398         def report_finish(self):
 399                 """Report download finished."""
 400                 if self.params.get('noprogress', False):
 401                         self.to_screen(u'[download] Download completed')
 402                 else:
 403                         self.to_screen(u'')
 404
 405         def increment_downloads(self):
 406                 """Increment the ordinal that assigns a number to each file."""
 407                 self._num_downloads += 1
 408
        def process_info(self, info_dict):
                """Process a single dictionary returned by an InfoExtractor.

                In simulate mode only the forced printings are done. Otherwise
                the output file name is built from the 'outtmpl' template, the
                needed directories are created, the video is downloaded and, on
                success, the postprocessing chain is run.

                Problems are reported through self.trouble(), which may raise
                DownloadError. UnavailableVideoError is raised when the download
                fails with OSError or IOError. Nothing is returned.
                """
                # Do nothing else if in simulate mode
                if self.params.get('simulate', False):
                        # Forced printings
                        if self.params.get('forcetitle', False):
                                print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
                        if self.params.get('forceurl', False):
                                print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
                        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
                                print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
                        if self.params.get('forcedescription', False) and 'description' in info_dict:
                                print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')

                        return

                # Build the output file name: the template may use every field
                # of info_dict plus the extra 'epoch' and 'autonumber' fields.
                try:
                        template_dict = dict(info_dict)
                        template_dict['epoch'] = unicode(long(time.time()))
                        template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
                        filename = self.params['outtmpl'] % template_dict
                except (ValueError, KeyError), err:
                        self.trouble(u'ERROR: invalid system charset or erroneous output template')
                        return
                if self.params.get('nooverwrites', False) and os.path.exists(filename):
                        self.to_stderr(u'WARNING: file exists and will be skipped')
                        return

                try:
                        self.pmkdir(filename)
                except (OSError, IOError), err:
                        self.trouble(u'ERROR: unable to create directories: %s' % str(err))
                        return

                # The order of the handlers below matters.
                # NOTE(review): urllib2.URLError derives from IOError, so it is
                # matched by the first handler and turned into
                # UnavailableVideoError; the second handler is then only reached
                # by exceptions that are not IOErrors -- confirm this is intended.
                try:
                        success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
                except (OSError, IOError), err:
                        raise UnavailableVideoError
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self.trouble(u'ERROR: unable to download video data: %s' % str(err))
                        return
                except (ContentTooShortError, ), err:
                        self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                        return

                # Postprocess only files that were really downloaded
                if success:
                        try:
                                self.post_process(filename, info_dict)
                        except (PostProcessingError), err:
                                self.trouble(u'ERROR: postprocessing: %s' % str(err))
                                return
 460
 461         def download(self, url_list):
 462                 """Download a given list of URLs."""
 463                 if len(url_list) > 1 and self.fixed_template():
 464                         raise SameFileError(self.params['outtmpl'])
 465
 466                 for url in url_list:
 467                         suitable_found = False
 468                         for ie in self._ies:
 469                                 # Go to next InfoExtractor if not suitable
 470                                 if not ie.suitable(url):
 471                                         continue
 472
 473                                 # Suitable InfoExtractor found
 474                                 suitable_found = True
 475
 476                                 # Extract information from URL and process it
 477                                 ie.extract(url)
 478
 479                                 # Suitable InfoExtractor had been found; go to next URL
 480                                 break
 481
 482                         if not suitable_found:
 483                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
 484
 485                 return self._download_retcode
 486
 487         def post_process(self, filename, ie_info):
 488                 """Run the postprocessing chain on the given file."""
 489                 info = dict(ie_info)
 490                 info['filepath'] = filename
 491                 for pp in self._pps:
 492                         info = pp.run(info)
 493                         if info is None:
 494                                 break
 495
 496         def _download_with_rtmpdump(self, filename, url, player_url):
 497                 self.report_destination(filename)
 498                 tmpfilename = self.temp_name(filename)
 499
 500                 # Check for rtmpdump first
 501                 try:
 502                         subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 503                 except (OSError, IOError):
 504                         self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
 505                         return False
 506
 507                 # Download using rtmpdump. rtmpdump returns exit code 2 when
 508                 # the connection was interrumpted and resuming appears to be
 509                 # possible. This is part of rtmpdump's normal usage, AFAIK.
 510                 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
 511                 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
 512                 while retval == 2 or retval == 1:
 513                         prevsize = os.path.getsize(tmpfilename)
 514                         self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 515                         time.sleep(5.0) # This seems to be needed
 516                         retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 517                         cursize = os.path.getsize(tmpfilename)
 518                         if prevsize == cursize and retval == 1:
 519                                 break
 520                 if retval == 0:
 521                         self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
 522                         self.try_rename(tmpfilename, filename)
 523                         return True
 524                 else:
 525                         self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
 526                         return False
 527
 528         def _do_download(self, filename, url, player_url):
 529                 # Check file already present
 530                 if self.params.get('continuedl', False) and os.path.isfile(filename):
 531                         self.report_file_already_downloaded(filename)
 532                         return True
 533
 534                 # Attempt to download using rtmpdump
 535                 if url.startswith('rtmp'):
 536                         return self._download_with_rtmpdump(filename, url, player_url)
 537
 538                 tmpfilename = self.temp_name(filename)
 539                 stream = None
 540                 open_mode = 'wb'
 541                 basic_request = urllib2.Request(url, None, std_headers)
 542                 request = urllib2.Request(url, None, std_headers)
 543
 544                 # Establish possible resume length
 545                 if os.path.isfile(tmpfilename):
 546                         resume_len = os.path.getsize(tmpfilename)
 547                 else:
 548                         resume_len = 0
 549
 550                 # Request parameters in case of being able to resume
 551                 if self.params.get('continuedl', False) and resume_len != 0:
 552                         self.report_resuming_byte(resume_len)
 553                         request.add_header('Range','bytes=%d-' % resume_len)
 554                         open_mode = 'ab'
 555
 556                 count = 0
 557                 retries = self.params.get('retries', 0)
 558                 while count <= retries:
 559                         # Establish connection
 560                         try:
 561                                 data = urllib2.urlopen(request)
 562                                 break
 563                         except (urllib2.HTTPError, ), err:
 564                                 if (err.code < 500 or err.code >= 600) and err.code != 416:
 565                                         # Unexpected HTTP error
 566                                         raise
 567                                 elif err.code == 416:
 568                                         # Unable to resume (requested range not satisfiable)
 569                                         try:
 570                                                 # Open the connection again without the range header
 571                                                 data = urllib2.urlopen(basic_request)
 572                                                 content_length = data.info()['Content-Length']
 573                                         except (urllib2.HTTPError, ), err:
 574                                                 if err.code < 500 or err.code >= 600:
 575                                                         raise
 576                                         else:
 577                                                 # Examine the reported length
 578                                                 if (content_length is not None and
 579                                                     (resume_len - 100 < long(content_length) < resume_len + 100)):
 580                                                         # The file had already been fully downloaded.
 581                                                         # Explanation to the above condition: in issue #175 it was revealed that
 582                                                         # YouTube sometimes adds or removes a few bytes from the end of the file,
 583                                                         # changing the file size slightly and causing problems for some users. So
 584                                                         # I decided to implement a suggested change and consider the file
 585                                                         # completely downloaded if the file size differs less than 100 bytes from
 586                                                         # the one in the hard drive.
 587                                                         self.report_file_already_downloaded(filename)
 588                                                         self.try_rename(tmpfilename, filename)
 589                                                         return True
 590                                                 else:
 591                                                         # The length does not match, we start the download over
 592                                                         self.report_unable_to_resume()
 593                                                         open_mode = 'wb'
 594                                                         break
 595                         # Retry
 596                         count += 1
 597                         if count <= retries:
 598                                 self.report_retry(count, retries)
 599
 600                 if count > retries:
 601                         self.trouble(u'ERROR: giving up after %s retries' % retries)
 602                         return False
 603
 604                 data_len = data.info().get('Content-length', None)
 605                 data_len_str = self.format_bytes(data_len)
 606                 byte_counter = 0
 607                 block_size = 1024
 608                 start = time.time()
 609                 while True:
 610                         # Download and write
 611                         before = time.time()
 612                         data_block = data.read(block_size)
 613                         after = time.time()
 614                         data_block_len = len(data_block)
 615                         if data_block_len == 0:
 616                                 break
 617                         byte_counter += data_block_len
 618
 619                         # Open file just in time
 620                         if stream is None:
 621                                 try:
 622                                         (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
 623                                         self.report_destination(filename)
 624                                 except (OSError, IOError), err:
 625                                         self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
 626                                         return False
 627                         try:
 628                                 stream.write(data_block)
 629                         except (IOError, OSError), err:
 630                                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
 631                                 return False
 632                         block_size = self.best_block_size(after - before, data_block_len)
 633
 634                         # Progress message
 635                         percent_str = self.calc_percent(byte_counter, data_len)
 636                         eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
 637                         speed_str = self.calc_speed(start, time.time(), byte_counter)
 638                         self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 639
 640                         # Apply rate limit
 641                         self.slow_down(start, byte_counter)
 642
 643                 self.report_finish()
 644                 if data_len is not None and str(byte_counter) != data_len:
 645                         raise ContentTooShortError(byte_counter, long(data_len))
 646                 self.try_rename(tmpfilename, filename)
 647                 return True
 648
 649 class InfoExtractor(object):
 650         """Information Extractor class.
 651
 652         Information extractors are the classes that, given a URL, extract
 653         information from the video (or videos) the URL refers to. This
 654         information includes the real video URL, the video title and simplified
 655         title, author and others. The information is stored in a dictionary
 656         which is then passed to the FileDownloader. The FileDownloader
 657         processes this information possibly downloading the video to the file
 658         system, among other possible outcomes. The dictionaries must include
 659         the following fields:
 660
 661         id:             Video identifier.
 662         url:            Final video URL.
 663         uploader:       Nickname of the video uploader.
 664         title:          Literal title.
 665         stitle:         Simplified title.
 666         ext:            Video filename extension.
 667         format:         Video format.
 668         player_url:     SWF Player URL (may be None).
 669
 670         The following fields are optional. Their primary purpose is to allow
 671         youtube-dl to serve as the backend for a video search function, such
 672         as the one in youtube2mp3.  They are only used when their respective
 673         forced printing functions are called:
 674
 675         thumbnail:      Full URL to a video thumbnail image.
 676         description:    One-line video description.
 677
 678         Subclasses of this one should re-define the _real_initialize() and
 679         _real_extract() methods, as well as the suitable() static method.
 680         Probably, they should also be instantiated and added to the main
 681         downloader.
 682         """
 683
 684         _ready = False
 685         _downloader = None
 686
 687         def __init__(self, downloader=None):
 688                 """Constructor. Receives an optional downloader."""
 689                 self._ready = False
 690                 self.set_downloader(downloader)
 691
 692         @staticmethod
 693         def suitable(url):
 694                 """Receives a URL and returns True if suitable for this IE."""
 695                 return False
 696
 697         def initialize(self):
 698                 """Initializes an instance (authentication, etc)."""
 699                 if not self._ready:
 700                         self._real_initialize()
 701                         self._ready = True
 702
 703         def extract(self, url):
 704                 """Extracts URL information and returns it in list of dicts."""
 705                 self.initialize()
 706                 return self._real_extract(url)
 707
 708         def set_downloader(self, downloader):
 709                 """Sets the downloader for this IE."""
 710                 self._downloader = downloader
 711
 712         def _real_initialize(self):
 713                 """Real initialization process. Redefine in subclasses."""
 714                 pass
 715
 716         def _real_extract(self, url):
 717                 """Real extraction process. Redefine in subclasses."""
 718                 pass
 719
 720 class YoutubeIE(InfoExtractor):
 721         """Information extractor for youtube.com."""
 722
 723         _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$'
 724         _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
 725         _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
 726         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
 727         _NETRC_MACHINE = 'youtube'
 728         # Listed in order of quality
 729         _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
 730         _video_extensions = {
 731                 '13': '3gp',
 732                 '17': 'mp4',
 733                 '18': 'mp4',
 734                 '22': 'mp4',
 735                 '37': 'mp4',
 736                 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
 737                 '43': 'webm',
 738                 '45': 'webm',
 739         }
 740
 741         @staticmethod
 742         def suitable(url):
 743                 return (re.match(YoutubeIE._VALID_URL, url) is not None)
 744
 745         def report_lang(self):
 746                 """Report attempt to set language."""
 747                 self._downloader.to_screen(u'[youtube] Setting language')
 748
 749         def report_login(self):
 750                 """Report attempt to log in."""
 751                 self._downloader.to_screen(u'[youtube] Logging in')
 752
 753         def report_age_confirmation(self):
 754                 """Report attempt to confirm age."""
 755                 self._downloader.to_screen(u'[youtube] Confirming age')
 756
 757         def report_video_webpage_download(self, video_id):
 758                 """Report attempt to download video webpage."""
 759                 self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
 760
 761         def report_video_info_webpage_download(self, video_id):
 762                 """Report attempt to download video info webpage."""
 763                 self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
 764
 765         def report_information_extraction(self, video_id):
 766                 """Report attempt to extract video information."""
 767                 self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
 768
 769         def report_unavailable_format(self, video_id, format):
 770                 """Report extracted video URL."""
 771                 self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
 772
 773         def report_rtmp_download(self):
 774                 """Indicate the download will use the RTMP protocol."""
 775                 self._downloader.to_screen(u'[youtube] RTMP download detected')
 776
 777         def _real_initialize(self):
 778                 if self._downloader is None:
 779                         return
 780
 781                 username = None
 782                 password = None
 783                 downloader_params = self._downloader.params
 784
 785                 # Attempt to use provided username and password or .netrc data
 786                 if downloader_params.get('username', None) is not None:
 787                         username = downloader_params['username']
 788                         password = downloader_params['password']
 789                 elif downloader_params.get('usenetrc', False):
 790                         try:
 791                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
 792                                 if info is not None:
 793                                         username = info[0]
 794                                         password = info[2]
 795                                 else:
 796                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
 797                         except (IOError, netrc.NetrcParseError), err:
 798                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
 799                                 return
 800
 801                 # Set language
 802                 request = urllib2.Request(self._LANG_URL, None, std_headers)
 803                 try:
 804                         self.report_lang()
 805                         urllib2.urlopen(request).read()
 806                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 807                         self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
 808                         return
 809
 810                 # No authentication to be performed
 811                 if username is None:
 812                         return
 813
 814                 # Log in
 815                 login_form = {
 816                                 'current_form': 'loginForm',
 817                                 'next':         '/',
 818                                 'action_login': 'Log In',
 819                                 'username':     username,
 820                                 'password':     password,
 821                                 }
 822                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
 823                 try:
 824                         self.report_login()
 825                         login_results = urllib2.urlopen(request).read()
 826                         if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
 827                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
 828                                 return
 829                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 830                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
 831                         return
 832
 833                 # Confirm age
 834                 age_form = {
 835                                 'next_url':             '/',
 836                                 'action_confirm':       'Confirm',
 837                                 }
 838                 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
 839                 try:
 840                         self.report_age_confirmation()
 841                         age_results = urllib2.urlopen(request).read()
 842                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 843                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
 844                         return
 845
 846         def _real_extract(self, url):
 847                 # Extract video id from URL
 848                 mobj = re.match(self._VALID_URL, url)
 849                 if mobj is None:
 850                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
 851                         return
 852                 video_id = mobj.group(2)
 853
 854                 # Get video webpage
 855                 self.report_video_webpage_download(video_id)
 856                 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id, None, std_headers)
 857                 try:
 858                         video_webpage = urllib2.urlopen(request).read()
 859                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 860                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
 861                         return
 862
 863                 # Attempt to extract SWF player URL
 864                 mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
 865                 if mobj is not None:
 866                         player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
 867                 else:
 868                         player_url = None
 869
 870                 # Get video info
 871                 self.report_video_info_webpage_download(video_id)
 872                 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
 873                         video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
 874                                            % (video_id, el_type))
 875                         request = urllib2.Request(video_info_url, None, std_headers)
 876                         try:
 877                                 video_info_webpage = urllib2.urlopen(request).read()
 878                                 video_info = parse_qs(video_info_webpage)
 879                                 if 'token' in video_info:
 880                                         break
 881                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 882                                 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
 883                                 return
 884                 if 'token' not in video_info:
 885                         if 'reason' in video_info:
 886                                 self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
 887                         else:
 888                                 self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
 889                         return
 890
 891                 # Start extracting information
 892                 self.report_information_extraction(video_id)
 893
 894                 # uploader
 895                 if 'author' not in video_info:
 896                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
 897                         return
 898                 video_uploader = urllib.unquote_plus(video_info['author'][0])
 899
 900                 # title
 901                 if 'title' not in video_info:
 902                         self._downloader.trouble(u'ERROR: unable to extract video title')
 903                         return
 904                 video_title = urllib.unquote_plus(video_info['title'][0])
 905                 video_title = video_title.decode('utf-8')
 906                 video_title = sanitize_title(video_title)
 907
 908                 # simplified title
 909                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
 910                 simple_title = simple_title.strip(ur'_')
 911
 912                 # thumbnail image
 913                 if 'thumbnail_url' not in video_info:
 914                         self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
 915                         video_thumbnail = ''
 916                 else:   # don't panic if we can't find it
 917                         video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
 918
 919                 # upload date
 920                 upload_date = u'NA'
 921                 mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
 922                 if mobj is not None:
 923                         upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
 924                         format_expressions = ['%d %B %Y', '%B %d %Y']
 925                         for expression in format_expressions:
 926                                 try:
 927                                         upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
 928                                 except:
 929                                         pass
 930
 931                 # description
 932                 video_description = 'No description available.'
 933                 if self._downloader.params.get('forcedescription', False):
 934                         mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
 935                         if mobj is not None:
 936                                 video_description = mobj.group(1)
 937
 938                 # token
 939                 video_token = urllib.unquote_plus(video_info['token'][0])
 940
 941                 # Decide which formats to download
 942                 requested_format = self._downloader.params.get('format', None)
 943                 get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token)
 944
 945                 if 'fmt_url_map' in video_info:
 946                         url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
 947                         format_limit = self._downloader.params.get('format_limit', None)
 948                         if format_limit is not None and format_limit in self._available_formats:
 949                                 format_list = self._available_formats[self._available_formats.index(format_limit):]
 950                         else:
 951                                 format_list = self._available_formats
 952                         existing_formats = [x for x in format_list if x in url_map]
 953                         if len(existing_formats) == 0:
 954                                 self._downloader.trouble(u'ERROR: no known formats available for video')
 955                                 return
 956                         if requested_format is None:
 957                                 video_url_list = [(existing_formats[0], get_video_template % existing_formats[0])] # Best quality
 958                         elif requested_format == '-1':
 959                                 video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats
 960                         else:
 961                                 video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format
 962
 963                 elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
 964                         self.report_rtmp_download()
 965                         video_url_list = [(None, video_info['conn'][0])]
 966
 967                 else:
 968                         self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
 969                         return
 970
 971                 for format_param, video_real_url in video_url_list:
 972                         # At this point we have a new video
 973                         self._downloader.increment_downloads()
 974
 975                         # Extension
 976                         video_extension = self._video_extensions.get(format_param, 'flv')
 977
 978                         # Find the video URL in fmt_url_map or conn paramters
 979                         try:
 980                                 # Process video information
 981                                 self._downloader.process_info({
 982                                         'id':           video_id.decode('utf-8'),
 983                                         'url':          video_real_url.decode('utf-8'),
 984                                         'uploader':     video_uploader.decode('utf-8'),
 985                                         'upload_date':  upload_date,
 986                                         'title':        video_title,
 987                                         'stitle':       simple_title,
 988                                         'ext':          video_extension.decode('utf-8'),
 989                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
 990                                         'thumbnail':    video_thumbnail.decode('utf-8'),
 991                                         'description':  video_description.decode('utf-8'),
 992                                         'player_url':   player_url,
 993                                 })
 994                         except UnavailableVideoError, err:
 995                                 self._downloader.trouble(u'ERROR: unable to download video (format may not be available)')
 996
 997
 998 class MetacafeIE(InfoExtractor):
 999         """Information Extractor for metacafe.com."""
1000
1001         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
1002         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
1003         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
1004         _youtube_ie = None
1005
1006         def __init__(self, youtube_ie, downloader=None):
1007                 InfoExtractor.__init__(self, downloader)
1008                 self._youtube_ie = youtube_ie
1009
1010         @staticmethod
1011         def suitable(url):
1012                 return (re.match(MetacafeIE._VALID_URL, url) is not None)
1013
1014         def report_disclaimer(self):
1015                 """Report disclaimer retrieval."""
1016                 self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
1017
1018         def report_age_confirmation(self):
1019                 """Report attempt to confirm age."""
1020                 self._downloader.to_screen(u'[metacafe] Confirming age')
1021
1022         def report_download_webpage(self, video_id):
1023                 """Report webpage download."""
1024                 self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
1025
1026         def report_extraction(self, video_id):
1027                 """Report information extraction."""
1028                 self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
1029
1030         def _real_initialize(self):
1031                 # Retrieve disclaimer
1032                 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
1033                 try:
1034                         self.report_disclaimer()
1035                         disclaimer = urllib2.urlopen(request).read()
1036                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1037                         self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
1038                         return
1039
1040                 # Confirm age
1041                 disclaimer_form = {
1042                         'filters': '0',
1043                         'submit': "Continue - I'm over 18",
1044                         }
1045                 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
1046                 try:
1047                         self.report_age_confirmation()
1048                         disclaimer = urllib2.urlopen(request).read()
1049                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1050                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
1051                         return
1052
1053         def _real_extract(self, url):
1054                 # Extract id and simplified title from URL
1055                 mobj = re.match(self._VALID_URL, url)
1056                 if mobj is None:
1057                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1058                         return
1059
1060                 video_id = mobj.group(1)
1061
1062                 # Check if video comes from YouTube
1063                 mobj2 = re.match(r'^yt-(.*)$', video_id)
1064                 if mobj2 is not None:
1065                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1066                         return
1067
1068                 # At this point we have a new video
1069                 self._downloader.increment_downloads()
1070
1071                 simple_title = mobj.group(2).decode('utf-8')
1072
1073                 # Retrieve video webpage to extract further information
1074                 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1075                 try:
1076                         self.report_download_webpage(video_id)
1077                         webpage = urllib2.urlopen(request).read()
1078                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1079                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1080                         return
1081
1082                 # Extract URL, uploader and title from webpage
1083                 self.report_extraction(video_id)
1084                 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
1085                 if mobj is not None:
1086                         mediaURL = urllib.unquote(mobj.group(1))
1087                         video_extension = mediaURL[-3:]
1088
1089                         # Extract gdaKey if available
1090                         mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1091                         if mobj is None:
1092                                 video_url = mediaURL
1093                         else:
1094                                 gdaKey = mobj.group(1)
1095                                 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
1096                 else:
1097                         mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
1098                         if mobj is None:
1099                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1100                                 return
1101                         vardict = parse_qs(mobj.group(1))
1102                         if 'mediaData' not in vardict:
1103                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1104                                 return
1105                         mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
1106                         if mobj is None:
1107                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1108                                 return
1109                         mediaURL = mobj.group(1).replace('\\/', '/')
1110                         video_extension = mediaURL[-3:]
1111                         video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
1112
1113                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
1114                 if mobj is None:
1115                         self._downloader.trouble(u'ERROR: unable to extract title')
1116                         return
1117                 video_title = mobj.group(1).decode('utf-8')
1118                 video_title = sanitize_title(video_title)
1119
1120                 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
1121                 if mobj is None:
1122                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1123                         return
1124                 video_uploader = mobj.group(1)
1125
1126                 try:
1127                         # Process video information
1128                         self._downloader.process_info({
1129                                 'id':           video_id.decode('utf-8'),
1130                                 'url':          video_url.decode('utf-8'),
1131                                 'uploader':     video_uploader.decode('utf-8'),
1132                                 'upload_date':  u'NA',
1133                                 'title':        video_title,
1134                                 'stitle':       simple_title,
1135                                 'ext':          video_extension.decode('utf-8'),
1136                                 'format':       u'NA',
1137                                 'player_url':   None,
1138                         })
1139                 except UnavailableVideoError:
1140                         self._downloader.trouble(u'ERROR: unable to download video')
1141
1142
1143 class DailymotionIE(InfoExtractor):
1144         """Information Extractor for Dailymotion"""
1145
1146         _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1147
1148         def __init__(self, downloader=None):
1149                 InfoExtractor.__init__(self, downloader)
1150
1151         @staticmethod
1152         def suitable(url):
1153                 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1154
1155         def report_download_webpage(self, video_id):
1156                 """Report webpage download."""
1157                 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
1158
1159         def report_extraction(self, video_id):
1160                 """Report information extraction."""
1161                 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
1162
1163         def _real_initialize(self):
1164                 return
1165
1166         def _real_extract(self, url):
1167                 # Extract id and simplified title from URL
1168                 mobj = re.match(self._VALID_URL, url)
1169                 if mobj is None:
1170                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1171                         return
1172
1173                 # At this point we have a new video
1174                 self._downloader.increment_downloads()
1175                 video_id = mobj.group(1)
1176
1177                 simple_title = mobj.group(2).decode('utf-8')
1178                 video_extension = 'flv'
1179
1180                 # Retrieve video webpage to extract further information
1181                 request = urllib2.Request(url)
1182                 try:
1183                         self.report_download_webpage(video_id)
1184                         webpage = urllib2.urlopen(request).read()
1185                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1186                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1187                         return
1188
1189                 # Extract URL, uploader and title from webpage
1190                 self.report_extraction(video_id)
1191                 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1192                 if mobj is None:
1193                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1194                         return
1195                 mediaURL = urllib.unquote(mobj.group(1))
1196
1197                 # if needed add http://www.dailymotion.com/ if relative URL
1198
1199                 video_url = mediaURL
1200
1201                 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1202                 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1203                 if mobj is None:
1204                         self._downloader.trouble(u'ERROR: unable to extract title')
1205                         return
1206                 video_title = mobj.group(1).decode('utf-8')
1207                 video_title = sanitize_title(video_title)
1208
1209                 mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a>', webpage)
1210                 if mobj is None:
1211                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1212                         return
1213                 video_uploader = mobj.group(1)
1214
1215                 try:
1216                         # Process video information
1217                         self._downloader.process_info({
1218                                 'id':           video_id.decode('utf-8'),
1219                                 'url':          video_url.decode('utf-8'),
1220                                 'uploader':     video_uploader.decode('utf-8'),
1221                                 'upload_date':  u'NA',
1222                                 'title':        video_title,
1223                                 'stitle':       simple_title,
1224                                 'ext':          video_extension.decode('utf-8'),
1225                                 'format':       u'NA',
1226                                 'player_url':   None,
1227                         })
1228                 except UnavailableVideoError:
1229                         self._downloader.trouble(u'ERROR: unable to download video')
1230
1231 class GoogleIE(InfoExtractor):
1232         """Information extractor for video.google.com."""
1233
1234         _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1235
1236         def __init__(self, downloader=None):
1237                 InfoExtractor.__init__(self, downloader)
1238
1239         @staticmethod
1240         def suitable(url):
1241                 return (re.match(GoogleIE._VALID_URL, url) is not None)
1242
1243         def report_download_webpage(self, video_id):
1244                 """Report webpage download."""
1245                 self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
1246
1247         def report_extraction(self, video_id):
1248                 """Report information extraction."""
1249                 self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
1250
1251         def _real_initialize(self):
1252                 return
1253
1254         def _real_extract(self, url):
1255                 # Extract id from URL
1256                 mobj = re.match(self._VALID_URL, url)
1257                 if mobj is None:
1258                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1259                         return
1260
1261                 # At this point we have a new video
1262                 self._downloader.increment_downloads()
1263                 video_id = mobj.group(1)
1264
1265                 video_extension = 'mp4'
1266
1267                 # Retrieve video webpage to extract further information
1268                 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1269                 try:
1270                         self.report_download_webpage(video_id)
1271                         webpage = urllib2.urlopen(request).read()
1272                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1273                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1274                         return
1275
1276                 # Extract URL, uploader, and title from webpage
1277                 self.report_extraction(video_id)
1278                 mobj = re.search(r"download_url:'([^']+)'", webpage)
1279                 if mobj is None:
1280                         video_extension = 'flv'
1281                         mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1282                 if mobj is None:
1283                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1284                         return
1285                 mediaURL = urllib.unquote(mobj.group(1))
1286                 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1287                 mediaURL = mediaURL.replace('\\x26', '\x26')
1288
1289                 video_url = mediaURL
1290
1291                 mobj = re.search(r'<title>(.*)</title>', webpage)
1292                 if mobj is None:
1293                         self._downloader.trouble(u'ERROR: unable to extract title')
1294                         return
1295                 video_title = mobj.group(1).decode('utf-8')
1296                 video_title = sanitize_title(video_title)
1297                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1298
1299                 # Extract video description
1300                 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1301                 if mobj is None:
1302                         self._downloader.trouble(u'ERROR: unable to extract video description')
1303                         return
1304                 video_description = mobj.group(1).decode('utf-8')
1305                 if not video_description:
1306                         video_description = 'No description available.'
1307
1308                 # Extract video thumbnail
1309                 if self._downloader.params.get('forcethumbnail', False):
1310                         request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1311                         try:
1312                                 webpage = urllib2.urlopen(request).read()
1313                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1314                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1315                                 return
1316                         mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1317                         if mobj is None:
1318                                 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1319                                 return
1320                         video_thumbnail = mobj.group(1)
1321                 else:   # we need something to pass to process_info
1322                         video_thumbnail = ''
1323
1324
1325                 try:
1326                         # Process video information
1327                         self._downloader.process_info({
1328                                 'id':           video_id.decode('utf-8'),
1329                                 'url':          video_url.decode('utf-8'),
1330                                 'uploader':     u'NA',
1331                                 'upload_date':  u'NA',
1332                                 'title':        video_title,
1333                                 'stitle':       simple_title,
1334                                 'ext':          video_extension.decode('utf-8'),
1335                                 'format':       u'NA',
1336                                 'player_url':   None,
1337                         })
1338                 except UnavailableVideoError:
1339                         self._downloader.trouble(u'ERROR: unable to download video')
1340
1341
1342 class PhotobucketIE(InfoExtractor):
1343         """Information extractor for photobucket.com."""
1344
1345         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1346
1347         def __init__(self, downloader=None):
1348                 InfoExtractor.__init__(self, downloader)
1349
1350         @staticmethod
1351         def suitable(url):
1352                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1353
1354         def report_download_webpage(self, video_id):
1355                 """Report webpage download."""
1356                 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1357
1358         def report_extraction(self, video_id):
1359                 """Report information extraction."""
1360                 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1361
1362         def _real_initialize(self):
1363                 return
1364
1365         def _real_extract(self, url):
1366                 # Extract id from URL
1367                 mobj = re.match(self._VALID_URL, url)
1368                 if mobj is None:
1369                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1370                         return
1371
1372                 # At this point we have a new video
1373                 self._downloader.increment_downloads()
1374                 video_id = mobj.group(1)
1375
1376                 video_extension = 'flv'
1377
1378                 # Retrieve video webpage to extract further information
1379                 request = urllib2.Request(url)
1380                 try:
1381                         self.report_download_webpage(video_id)
1382                         webpage = urllib2.urlopen(request).read()
1383                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1384                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1385                         return
1386
1387                 # Extract URL, uploader, and title from webpage
1388                 self.report_extraction(video_id)
1389                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1390                 if mobj is None:
1391                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1392                         return
1393                 mediaURL = urllib.unquote(mobj.group(1))
1394
1395                 video_url = mediaURL
1396
1397                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1398                 if mobj is None:
1399                         self._downloader.trouble(u'ERROR: unable to extract title')
1400                         return
1401                 video_title = mobj.group(1).decode('utf-8')
1402                 video_title = sanitize_title(video_title)
1403                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1404
1405                 video_uploader = mobj.group(2).decode('utf-8')
1406
1407                 try:
1408                         # Process video information
1409                         self._downloader.process_info({
1410                                 'id':           video_id.decode('utf-8'),
1411                                 'url':          video_url.decode('utf-8'),
1412                                 'uploader':     video_uploader,
1413                                 'upload_date':  u'NA',
1414                                 'title':        video_title,
1415                                 'stitle':       simple_title,
1416                                 'ext':          video_extension.decode('utf-8'),
1417                                 'format':       u'NA',
1418                                 'player_url':   None,
1419                         })
1420                 except UnavailableVideoError:
1421                         self._downloader.trouble(u'ERROR: unable to download video')
1422
1423
1424 class YahooIE(InfoExtractor):
1425         """Information extractor for video.yahoo.com."""
1426
1427         # _VALID_URL matches all Yahoo! Video URLs
1428         # _VPAGE_URL matches only the extractable '/watch/' URLs
1429         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1430         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1431
1432         def __init__(self, downloader=None):
1433                 InfoExtractor.__init__(self, downloader)
1434
1435         @staticmethod
1436         def suitable(url):
1437                 return (re.match(YahooIE._VALID_URL, url) is not None)
1438
1439         def report_download_webpage(self, video_id):
1440                 """Report webpage download."""
1441                 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1442
1443         def report_extraction(self, video_id):
1444                 """Report information extraction."""
1445                 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1446
1447         def _real_initialize(self):
1448                 return
1449
1450         def _real_extract(self, url, new_video=True):
1451                 # Extract ID from URL
1452                 mobj = re.match(self._VALID_URL, url)
1453                 if mobj is None:
1454                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1455                         return
1456
1457                 # At this point we have a new video
1458                 self._downloader.increment_downloads()
1459                 video_id = mobj.group(2)
1460                 video_extension = 'flv'
1461
1462                 # Rewrite valid but non-extractable URLs as
1463                 # extractable English language /watch/ URLs
1464                 if re.match(self._VPAGE_URL, url) is None:
1465                         request = urllib2.Request(url)
1466                         try:
1467                                 webpage = urllib2.urlopen(request).read()
1468                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1469                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1470                                 return
1471
1472                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1473                         if mobj is None:
1474                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1475                                 return
1476                         yahoo_id = mobj.group(1)
1477
1478                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1479                         if mobj is None:
1480                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1481                                 return
1482                         yahoo_vid = mobj.group(1)
1483
1484                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1485                         return self._real_extract(url, new_video=False)
1486
1487                 # Retrieve video webpage to extract further information
1488                 request = urllib2.Request(url)
1489                 try:
1490                         self.report_download_webpage(video_id)
1491                         webpage = urllib2.urlopen(request).read()
1492                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1493                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1494                         return
1495
1496                 # Extract uploader and title from webpage
1497                 self.report_extraction(video_id)
1498                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1499                 if mobj is None:
1500                         self._downloader.trouble(u'ERROR: unable to extract video title')
1501                         return
1502                 video_title = mobj.group(1).decode('utf-8')
1503                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1504
1505                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1506                 if mobj is None:
1507                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1508                         return
1509                 video_uploader = mobj.group(1).decode('utf-8')
1510
1511                 # Extract video thumbnail
1512                 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1513                 if mobj is None:
1514                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1515                         return
1516                 video_thumbnail = mobj.group(1).decode('utf-8')
1517
1518                 # Extract video description
1519                 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1520                 if mobj is None:
1521                         self._downloader.trouble(u'ERROR: unable to extract video description')
1522                         return
1523                 video_description = mobj.group(1).decode('utf-8')
1524                 if not video_description: video_description = 'No description available.'
1525
1526                 # Extract video height and width
1527                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1528                 if mobj is None:
1529                         self._downloader.trouble(u'ERROR: unable to extract video height')
1530                         return
1531                 yv_video_height = mobj.group(1)
1532
1533                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1534                 if mobj is None:
1535                         self._downloader.trouble(u'ERROR: unable to extract video width')
1536                         return
1537                 yv_video_width = mobj.group(1)
1538
1539                 # Retrieve video playlist to extract media URL
1540                 # I'm not completely sure what all these options are, but we
1541                 # seem to need most of them, otherwise the server sends a 401.
1542                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1543                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1544                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1545                                           '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1546                                           '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1547                 try:
1548                         self.report_download_webpage(video_id)
1549                         webpage = urllib2.urlopen(request).read()
1550                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1551                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1552                         return
1553
1554                 # Extract media URL from playlist XML
1555                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1556                 if mobj is None:
1557                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1558                         return
1559                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1560                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1561
1562                 try:
1563                         # Process video information
1564                         self._downloader.process_info({
1565                                 'id':           video_id.decode('utf-8'),
1566                                 'url':          video_url,
1567                                 'uploader':     video_uploader,
1568                                 'upload_date':  u'NA',
1569                                 'title':        video_title,
1570                                 'stitle':       simple_title,
1571                                 'ext':          video_extension.decode('utf-8'),
1572                                 'thumbnail':    video_thumbnail.decode('utf-8'),
1573                                 'description':  video_description,
1574                                 'thumbnail':    video_thumbnail,
1575                                 'description':  video_description,
1576                                 'player_url':   None,
1577                         })
1578                 except UnavailableVideoError:
1579                         self._downloader.trouble(u'ERROR: unable to download video')
1580
1581
1582 class GenericIE(InfoExtractor):
1583         """Generic last-resort information extractor."""
1584
1585         def __init__(self, downloader=None):
1586                 InfoExtractor.__init__(self, downloader)
1587
1588         @staticmethod
1589         def suitable(url):
1590                 return True
1591
1592         def report_download_webpage(self, video_id):
1593                 """Report webpage download."""
1594                 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
1595                 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
1596
1597         def report_extraction(self, video_id):
1598                 """Report information extraction."""
1599                 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
1600
1601         def _real_initialize(self):
1602                 return
1603
1604         def _real_extract(self, url):
1605                 # At this point we have a new video
1606                 self._downloader.increment_downloads()
1607
1608                 video_id = url.split('/')[-1]
1609                 request = urllib2.Request(url)
1610                 try:
1611                         self.report_download_webpage(video_id)
1612                         webpage = urllib2.urlopen(request).read()
1613                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1614                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1615                         return
1616                 except ValueError, err:
1617                         # since this is the last-resort InfoExtractor, if
1618                         # this error is thrown, it'll be thrown here
1619                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1620                         return
1621
1622                 # Start with something easy: JW Player in SWFObject
1623                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1624                 if mobj is None:
1625                         # Broaden the search a little bit
1626                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1627                 if mobj is None:
1628                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1629                         return
1630
1631                 # It's possible that one of the regexes
1632                 # matched, but returned an empty group:
1633                 if mobj.group(1) is None:
1634                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1635                         return
1636
1637                 video_url = urllib.unquote(mobj.group(1))
1638                 video_id  = os.path.basename(video_url)
1639
1640                 # here's a fun little line of code for you:
1641                 video_extension = os.path.splitext(video_id)[1][1:]
1642                 video_id        = os.path.splitext(video_id)[0]
1643
1644                 # it's tempting to parse this further, but you would
1645                 # have to take into account all the variations like
1646                 #   Video Title - Site Name
1647                 #   Site Name | Video Title
1648                 #   Video Title - Tagline | Site Name
1649                 # and so on and so forth; it's just not practical
1650                 mobj = re.search(r'<title>(.*)</title>', webpage)
1651                 if mobj is None:
1652                         self._downloader.trouble(u'ERROR: unable to extract title')
1653                         return
1654                 video_title = mobj.group(1).decode('utf-8')
1655                 video_title = sanitize_title(video_title)
1656                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1657
1658                 # video uploader is domain name
1659                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1660                 if mobj is None:
1661                         self._downloader.trouble(u'ERROR: unable to extract title')
1662                         return
1663                 video_uploader = mobj.group(1).decode('utf-8')
1664
1665                 try:
1666                         # Process video information
1667                         self._downloader.process_info({
1668                                 'id':           video_id.decode('utf-8'),
1669                                 'url':          video_url.decode('utf-8'),
1670                                 'uploader':     video_uploader,
1671                                 'upload_date':  u'NA',
1672                                 'title':        video_title,
1673                                 'stitle':       simple_title,
1674                                 'ext':          video_extension.decode('utf-8'),
1675                                 'format':       u'NA',
1676                                 'player_url':   None,
1677                         })
1678                 except UnavailableVideoError, err:
1679                         self._downloader.trouble(u'ERROR: unable to download video')
1680
1681
1682 class YoutubeSearchIE(InfoExtractor):
1683         """Information Extractor for YouTube search queries."""
1684         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1685         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1686         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1687         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1688         _youtube_ie = None
1689         _max_youtube_results = 1000
1690
1691         def __init__(self, youtube_ie, downloader=None):
1692                 InfoExtractor.__init__(self, downloader)
1693                 self._youtube_ie = youtube_ie
1694
1695         @staticmethod
1696         def suitable(url):
1697                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1698
1699         def report_download_page(self, query, pagenum):
1700                 """Report attempt to download playlist page with given number."""
1701                 query = query.decode(preferredencoding())
1702                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1703
1704         def _real_initialize(self):
1705                 self._youtube_ie.initialize()
1706
1707         def _real_extract(self, query):
1708                 mobj = re.match(self._VALID_QUERY, query)
1709                 if mobj is None:
1710                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1711                         return
1712
1713                 prefix, query = query.split(':')
1714                 prefix = prefix[8:]
1715                 query  = query.encode('utf-8')
1716                 if prefix == '':
1717                         self._download_n_results(query, 1)
1718                         return
1719                 elif prefix == 'all':
1720                         self._download_n_results(query, self._max_youtube_results)
1721                         return
1722                 else:
1723                         try:
1724                                 n = long(prefix)
1725                                 if n <= 0:
1726                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1727                                         return
1728                                 elif n > self._max_youtube_results:
1729                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
1730                                         n = self._max_youtube_results
1731                                 self._download_n_results(query, n)
1732                                 return
1733                         except ValueError: # parsing prefix as integer fails
1734                                 self._download_n_results(query, 1)
1735                                 return
1736
1737         def _download_n_results(self, query, n):
1738                 """Downloads a specified number of results for a query"""
1739
1740                 video_ids = []
1741                 already_seen = set()
1742                 pagenum = 1
1743
1744                 while True:
1745                         self.report_download_page(query, pagenum)
1746                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1747                         request = urllib2.Request(result_url, None, std_headers)
1748                         try:
1749                                 page = urllib2.urlopen(request).read()
1750                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1751                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1752                                 return
1753
1754                         # Extract video identifiers
1755                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1756                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1757                                 if video_id not in already_seen:
1758                                         video_ids.append(video_id)
1759                                         already_seen.add(video_id)
1760                                         if len(video_ids) == n:
1761                                                 # Specified n videos reached
1762                                                 for id in video_ids:
1763                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1764                                                 return
1765
1766                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1767                                 for id in video_ids:
1768                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1769                                 return
1770
1771                         pagenum = pagenum + 1
1772
1773 class GoogleSearchIE(InfoExtractor):
1774         """Information Extractor for Google Video search queries."""
1775         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1776         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1777         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1778         _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1779         _google_ie = None
1780         _max_google_results = 1000
1781
        def __init__(self, google_ie, downloader=None):
                """Set up the base extractor and keep a reference to google_ie."""
                InfoExtractor.__init__(self, downloader)
                self._google_ie = google_ie
1785
1786         @staticmethod
1787         def suitable(url):
1788                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1789
1790         def report_download_page(self, query, pagenum):
1791                 """Report attempt to download playlist page with given number."""
1792                 query = query.decode(preferredencoding())
1793                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1794
1795         def _real_initialize(self):
1796                 self._google_ie.initialize()
1797
1798         def _real_extract(self, query):
1799                 mobj = re.match(self._VALID_QUERY, query)
1800                 if mobj is None:
1801                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1802                         return
1803
1804                 prefix, query = query.split(':')
1805                 prefix = prefix[8:]
1806                 query  = query.encode('utf-8')
1807                 if prefix == '':
1808                         self._download_n_results(query, 1)
1809                         return
1810                 elif prefix == 'all':
1811                         self._download_n_results(query, self._max_google_results)
1812                         return
1813                 else:
1814                         try:
1815                                 n = long(prefix)
1816                                 if n <= 0:
1817                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1818                                         return
1819                                 elif n > self._max_google_results:
1820                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n))
1821                                         n = self._max_google_results
1822                                 self._download_n_results(query, n)
1823                                 return
1824                         except ValueError: # parsing prefix as integer fails
1825                                 self._download_n_results(query, 1)
1826                                 return
1827
1828         def _download_n_results(self, query, n):
1829                 """Downloads a specified number of results for a query"""
1830
1831                 video_ids = []
1832                 already_seen = set()
1833                 pagenum = 1
1834
1835                 while True:
1836                         self.report_download_page(query, pagenum)
1837                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1838                         request = urllib2.Request(result_url, None, std_headers)
1839                         try:
1840                                 page = urllib2.urlopen(request).read()
1841                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1842                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1843                                 return
1844
1845                         # Extract video identifiers
1846                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1847                                 video_id = mobj.group(1)
1848                                 if video_id not in already_seen:
1849                                         video_ids.append(video_id)
1850                                         already_seen.add(video_id)
1851                                         if len(video_ids) == n:
1852                                                 # Specified n videos reached
1853                                                 for id in video_ids:
1854                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1855                                                 return
1856
1857                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1858                                 for id in video_ids:
1859                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1860                                 return
1861
1862                         pagenum = pagenum + 1
1863
1864 class YahooSearchIE(InfoExtractor):
1865         """Information Extractor for Yahoo! Video search queries."""
1866         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
1867         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
1868         _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
1869         _MORE_PAGES_INDICATOR = r'\s*Next'
1870         _yahoo_ie = None
1871         _max_yahoo_results = 1000
1872
1873         def __init__(self, yahoo_ie, downloader=None):
1874                 InfoExtractor.__init__(self, downloader)
1875                 self._yahoo_ie = yahoo_ie
1876
1877         @staticmethod
1878         def suitable(url):
1879                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
1880
1881         def report_download_page(self, query, pagenum):
1882                 """Report attempt to download playlist page with given number."""
1883                 query = query.decode(preferredencoding())
1884                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
1885
1886         def _real_initialize(self):
1887                 self._yahoo_ie.initialize()
1888
1889         def _real_extract(self, query):
1890                 mobj = re.match(self._VALID_QUERY, query)
1891                 if mobj is None:
1892                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1893                         return
1894
1895                 prefix, query = query.split(':')
1896                 prefix = prefix[8:]
1897                 query  = query.encode('utf-8')
1898                 if prefix == '':
1899                         self._download_n_results(query, 1)
1900                         return
1901                 elif prefix == 'all':
1902                         self._download_n_results(query, self._max_yahoo_results)
1903                         return
1904                 else:
1905                         try:
1906                                 n = long(prefix)
1907                                 if n <= 0:
1908                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1909                                         return
1910                                 elif n > self._max_yahoo_results:
1911                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n))
1912                                         n = self._max_yahoo_results
1913                                 self._download_n_results(query, n)
1914                                 return
1915                         except ValueError: # parsing prefix as integer fails
1916                                 self._download_n_results(query, 1)
1917                                 return
1918
1919         def _download_n_results(self, query, n):
1920                 """Downloads a specified number of results for a query"""
1921
1922                 video_ids = []
1923                 already_seen = set()
1924                 pagenum = 1
1925
1926                 while True:
1927                         self.report_download_page(query, pagenum)
1928                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1929                         request = urllib2.Request(result_url, None, std_headers)
1930                         try:
1931                                 page = urllib2.urlopen(request).read()
1932                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1933                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1934                                 return
1935
1936                         # Extract video identifiers
1937                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1938                                 video_id = mobj.group(1)
1939                                 if video_id not in already_seen:
1940                                         video_ids.append(video_id)
1941                                         already_seen.add(video_id)
1942                                         if len(video_ids) == n:
1943                                                 # Specified n videos reached
1944                                                 for id in video_ids:
1945                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1946                                                 return
1947
1948                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1949                                 for id in video_ids:
1950                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1951                                 return
1952
1953                         pagenum = pagenum + 1
1954
1955 class YoutubePlaylistIE(InfoExtractor):
1956         """Information Extractor for YouTube playlists."""
1957
1958         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
1959         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1960         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1961         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1962         _youtube_ie = None
1963
1964         def __init__(self, youtube_ie, downloader=None):
1965                 InfoExtractor.__init__(self, downloader)
1966                 self._youtube_ie = youtube_ie
1967
1968         @staticmethod
1969         def suitable(url):
1970                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1971
1972         def report_download_page(self, playlist_id, pagenum):
1973                 """Report attempt to download playlist page with given number."""
1974                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1975
1976         def _real_initialize(self):
1977                 self._youtube_ie.initialize()
1978
1979         def _real_extract(self, url):
1980                 # Extract playlist id
1981                 mobj = re.match(self._VALID_URL, url)
1982                 if mobj is None:
1983                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1984                         return
1985
1986                 # Download playlist pages
1987                 playlist_id = mobj.group(1)
1988                 video_ids = []
1989                 pagenum = 1
1990
1991                 while True:
1992                         self.report_download_page(playlist_id, pagenum)
1993                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1994                         try:
1995                                 page = urllib2.urlopen(request).read()
1996                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1997                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1998                                 return
1999
2000                         # Extract video identifiers
2001                         ids_in_page = []
2002                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2003                                 if mobj.group(1) not in ids_in_page:
2004                                         ids_in_page.append(mobj.group(1))
2005                         video_ids.extend(ids_in_page)
2006
2007                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2008                                 break
2009                         pagenum = pagenum + 1
2010
2011                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2012                 playlistend = self._downloader.params.get('playlistend', -1)
2013                 video_ids = video_ids[playliststart:playlistend]
2014
2015                 for id in video_ids:
2016                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2017                 return
2018
2019 class YoutubeUserIE(InfoExtractor):
2020         """Information Extractor for YouTube users."""
2021
2022         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
2023         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2024         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
2025         _youtube_ie = None
2026
2027         def __init__(self, youtube_ie, downloader=None):
2028                 InfoExtractor.__init__(self, downloader)
2029                 self._youtube_ie = youtube_ie
2030
2031         @staticmethod
2032         def suitable(url):
2033                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2034
2035         def report_download_page(self, username):
2036                 """Report attempt to download user page."""
2037                 self._downloader.to_screen(u'[youtube] user %s: Downloading page ' % (username))
2038
2039         def _real_initialize(self):
2040                 self._youtube_ie.initialize()
2041
2042         def _real_extract(self, url):
2043                 # Extract username
2044                 mobj = re.match(self._VALID_URL, url)
2045                 if mobj is None:
2046                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2047                         return
2048
2049                 # Download user page
2050                 username = mobj.group(1)
2051                 video_ids = []
2052                 pagenum = 1
2053
2054                 self.report_download_page(username)
2055                 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
2056                 try:
2057                         page = urllib2.urlopen(request).read()
2058                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2059                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2060                         return
2061
2062                 # Extract video identifiers
2063                 ids_in_page = []
2064
2065                 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2066                         if mobj.group(1) not in ids_in_page:
2067                                 ids_in_page.append(mobj.group(1))
2068                 video_ids.extend(ids_in_page)
2069
2070                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2071                 playlistend = self._downloader.params.get('playlistend', -1)
2072                 video_ids = video_ids[playliststart:playlistend]
2073
2074                 for id in video_ids:
2075                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2076                 return
2077
class PostProcessor(object):
        """Base class for post processors.

        A downloader keeps a chain of PostProcessor objects, registered
        through its add_post_processor() method. After a successful
        download it calls run() on each element of the chain in turn: the
        first one receives the initial information and every following
        one receives whatever the previous one returned.

        The chain stops as soon as one run() call returns None, or when
        its last element has been executed.

        As with InfoExtractor objects, registration is mutual: the
        downloader knows its post processors and each post processor
        knows its downloader.
        """

        # Downloader this post processor is attached to, if any.
        _downloader = None

        def __init__(self, downloader=None):
                self._downloader = downloader

        def set_downloader(self, downloader):
                """Attach this post processor to the given downloader."""
                self._downloader = downloader

        def run(self, information):
                """Process a finished download.

                "information" is a dictionary like the ones built by the
                InfoExtractors, with one additional "filepath" field that
                points to the downloaded file.

                Returning None stops the postprocessing chain. Returning a
                dictionary (the received one, possibly with some fields
                changed) passes it on to the next object in the chain.

                Implementations may also raise PostProcessingError, which
                the calling downloader takes into account.
                """
                # The base implementation hands the information on untouched.
                return information
2123
2124 ### MAIN PROGRAM ###
2125 if __name__ == '__main__':
2126         try:
2127                 # Modules needed only when running the main program
2128                 import getpass
2129                 import optparse
2130
2131                 # Function to update the program file with the latest version from bitbucket.org
2132                 def update_self(downloader, filename):
2133                         # Note: downloader only used for options
2134                         if not os.access (filename, os.W_OK):
2135                                 sys.exit('ERROR: no write permissions on %s' % filename)
2136
2137                         downloader.to_screen('Updating to latest stable version...')
2138                         latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
2139                         latest_version = urllib.urlopen(latest_url).read().strip()
2140                         prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
2141                         newcontent = urllib.urlopen(prog_url).read()
2142                         stream = open(filename, 'w')
2143                         stream.write(newcontent)
2144                         stream.close()
2145                         downloader.to_screen('Updated to version %s' % latest_version)
2146
2147                 # Parse command line
2148                 parser = optparse.OptionParser(
2149                         usage='Usage: %prog [options] url...',
2150                         version='2010.11.19',
2151                         conflict_handler='resolve',
2152                 )
2153
2154                 parser.add_option('-h', '--help',
2155                                 action='help', help='print this help text and exit')
2156                 parser.add_option('-v', '--version',
2157                                 action='version', help='print program version and exit')
2158                 parser.add_option('-U', '--update',
2159                                 action='store_true', dest='update_self', help='update this program to latest stable version')
2160                 parser.add_option('-i', '--ignore-errors',
2161                                 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
2162                 parser.add_option('-r', '--rate-limit',
2163                                 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
2164                 parser.add_option('-R', '--retries',
2165                                 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
2166                 parser.add_option('--playlist-start',
2167                                 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
2168                 parser.add_option('--playlist-end',
2169                                 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
2170
2171                 authentication = optparse.OptionGroup(parser, 'Authentication Options')
2172                 authentication.add_option('-u', '--username',
2173                                 dest='username', metavar='USERNAME', help='account username')
2174                 authentication.add_option('-p', '--password',
2175                                 dest='password', metavar='PASSWORD', help='account password')
2176                 authentication.add_option('-n', '--netrc',
2177                                 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
2178                 parser.add_option_group(authentication)
2179
2180                 video_format = optparse.OptionGroup(parser, 'Video Format Options')
2181                 video_format.add_option('-f', '--format',
2182                                 action='store', dest='format', metavar='FORMAT', help='video format code')
2183                 video_format.add_option('-m', '--mobile-version',
2184                                 action='store_const', dest='format', help='alias for -f 17', const='17')
2185                 video_format.add_option('--all-formats',
2186                                 action='store_const', dest='format', help='download all available video formats', const='-1')
2187                 video_format.add_option('--max-quality',
2188                                 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
2189                 video_format.add_option('-b', '--best-quality',
2190                                 action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)')
2191                 parser.add_option_group(video_format)
2192
2193                 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
2194                 verbosity.add_option('-q', '--quiet',
2195                                 action='store_true', dest='quiet', help='activates quiet mode', default=False)
2196                 verbosity.add_option('-s', '--simulate',
2197                                 action='store_true', dest='simulate', help='do not download video', default=False)
2198                 verbosity.add_option('-g', '--get-url',
2199                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
2200                 verbosity.add_option('-e', '--get-title',
2201                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
2202                 verbosity.add_option('--get-thumbnail',
2203                                 action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
2204                 verbosity.add_option('--get-description',
2205                                 action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
2206                 verbosity.add_option('--no-progress',
2207                                 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
2208                 parser.add_option_group(verbosity)
2209
2210                 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
2211                 filesystem.add_option('-t', '--title',
2212                                 action='store_true', dest='usetitle', help='use title in file name', default=False)
2213                 filesystem.add_option('-l', '--literal',
2214                                 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
2215                 filesystem.add_option('-A', '--auto-number',
2216                                 action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False)
2217                 filesystem.add_option('-o', '--output',
2218                                 dest='outtmpl', metavar='TEMPLATE', help='output filename template')
2219                 filesystem.add_option('-a', '--batch-file',
2220                                 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
2221                 filesystem.add_option('-w', '--no-overwrites',
2222                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
2223                 filesystem.add_option('-c', '--continue',
2224                                 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
2225                 filesystem.add_option('--cookies',
2226                                 dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
2227                 parser.add_option_group(filesystem)
2228
2229                 (opts, args) = parser.parse_args()
2230
2231                 # Open appropriate CookieJar
2232                 if opts.cookiefile is None:
2233                         jar = cookielib.CookieJar()
2234                 else:
2235                         try:
2236                                 jar = cookielib.MozillaCookieJar(opts.cookiefile)
2237                                 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
2238                                         jar.load()
2239                         except (IOError, OSError), err:
2240                                 sys.exit(u'ERROR: unable to open cookie file')
2241
2242                 # General configuration
2243                 cookie_processor = urllib2.HTTPCookieProcessor(jar)
2244                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
2245                 urllib2.install_opener(urllib2.build_opener(cookie_processor))
2246                 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
2247
2248                 # Batch file verification
2249                 batchurls = []
2250                 if opts.batchfile is not None:
2251                         try:
2252                                 if opts.batchfile == '-':
2253                                         batchfd = sys.stdin
2254                                 else:
2255                                         batchfd = open(opts.batchfile, 'r')
2256                                 batchurls = batchfd.readlines()
2257                                 batchurls = [x.strip() for x in batchurls]
2258                                 batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
2259                         except IOError:
2260                                 sys.exit(u'ERROR: batch file could not be read')
2261                 all_urls = batchurls + args
2262
2263                 # Conflicting, missing and erroneous options
2264                 if opts.bestquality:
2265                         print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n'
2266                 if opts.usenetrc and (opts.username is not None or opts.password is not None):
2267                         parser.error(u'using .netrc conflicts with giving username/password')
2268                 if opts.password is not None and opts.username is None:
2269                         parser.error(u'account username missing')
2270                 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
2271                         parser.error(u'using output template conflicts with using title, literal title or auto number')
2272                 if opts.usetitle and opts.useliteral:
2273                         parser.error(u'using title conflicts with using literal title')
2274                 if opts.username is not None and opts.password is None:
2275                         opts.password = getpass.getpass(u'Type account password and press return:')
2276                 if opts.ratelimit is not None:
2277                         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
2278                         if numeric_limit is None:
2279                                 parser.error(u'invalid rate limit specified')
2280                         opts.ratelimit = numeric_limit
2281                 if opts.retries is not None:
2282                         try:
2283                                 opts.retries = long(opts.retries)
2284                         except (TypeError, ValueError), err:
2285                                 parser.error(u'invalid retry count specified')
2286                 try:
2287                         opts.playliststart = long(opts.playliststart)
2288                         if opts.playliststart <= 0:
2289                                 raise ValueError
2290                 except (TypeError, ValueError), err:
2291                         parser.error(u'invalid playlist start number specified')
2292                 try:
2293                         opts.playlistend = long(opts.playlistend)
2294                         if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
2295                                 raise ValueError
2296                 except (TypeError, ValueError), err:
2297                         parser.error(u'invalid playlist end number specified')
2298
2299                 # Information extractors
2300                 youtube_ie = YoutubeIE()
2301                 metacafe_ie = MetacafeIE(youtube_ie)
2302                 dailymotion_ie = DailymotionIE()
2303                 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
2304                 youtube_user_ie = YoutubeUserIE(youtube_ie)
2305                 youtube_search_ie = YoutubeSearchIE(youtube_ie)
2306                 google_ie = GoogleIE()
2307                 google_search_ie = GoogleSearchIE(google_ie)
2308                 photobucket_ie = PhotobucketIE()
2309                 yahoo_ie = YahooIE()
2310                 yahoo_search_ie = YahooSearchIE(yahoo_ie)
2311                 generic_ie = GenericIE()
2312
2313                 # File downloader
2314                 fd = FileDownloader({
2315                         'usenetrc': opts.usenetrc,
2316                         'username': opts.username,
2317                         'password': opts.password,
2318                         'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
2319                         'forceurl': opts.geturl,
2320                         'forcetitle': opts.gettitle,
2321                         'forcethumbnail': opts.getthumbnail,
2322                         'forcedescription': opts.getdescription,
2323                         'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
2324                         'format': opts.format,
2325                         'format_limit': opts.format_limit,
2326                         'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
2327                                 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
2328                                 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
2329                                 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
2330                                 or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
2331                                 or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
2332                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
2333                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
2334                                 or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
2335                                 or u'%(id)s.%(ext)s'),
2336                         'ignoreerrors': opts.ignoreerrors,
2337                         'ratelimit': opts.ratelimit,
2338                         'nooverwrites': opts.nooverwrites,
2339                         'retries': opts.retries,
2340                         'continuedl': opts.continue_dl,
2341                         'noprogress': opts.noprogress,
2342                         'playliststart': opts.playliststart,
2343                         'playlistend': opts.playlistend,
2344                         'logtostderr': opts.outtmpl == '-',
2345                         })
2346                 fd.add_info_extractor(youtube_search_ie)
2347                 fd.add_info_extractor(youtube_pl_ie)
2348                 fd.add_info_extractor(youtube_user_ie)
2349                 fd.add_info_extractor(metacafe_ie)
2350                 fd.add_info_extractor(dailymotion_ie)
2351                 fd.add_info_extractor(youtube_ie)
2352                 fd.add_info_extractor(google_ie)
2353                 fd.add_info_extractor(google_search_ie)
2354                 fd.add_info_extractor(photobucket_ie)
2355                 fd.add_info_extractor(yahoo_ie)
2356                 fd.add_info_extractor(yahoo_search_ie)
2357
2358                 # This must come last since it's the
2359                 # fallback if none of the others work
2360                 fd.add_info_extractor(generic_ie)
2361
2362                 # Update version
2363                 if opts.update_self:
2364                         update_self(fd, sys.argv[0])
2365
2366                 # Maybe do nothing
2367                 if len(all_urls) < 1:
2368                         if not opts.update_self:
2369                                 parser.error(u'you must provide at least one URL')
2370                         else:
2371                                 sys.exit()
2372                 retcode = fd.download(all_urls)
2373
2374                 # Dump cookie jar if requested
2375                 if opts.cookiefile is not None:
2376                         try:
2377                                 jar.save()
2378                         except (IOError, OSError), err:
2379                                 sys.exit(u'ERROR: unable to save cookie jar')
2380
2381                 sys.exit(retcode)
2382
2383         except DownloadError:
2384                 sys.exit(1)
2385         except SameFileError:
2386                 sys.exit(u'ERROR: fixed output name but more than one file to download')
2387         except KeyboardInterrupt:
2388                 sys.exit(u'\nERROR: Interrupted by user')