youtube-dl

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 # Author: Ricardo Garcia Gonzalez
   4 # Author: Danny Colligan
   5 # Author: Benjamin Johnson
   6 # License: Public domain code
   7 import htmlentitydefs
   8 import httplib
   9 import locale
  10 import math
  11 import netrc
  12 import os
  13 import os.path
  14 import re
  15 import socket
  16 import string
  17 import subprocess
  18 import sys
  19 import time
  20 import urllib
  21 import urllib2
  22
  23 # parse_qs was moved from the cgi module to the urlparse module recently.
  24 try:
  25         from urlparse import parse_qs
  26 except ImportError:
  27         from cgi import parse_qs
  28
  29 std_headers = {
  30         'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6',
  31         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
  32         'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
  33         'Accept-Language': 'en-us,en;q=0.5',
  34 }
  35
  36 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
  37
  38 def preferredencoding():
  39         """Get preferred encoding.
  40
  41         Returns the best encoding scheme for the system, based on
  42         locale.getpreferredencoding() and some further tweaks.
  43         """
  44         def yield_preferredencoding():
  45                 try:
  46                         pref = locale.getpreferredencoding()
  47                         u'TEST'.encode(pref)
  48                 except:
  49                         pref = 'UTF-8'
  50                 while True:
  51                         yield pref
  52         return yield_preferredencoding().next()
  53
  54 def htmlentity_transform(matchobj):
  55         """Transforms an HTML entity to a Unicode character.
  56
  57         This function receives a match object and is intended to be used with
  58         the re.sub() function.
  59         """
  60         entity = matchobj.group(1)
  61
  62         # Known non-numeric HTML entity
  63         if entity in htmlentitydefs.name2codepoint:
  64                 return unichr(htmlentitydefs.name2codepoint[entity])
  65
  66         # Unicode character
  67         mobj = re.match(ur'(?u)#(x?\d+)', entity)
  68         if mobj is not None:
  69                 numstr = mobj.group(1)
  70                 if numstr.startswith(u'x'):
  71                         base = 16
  72                         numstr = u'0%s' % numstr
  73                 else:
  74                         base = 10
  75                 return unichr(long(numstr, base))
  76
  77         # Unknown entity in name, return its literal representation
  78         return (u'&%s;' % entity)
  79
  80 def sanitize_title(utitle):
  81         """Sanitizes a video title so it could be used as part of a filename."""
  82         utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
  83         return utitle.replace(unicode(os.sep), u'%')
  84
  85 def sanitize_open(filename, open_mode):
  86         """Try to open the given filename, and slightly tweak it if this fails.
  87
  88         Attempts to open the given filename. If this fails, it tries to change
  89         the filename slightly, step by step, until it's either able to open it
  90         or it fails and raises a final exception, like the standard open()
  91         function.
  92
  93         It returns the tuple (stream, definitive_file_name).
  94         """
  95         try:
  96                 if filename == u'-':
  97                         return (sys.stdout, filename)
  98                 stream = open(filename, open_mode)
  99                 return (stream, filename)
 100         except (IOError, OSError), err:
 101                 # In case of error, try to remove win32 forbidden chars
 102                 filename = re.sub(ur'[<>:"\|\?\*]', u'#', filename)
 103
 104                 # An exception here should be caught in the caller
 105                 stream = open(filename, open_mode)
 106                 return (stream, filename)
 107
 108
 109 class DownloadError(Exception):
 110         """Download Error exception.
 111
 112         This exception may be thrown by FileDownloader objects if they are not
 113         configured to continue on errors. They will contain the appropriate
 114         error message.
 115         """
 116         pass
 117
 118 class SameFileError(Exception):
 119         """Same File exception.
 120
 121         This exception will be thrown by FileDownloader objects if they detect
 122         multiple files would have to be downloaded to the same file on disk.
 123         """
 124         pass
 125
 126 class PostProcessingError(Exception):
 127         """Post Processing exception.
 128
 129         This exception may be raised by PostProcessor's .run() method to
 130         indicate an error in the postprocessing task.
 131         """
 132         pass
 133
 134 class UnavailableFormatError(Exception):
 135         """Unavailable Format exception.
 136
 137         This exception will be thrown when a video is requested
 138         in a format that is not available for that video.
 139         """
 140         pass
 141
 142 class ContentTooShortError(Exception):
 143         """Content Too Short exception.
 144
 145         This exception may be raised by FileDownloader objects when a file they
 146         download is too small for what the server announced first, indicating
 147         the connection was probably interrupted.
 148         """
 149         # Both in bytes
 150         downloaded = None
 151         expected = None
 152
 153         def __init__(self, downloaded, expected):
 154                 self.downloaded = downloaded
 155                 self.expected = expected
 156
 157 class FileDownloader(object):
 158         """File Downloader class.
 159
 160         File downloader objects are the ones responsible of downloading the
 161         actual video file and writing it to disk if the user has requested
 162         it, among some other tasks. In most cases there should be one per
 163         program. As, given a video URL, the downloader doesn't know how to
 164         extract all the needed information, task that InfoExtractors do, it
 165         has to pass the URL to one of them.
 166
 167         For this, file downloader objects have a method that allows
 168         InfoExtractors to be registered in a given order. When it is passed
 169         a URL, the file downloader handles it to the first InfoExtractor it
 170         finds that reports being able to handle it. The InfoExtractor extracts
 171         all the information about the video or videos the URL refers to, and
 172         asks the FileDownloader to process the video information, possibly
 173         downloading the video.
 174
 175         File downloaders accept a lot of parameters. In order not to saturate
 176         the object constructor with arguments, it receives a dictionary of
 177         options instead. These options are available through the params
 178         attribute for the InfoExtractors to use. The FileDownloader also
 179         registers itself as the downloader in charge for the InfoExtractors
 180         that are added to it, so this is a "mutual registration".
 181
 182         Available options:
 183
 184         username:       Username for authentication purposes.
 185         password:       Password for authentication purposes.
 186         usenetrc:       Use netrc for authentication instead.
 187         quiet:          Do not print messages to stdout.
 188         forceurl:       Force printing final URL.
 189         forcetitle:     Force printing title.
 190         simulate:       Do not download the video files.
 191         format:         Video format code.
 192         outtmpl:        Template for output names.
 193         ignoreerrors:   Do not stop on download errors.
 194         ratelimit:      Download speed limit, in bytes/sec.
 195         nooverwrites:   Prevent overwriting files.
 196         continuedl:     Try to continue downloads if possible.
 197         noprogress:     Do not print the progress bar.
 198         """
 199
 200         params = None
 201         _ies = []
 202         _pps = []
 203         _download_retcode = None
 204
 205         def __init__(self, params):
 206                 """Create a FileDownloader object with the given options."""
 207                 self._ies = []
 208                 self._pps = []
 209                 self._download_retcode = 0
 210                 self.params = params
 211
 212         @staticmethod
 213         def pmkdir(filename):
 214                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
 215                 components = filename.split(os.sep)
 216                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
 217                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
 218                 for dir in aggregate:
 219                         if not os.path.exists(dir):
 220                                 os.mkdir(dir)
 221
 222         @staticmethod
 223         def format_bytes(bytes):
 224                 if bytes is None:
 225                         return 'N/A'
 226                 if type(bytes) is str:
 227                         bytes = float(bytes)
 228                 if bytes == 0.0:
 229                         exponent = 0
 230                 else:
 231                         exponent = long(math.log(bytes, 1024.0))
 232                 suffix = 'bkMGTPEZY'[exponent]
 233                 converted = float(bytes) / float(1024**exponent)
 234                 return '%.2f%s' % (converted, suffix)
 235
 236         @staticmethod
 237         def calc_percent(byte_counter, data_len):
 238                 if data_len is None:
 239                         return '---.-%'
 240                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 241
 242         @staticmethod
 243         def calc_eta(start, now, total, current):
 244                 if total is None:
 245                         return '--:--'
 246                 dif = now - start
 247                 if current == 0 or dif < 0.001: # One millisecond
 248                         return '--:--'
 249                 rate = float(current) / dif
 250                 eta = long((float(total) - float(current)) / rate)
 251                 (eta_mins, eta_secs) = divmod(eta, 60)
 252                 if eta_mins > 99:
 253                         return '--:--'
 254                 return '%02d:%02d' % (eta_mins, eta_secs)
 255
 256         @staticmethod
 257         def calc_speed(start, now, bytes):
 258                 dif = now - start
 259                 if bytes == 0 or dif < 0.001: # One millisecond
 260                         return '%10s' % '---b/s'
 261                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 262
 263         @staticmethod
 264         def best_block_size(elapsed_time, bytes):
 265                 new_min = max(bytes / 2.0, 1.0)
 266                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 267                 if elapsed_time < 0.001:
 268                         return long(new_max)
 269                 rate = bytes / elapsed_time
 270                 if rate > new_max:
 271                         return long(new_max)
 272                 if rate < new_min:
 273                         return long(new_min)
 274                 return long(rate)
 275
 276         @staticmethod
 277         def parse_bytes(bytestr):
 278                 """Parse a string indicating a byte quantity into a long integer."""
 279                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 280                 if matchobj is None:
 281                         return None
 282                 number = float(matchobj.group(1))
 283                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 284                 return long(round(number * multiplier))
 285
 286         @staticmethod
 287         def verify_url(url):
 288                 """Verify a URL is valid and data could be downloaded. Return real data URL."""
 289                 request = urllib2.Request(url, None, std_headers)
 290                 data = urllib2.urlopen(request)
 291                 data.read(1)
 292                 url = data.geturl()
 293                 data.close()
 294                 return url
 295
 296         def add_info_extractor(self, ie):
 297                 """Add an InfoExtractor object to the end of the list."""
 298                 self._ies.append(ie)
 299                 ie.set_downloader(self)
 300
 301         def add_post_processor(self, pp):
 302                 """Add a PostProcessor object to the end of the chain."""
 303                 self._pps.append(pp)
 304                 pp.set_downloader(self)
 305
 306         def to_stdout(self, message, skip_eol=False, ignore_encoding_errors=False):
 307                 """Print message to stdout if not in quiet mode."""
 308                 try:
 309                         if not self.params.get('quiet', False):
 310                                 print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
 311                         sys.stdout.flush()
 312                 except (UnicodeEncodeError), err:
 313                         if not ignore_encoding_errors:
 314                                 raise
 315
 316         def to_stderr(self, message):
 317                 """Print message to stderr."""
 318                 print >>sys.stderr, message.encode(preferredencoding())
 319
 320         def fixed_template(self):
 321                 """Checks if the output template is fixed."""
 322                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
 323
 324         def trouble(self, message=None):
 325                 """Determine action to take when a download problem appears.
 326
 327                 Depending on if the downloader has been configured to ignore
 328                 download errors or not, this method may throw an exception or
 329                 not when errors are found, after printing the message.
 330                 """
 331                 if message is not None:
 332                         self.to_stderr(message)
 333                 if not self.params.get('ignoreerrors', False):
 334                         raise DownloadError(message)
 335                 self._download_retcode = 1
 336
 337         def slow_down(self, start_time, byte_counter):
 338                 """Sleep if the download speed is over the rate limit."""
 339                 rate_limit = self.params.get('ratelimit', None)
 340                 if rate_limit is None or byte_counter == 0:
 341                         return
 342                 now = time.time()
 343                 elapsed = now - start_time
 344                 if elapsed <= 0.0:
 345                         return
 346                 speed = float(byte_counter) / elapsed
 347                 if speed > rate_limit:
 348                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 349
 350         def report_destination(self, filename):
 351                 """Report destination filename."""
 352                 self.to_stdout(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
 353
 354         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 355                 """Report download progress."""
 356                 if self.params.get('noprogress', False):
 357                         return
 358                 self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
 359                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 360
 361         def report_resuming_byte(self, resume_len):
 362                 """Report attemtp to resume at given byte."""
 363                 self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
 364
 365         def report_file_already_downloaded(self, file_name):
 366                 """Report file has already been fully downloaded."""
 367                 try:
 368                         self.to_stdout(u'[download] %s has already been downloaded' % file_name)
 369                 except (UnicodeEncodeError), err:
 370                         self.to_stdout(u'[download] The file has already been downloaded')
 371
 372         def report_unable_to_resume(self):
 373                 """Report it was impossible to resume download."""
 374                 self.to_stdout(u'[download] Unable to resume')
 375
 376         def report_finish(self):
 377                 """Report download finished."""
 378                 if self.params.get('noprogress', False):
 379                         self.to_stdout(u'[download] Download completed')
 380                 else:
 381                         self.to_stdout(u'')
 382
 383         def process_info(self, info_dict):
 384                 """Process a single dictionary returned by an InfoExtractor."""
 385                 # Do nothing else if in simulate mode
 386                 if self.params.get('simulate', False):
 387                         # Verify URL if it's an HTTP one
 388                         if info_dict['url'].startswith('http'):
 389                                 try:
 390                                         self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
 391                                 except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
 392                                         raise UnavailableFormatError
 393
 394                         # Forced printings
 395                         if self.params.get('forcetitle', False):
 396                                 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
 397                         if self.params.get('forceurl', False):
 398                                 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
 399
 400                         return
 401
 402                 try:
 403                         template_dict = dict(info_dict)
 404                         template_dict['epoch'] = unicode(long(time.time()))
 405                         filename = self.params['outtmpl'] % template_dict
 406                 except (ValueError, KeyError), err:
 407                         self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
 408                 if self.params.get('nooverwrites', False) and os.path.exists(filename):
 409                         self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
 410                         return
 411
 412                 try:
 413                         self.pmkdir(filename)
 414                 except (OSError, IOError), err:
 415                         self.trouble('ERROR: unable to create directories: %s' % str(err))
 416                         return
 417
 418                 try:
 419                         success = self._do_download(filename, info_dict['url'].encode('utf-8'))
 420                 except (OSError, IOError), err:
 421                         raise UnavailableFormatError
 422                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 423                         self.trouble('ERROR: unable to download video data: %s' % str(err))
 424                         return
 425                 except (ContentTooShortError, ), err:
 426                         self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 427                         return
 428
 429                 if success:
 430                         try:
 431                                 self.post_process(filename, info_dict)
 432                         except (PostProcessingError), err:
 433                                 self.trouble('ERROR: postprocessing: %s' % str(err))
 434                                 return
 435
 436         def download(self, url_list):
 437                 """Download a given list of URLs."""
 438                 if len(url_list) > 1 and self.fixed_template():
 439                         raise SameFileError(self.params['outtmpl'])
 440
 441                 for url in url_list:
 442                         suitable_found = False
 443                         for ie in self._ies:
 444                                 # Go to next InfoExtractor if not suitable
 445                                 if not ie.suitable(url):
 446                                         continue
 447
 448                                 # Suitable InfoExtractor found
 449                                 suitable_found = True
 450
 451                                 # Extract information from URL and process it
 452                                 ie.extract(url)
 453
 454                                 # Suitable InfoExtractor had been found; go to next URL
 455                                 break
 456
 457                         if not suitable_found:
 458                                 self.trouble('ERROR: no suitable InfoExtractor: %s' % url)
 459
 460                 return self._download_retcode
 461
 462         def post_process(self, filename, ie_info):
 463                 """Run the postprocessing chain on the given file."""
 464                 info = dict(ie_info)
 465                 info['filepath'] = filename
 466                 for pp in self._pps:
 467                         info = pp.run(info)
 468                         if info is None:
 469                                 break
 470
 471         def _download_with_rtmpdump(self, filename, url):
 472                 self.report_destination(filename)
 473
 474                 # Check for rtmpdump first
 475                 try:
 476                         subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 477                 except (OSError, IOError):
 478                         self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
 479                         return False
 480
 481                 # Download using rtmpdump. rtmpdump returns exit code 2 when
 482                 # the connection was interrumpted and resuming appears to be
 483                 # possible. This is part of rtmpdump's normal usage, AFAIK.
 484                 basic_args = ['rtmpdump', '-q', '-r', url, '-o', filename]
 485                 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
 486                 while retval == 2 or retval == 1:
 487                         self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename), skip_eol=True)
 488                         time.sleep(2.0) # This seems to be needed
 489                         retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 490                 if retval == 0:
 491                         self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
 492                         return True
 493                 else:
 494                         self.trouble('\nERROR: rtmpdump exited with code %d' % retval)
 495                         return False
 496
 497         def _do_download(self, filename, url):
 498                 # Attempt to download using rtmpdump
 499                 if url.startswith('rtmp'):
 500                         return self._download_with_rtmpdump(filename, url)
 501
 502                 stream = None
 503                 open_mode = 'wb'
 504                 basic_request = urllib2.Request(url, None, std_headers)
 505                 request = urllib2.Request(url, None, std_headers)
 506
 507                 # Establish possible resume length
 508                 if os.path.isfile(filename):
 509                         resume_len = os.path.getsize(filename)
 510                 else:
 511                         resume_len = 0
 512
 513                 # Request parameters in case of being able to resume
 514                 if self.params.get('continuedl', False) and resume_len != 0:
 515                         self.report_resuming_byte(resume_len)
 516                         request.add_header('Range','bytes=%d-' % resume_len)
 517                         open_mode = 'ab'
 518
 519                 # Establish connection
 520                 try:
 521                         data = urllib2.urlopen(request)
 522                 except (urllib2.HTTPError, ), err:
 523                         if err.code != 416: #  416 is 'Requested range not satisfiable'
 524                                 raise
 525                         # Unable to resume
 526                         data = urllib2.urlopen(basic_request)
 527                         content_length = data.info()['Content-Length']
 528
 529                         if content_length is not None and long(content_length) == resume_len:
 530                                 # Because the file had already been fully downloaded
 531                                 self.report_file_already_downloaded(filename)
 532                                 return True
 533                         else:
 534                                 # Because the server didn't let us
 535                                 self.report_unable_to_resume()
 536                                 open_mode = 'wb'
 537
 538                 data_len = data.info().get('Content-length', None)
 539                 data_len_str = self.format_bytes(data_len)
 540                 byte_counter = 0
 541                 block_size = 1024
 542                 start = time.time()
 543                 while True:
 544                         # Download and write
 545                         before = time.time()
 546                         data_block = data.read(block_size)
 547                         after = time.time()
 548                         data_block_len = len(data_block)
 549                         if data_block_len == 0:
 550                                 break
 551                         byte_counter += data_block_len
 552
 553                         # Open file just in time
 554                         if stream is None:
 555                                 try:
 556                                         (stream, filename) = sanitize_open(filename, open_mode)
 557                                         self.report_destination(filename)
 558                                 except (OSError, IOError), err:
 559                                         self.trouble('ERROR: unable to open for writing: %s' % str(err))
 560                                         return False
 561                         stream.write(data_block)
 562                         block_size = self.best_block_size(after - before, data_block_len)
 563
 564                         # Progress message
 565                         percent_str = self.calc_percent(byte_counter, data_len)
 566                         eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
 567                         speed_str = self.calc_speed(start, time.time(), byte_counter)
 568                         self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 569
 570                         # Apply rate limit
 571                         self.slow_down(start, byte_counter)
 572
 573                 self.report_finish()
 574                 if data_len is not None and str(byte_counter) != data_len:
 575                         raise ContentTooShortError(byte_counter, long(data_len))
 576                 return True
 577
 578 class InfoExtractor(object):
 579         """Information Extractor class.
 580
 581         Information extractors are the classes that, given a URL, extract
 582         information from the video (or videos) the URL refers to. This
 583         information includes the real video URL, the video title and simplified
 584         title, author and others. The information is stored in a dictionary
 585         which is then passed to the FileDownloader. The FileDownloader
 586         processes this information possibly downloading the video to the file
 587         system, among other possible outcomes. The dictionaries must include
 588         the following fields:
 589
 590         id:             Video identifier.
 591         url:            Final video URL.
 592         uploader:       Nickname of the video uploader.
 593         title:          Literal title.
 594         stitle:         Simplified title.
 595         ext:            Video filename extension.
 596         format:         Video format.
 597
 598         Subclasses of this one should re-define the _real_initialize() and
 599         _real_extract() methods, as well as the suitable() static method.
 600         Probably, they should also be instantiated and added to the main
 601         downloader.
 602         """
 603
 604         _ready = False
 605         _downloader = None
 606
 607         def __init__(self, downloader=None):
 608                 """Constructor. Receives an optional downloader."""
 609                 self._ready = False
 610                 self.set_downloader(downloader)
 611
 612         @staticmethod
 613         def suitable(url):
 614                 """Receives a URL and returns True if suitable for this IE."""
 615                 return False
 616
 617         def initialize(self):
 618                 """Initializes an instance (authentication, etc)."""
 619                 if not self._ready:
 620                         self._real_initialize()
 621                         self._ready = True
 622
 623         def extract(self, url):
 624                 """Extracts URL information and returns it in list of dicts."""
 625                 self.initialize()
 626                 return self._real_extract(url)
 627
 628         def set_downloader(self, downloader):
 629                 """Sets the downloader for this IE."""
 630                 self._downloader = downloader
 631
 632         def _real_initialize(self):
 633                 """Real initialization process. Redefine in subclasses."""
 634                 pass
 635
 636         def _real_extract(self, url):
 637                 """Real extraction process. Redefine in subclasses."""
 638                 pass
 639
 640 class YoutubeIE(InfoExtractor):
 641         """Information extractor for youtube.com."""
 642
 643         _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?[\?#](?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
 644         _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
 645         _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
 646         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
 647         _NETRC_MACHINE = 'youtube'
 648         _available_formats = ['37', '22', '35', '18', '34', '5', '17', '13', None] # listed in order of priority for -b flag
 649         _video_extensions = {
 650                 '13': '3gp',
 651                 '17': 'mp4',
 652                 '18': 'mp4',
 653                 '22': 'mp4',
 654                 '37': 'mp4',
 655         }
 656
 657         @staticmethod
 658         def suitable(url):
 659                 return (re.match(YoutubeIE._VALID_URL, url) is not None)
 660
 661         def report_lang(self):
 662                 """Report attempt to set language."""
 663                 self._downloader.to_stdout(u'[youtube] Setting language')
 664
 665         def report_login(self):
 666                 """Report attempt to log in."""
 667                 self._downloader.to_stdout(u'[youtube] Logging in')
 668
 669         def report_age_confirmation(self):
 670                 """Report attempt to confirm age."""
 671                 self._downloader.to_stdout(u'[youtube] Confirming age')
 672
 673         def report_video_info_webpage_download(self, video_id):
 674                 """Report attempt to download video info webpage."""
 675                 self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
 676
 677         def report_information_extraction(self, video_id):
 678                 """Report attempt to extract video information."""
 679                 self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
 680
 681         def report_unavailable_format(self, video_id, format):
 682                 """Report extracted video URL."""
 683                 self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
 684
 685         def report_rtmp_download(self):
 686                 """Indicate the download will use the RTMP protocol."""
 687                 self._downloader.to_stdout(u'[youtube] RTMP download detected')
 688
 689         def _real_initialize(self):
 690                 if self._downloader is None:
 691                         return
 692
 693                 username = None
 694                 password = None
 695                 downloader_params = self._downloader.params
 696
 697                 # Attempt to use provided username and password or .netrc data
 698                 if downloader_params.get('username', None) is not None:
 699                         username = downloader_params['username']
 700                         password = downloader_params['password']
 701                 elif downloader_params.get('usenetrc', False):
 702                         try:
 703                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
 704                                 if info is not None:
 705                                         username = info[0]
 706                                         password = info[2]
 707                                 else:
 708                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
 709                         except (IOError, netrc.NetrcParseError), err:
 710                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
 711                                 return
 712
 713                 # Set language
 714                 request = urllib2.Request(self._LANG_URL, None, std_headers)
 715                 try:
 716                         self.report_lang()
 717                         urllib2.urlopen(request).read()
 718                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 719                         self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
 720                         return
 721
 722                 # No authentication to be performed
 723                 if username is None:
 724                         return
 725
 726                 # Log in
 727                 login_form = {
 728                                 'current_form': 'loginForm',
 729                                 'next':         '/',
 730                                 'action_login': 'Log In',
 731                                 'username':     username,
 732                                 'password':     password,
 733                                 }
 734                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
 735                 try:
 736                         self.report_login()
 737                         login_results = urllib2.urlopen(request).read()
 738                         if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
 739                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
 740                                 return
 741                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 742                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
 743                         return
 744
 745                 # Confirm age
 746                 age_form = {
 747                                 'next_url':             '/',
 748                                 'action_confirm':       'Confirm',
 749                                 }
 750                 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
 751                 try:
 752                         self.report_age_confirmation()
 753                         age_results = urllib2.urlopen(request).read()
 754                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 755                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
 756                         return
 757
 758         def _real_extract(self, url):
 759                 # Extract video id from URL
 760                 mobj = re.match(self._VALID_URL, url)
 761                 if mobj is None:
 762                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
 763                         return
 764                 video_id = mobj.group(2)
 765
 766                 # Downloader parameters
 767                 best_quality = False
 768                 all_formats = False
 769                 format_param = None
 770                 quality_index = 0
 771                 if self._downloader is not None:
 772                         params = self._downloader.params
 773                         format_param = params.get('format', None)
 774                         if format_param == '0':
 775                                 format_param = self._available_formats[quality_index]
 776                                 best_quality = True
 777                         elif format_param == '-1':
 778                                 format_param = self._available_formats[quality_index]
 779                                 all_formats = True
 780
 781                 while True:
 782                         # Extension
 783                         video_extension = self._video_extensions.get(format_param, 'flv')
 784
 785                         # Get video info
 786                         video_info_url = 'http://www.youtube.com/get_video_info?&video_id=%s&el=embedded&ps=default&eurl=&gl=US&hl=en' % video_id
 787                         request = urllib2.Request(video_info_url, None, std_headers)
 788                         try:
 789                                 self.report_video_info_webpage_download(video_id)
 790                                 video_info_webpage = urllib2.urlopen(request).read()
 791                                 video_info = parse_qs(video_info_webpage)
 792                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 793                                 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
 794                                 return
 795                         self.report_information_extraction(video_id)
 796
 797                         # "t" param
 798                         if 'token' not in video_info:
 799                                 # Attempt to see if YouTube has issued an error message
 800                                 if 'reason' not in video_info:
 801                                         self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason')
 802                                         stream = open('reportme-ydl-%s.dat' % time.time(), 'wb')
 803                                         stream.write(video_info_webpage)
 804                                         stream.close()
 805                                 else:
 806                                         reason = urllib.unquote_plus(video_info['reason'][0])
 807                                         self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8'))
 808                                 return
 809                         token = urllib.unquote_plus(video_info['token'][0])
 810                         video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
 811                         if format_param is not None:
 812                                 video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
 813
 814                         # Check possible RTMP download
 815                         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
 816                                 self.report_rtmp_download()
 817                                 video_real_url = video_info['conn'][0]
 818
 819                         # uploader
 820                         if 'author' not in video_info:
 821                                 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
 822                                 return
 823                         video_uploader = urllib.unquote_plus(video_info['author'][0])
 824
 825                         # title
 826                         if 'title' not in video_info:
 827                                 self._downloader.trouble(u'ERROR: unable to extract video title')
 828                                 return
 829                         video_title = urllib.unquote_plus(video_info['title'][0])
 830                         video_title = video_title.decode('utf-8')
 831                         video_title = sanitize_title(video_title)
 832
 833                         # simplified title
 834                         simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
 835                         simple_title = simple_title.strip(ur'_')
 836
 837                         try:
 838                                 # Process video information
 839                                 self._downloader.process_info({
 840                                         'id':           video_id.decode('utf-8'),
 841                                         'url':          video_real_url.decode('utf-8'),
 842                                         'uploader':     video_uploader.decode('utf-8'),
 843                                         'title':        video_title,
 844                                         'stitle':       simple_title,
 845                                         'ext':          video_extension.decode('utf-8'),
 846                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
 847                                 })
 848
 849                                 if all_formats:
 850                                         if quality_index == len(self._available_formats) - 1:
 851                                                 # None left to get
 852                                                 return
 853                                         else:
 854                                                 quality_index += 1
 855                                                 format_param = self._available_formats[quality_index]
 856                                                 if format_param == None:
 857                                                         return
 858                                                 continue
 859
 860                                 return
 861
 862                         except UnavailableFormatError, err:
 863                                 if best_quality or all_formats:
 864                                         if quality_index == len(self._available_formats) - 1:
 865                                                 # I don't ever expect this to happen
 866                                                 if not all_formats:
 867                                                         self._downloader.trouble(u'ERROR: no known formats available for video')
 868                                                 return
 869                                         else:
 870                                                 self.report_unavailable_format(video_id, format_param)
 871                                                 quality_index += 1
 872                                                 format_param = self._available_formats[quality_index]
 873                                                 if format_param == None:
 874                                                         return
 875                                                 continue
 876                                 else:
 877                                         self._downloader.trouble('ERROR: format not available for video')
 878                                         return
 879
 880
 881 class MetacafeIE(InfoExtractor):
 882         """Information Extractor for metacafe.com."""
 883
 884         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
 885         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
 886         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
 887         _youtube_ie = None
 888
 889         def __init__(self, youtube_ie, downloader=None):
 890                 InfoExtractor.__init__(self, downloader)
 891                 self._youtube_ie = youtube_ie
 892
 893         @staticmethod
 894         def suitable(url):
 895                 return (re.match(MetacafeIE._VALID_URL, url) is not None)
 896
 897         def report_disclaimer(self):
 898                 """Report disclaimer retrieval."""
 899                 self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
 900
 901         def report_age_confirmation(self):
 902                 """Report attempt to confirm age."""
 903                 self._downloader.to_stdout(u'[metacafe] Confirming age')
 904
 905         def report_download_webpage(self, video_id):
 906                 """Report webpage download."""
 907                 self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
 908
 909         def report_extraction(self, video_id):
 910                 """Report information extraction."""
 911                 self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
 912
 913         def _real_initialize(self):
 914                 # Retrieve disclaimer
 915                 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
 916                 try:
 917                         self.report_disclaimer()
 918                         disclaimer = urllib2.urlopen(request).read()
 919                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 920                         self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
 921                         return
 922
 923                 # Confirm age
 924                 disclaimer_form = {
 925                         'filters': '0',
 926                         'submit': "Continue - I'm over 18",
 927                         }
 928                 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
 929                 try:
 930                         self.report_age_confirmation()
 931                         disclaimer = urllib2.urlopen(request).read()
 932                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 933                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
 934                         return
 935
 936         def _real_extract(self, url):
 937                 # Extract id and simplified title from URL
 938                 mobj = re.match(self._VALID_URL, url)
 939                 if mobj is None:
 940                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
 941                         return
 942
 943                 video_id = mobj.group(1)
 944
 945                 # Check if video comes from YouTube
 946                 mobj2 = re.match(r'^yt-(.*)$', video_id)
 947                 if mobj2 is not None:
 948                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
 949                         return
 950
 951                 simple_title = mobj.group(2).decode('utf-8')
 952                 video_extension = 'flv'
 953
 954                 # Retrieve video webpage to extract further information
 955                 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
 956                 try:
 957                         self.report_download_webpage(video_id)
 958                         webpage = urllib2.urlopen(request).read()
 959                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 960                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
 961                         return
 962
 963                 # Extract URL, uploader and title from webpage
 964                 self.report_extraction(video_id)
 965                 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
 966                 if mobj is None:
 967                         self._downloader.trouble(u'ERROR: unable to extract media URL')
 968                         return
 969                 mediaURL = urllib.unquote(mobj.group(1))
 970
 971                 #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
 972                 #if mobj is None:
 973                 #       self._downloader.trouble(u'ERROR: unable to extract gdaKey')
 974                 #       return
 975                 #gdaKey = mobj.group(1)
 976                 #
 977                 #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
 978
 979                 video_url = mediaURL
 980
 981                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
 982                 if mobj is None:
 983                         self._downloader.trouble(u'ERROR: unable to extract title')
 984                         return
 985                 video_title = mobj.group(1).decode('utf-8')
 986                 video_title = sanitize_title(video_title)
 987
 988                 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
 989                 if mobj is None:
 990                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
 991                         return
 992                 video_uploader = mobj.group(1)
 993
 994                 try:
 995                         # Process video information
 996                         self._downloader.process_info({
 997                                 'id':           video_id.decode('utf-8'),
 998                                 'url':          video_url.decode('utf-8'),
 999                                 'uploader':     video_uploader.decode('utf-8'),
1000                                 'title':        video_title,
1001                                 'stitle':       simple_title,
1002                                 'ext':          video_extension.decode('utf-8'),
1003                                 'format':       u'NA',
1004                         })
1005                 except UnavailableFormatError:
1006                         self._downloader.trouble(u'ERROR: format not available for video')
1007
1008
1009 class GoogleIE(InfoExtractor):
1010         """Information extractor for video.google.com."""
1011
1012         _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1013
1014         def __init__(self, downloader=None):
1015                 InfoExtractor.__init__(self, downloader)
1016
1017         @staticmethod
1018         def suitable(url):
1019                 return (re.match(GoogleIE._VALID_URL, url) is not None)
1020
1021         def report_download_webpage(self, video_id):
1022                 """Report webpage download."""
1023                 self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id)
1024
1025         def report_extraction(self, video_id):
1026                 """Report information extraction."""
1027                 self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id)
1028
1029         def _real_initialize(self):
1030                 return
1031
1032         def _real_extract(self, url):
1033                 # Extract id from URL
1034                 mobj = re.match(self._VALID_URL, url)
1035                 if mobj is None:
1036                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1037                         return
1038
1039                 video_id = mobj.group(1)
1040
1041                 video_extension = 'mp4'
1042
1043                 # Retrieve video webpage to extract further information
1044                 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1045                 try:
1046                         self.report_download_webpage(video_id)
1047                         webpage = urllib2.urlopen(request).read()
1048                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1049                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1050                         return
1051
1052                 # Extract URL, uploader, and title from webpage
1053                 self.report_extraction(video_id)
1054                 mobj = re.search(r"download_url:'([^']+)'", webpage)
1055                 if mobj is None:
1056                         video_extension = 'flv'
1057                         mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1058                 if mobj is None:
1059                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1060                         return
1061                 mediaURL = urllib.unquote(mobj.group(1))
1062                 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1063                 mediaURL = mediaURL.replace('\\x26', '\x26')
1064
1065                 video_url = mediaURL
1066
1067                 mobj = re.search(r'<title>(.*)</title>', webpage)
1068                 if mobj is None:
1069                         self._downloader.trouble(u'ERROR: unable to extract title')
1070                         return
1071                 video_title = mobj.group(1).decode('utf-8')
1072                 video_title = sanitize_title(video_title)
1073                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1074
1075                 try:
1076                         # Process video information
1077                         self._downloader.process_info({
1078                                 'id':           video_id.decode('utf-8'),
1079                                 'url':          video_url.decode('utf-8'),
1080                                 'uploader':     u'NA',
1081                                 'title':        video_title,
1082                                 'stitle':       simple_title,
1083                                 'ext':          video_extension.decode('utf-8'),
1084                                 'format':       u'NA',
1085                         })
1086                 except UnavailableFormatError:
1087                         self._downloader.trouble(u'ERROR: format not available for video')
1088
1089
1090 class PhotobucketIE(InfoExtractor):
1091         """Information extractor for photobucket.com."""
1092
1093         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1094
1095         def __init__(self, downloader=None):
1096                 InfoExtractor.__init__(self, downloader)
1097
1098         @staticmethod
1099         def suitable(url):
1100                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1101
1102         def report_download_webpage(self, video_id):
1103                 """Report webpage download."""
1104                 self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id)
1105
1106         def report_extraction(self, video_id):
1107                 """Report information extraction."""
1108                 self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id)
1109
1110         def _real_initialize(self):
1111                 return
1112
1113         def _real_extract(self, url):
1114                 # Extract id from URL
1115                 mobj = re.match(self._VALID_URL, url)
1116                 if mobj is None:
1117                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1118                         return
1119
1120                 video_id = mobj.group(1)
1121
1122                 video_extension = 'flv'
1123
1124                 # Retrieve video webpage to extract further information
1125                 request = urllib2.Request(url)
1126                 try:
1127                         self.report_download_webpage(video_id)
1128                         webpage = urllib2.urlopen(request).read()
1129                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1130                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1131                         return
1132
1133                 # Extract URL, uploader, and title from webpage
1134                 self.report_extraction(video_id)
1135                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1136                 if mobj is None:
1137                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1138                         return
1139                 mediaURL = urllib.unquote(mobj.group(1))
1140
1141                 video_url = mediaURL
1142
1143                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1144                 if mobj is None:
1145                         self._downloader.trouble(u'ERROR: unable to extract title')
1146                         return
1147                 video_title = mobj.group(1).decode('utf-8')
1148                 video_title = sanitize_title(video_title)
1149                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1150
1151                 video_uploader = mobj.group(2).decode('utf-8')
1152
1153                 try:
1154                         # Process video information
1155                         self._downloader.process_info({
1156                                 'id':           video_id.decode('utf-8'),
1157                                 'url':          video_url.decode('utf-8'),
1158                                 'uploader':     video_uploader,
1159                                 'title':        video_title,
1160                                 'stitle':       simple_title,
1161                                 'ext':          video_extension.decode('utf-8'),
1162                                 'format':       u'NA',
1163                         })
1164                 except UnavailableFormatError:
1165                         self._downloader.trouble(u'ERROR: format not available for video')
1166
1167
1168 class YahooIE(InfoExtractor):
1169         """Information extractor for video.yahoo.com."""
1170
1171         # _VALID_URL matches all Yahoo! Video URLs
1172         # _VPAGE_URL matches only the extractable '/watch/' URLs
1173         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1174         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1175
        def __init__(self, downloader=None):
                """Pass the optional downloader on to InfoExtractor.__init__."""
                InfoExtractor.__init__(self, downloader)
1178
1179         @staticmethod
1180         def suitable(url):
1181                 return (re.match(YahooIE._VALID_URL, url) is not None)
1182
1183         def report_download_webpage(self, video_id):
1184                 """Report webpage download."""
1185                 self._downloader.to_stdout(u'[video.yahoo] %s: Downloading webpage' % video_id)
1186
1187         def report_extraction(self, video_id):
1188                 """Report information extraction."""
1189                 self._downloader.to_stdout(u'[video.yahoo] %s: Extracting information' % video_id)
1190
        def _real_initialize(self):
                """No initialization is performed for this extractor."""
                return
1193
1194         def _real_extract(self, url):
1195                 # Extract ID from URL
1196                 mobj = re.match(self._VALID_URL, url)
1197                 if mobj is None:
1198                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1199                         return
1200
1201                 video_id = mobj.group(2)
1202                 video_extension = 'flv'
1203
1204                 # Rewrite valid but non-extractable URLs as
1205                 # extractable English language /watch/ URLs
1206                 if re.match(self._VPAGE_URL, url) is None:
1207                         request = urllib2.Request(url)
1208                         try:
1209                                 webpage = urllib2.urlopen(request).read()
1210                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1211                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1212                                 return
1213
1214                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1215                         if mobj is None:
1216                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1217                                 return
1218                         yahoo_id = mobj.group(1)
1219
1220                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1221                         if mobj is None:
1222                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1223                                 return
1224                         yahoo_vid = mobj.group(1)
1225
1226                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1227                         return self._real_extract(url)
1228
1229                 # Retrieve video webpage to extract further information
1230                 request = urllib2.Request(url)
1231                 try:
1232                         self.report_download_webpage(video_id)
1233                         webpage = urllib2.urlopen(request).read()
1234                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1235                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1236                         return
1237
1238                 # Extract uploader and title from webpage
1239                 self.report_extraction(video_id)
1240                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1241                 if mobj is None:
1242                         self._downloader.trouble(u'ERROR: unable to extract video title')
1243                         return
1244                 video_title = mobj.group(1).decode('utf-8')
1245                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1246
1247                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1248                 if mobj is None:
1249                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1250                         return
1251                 video_uploader = mobj.group(1).decode('utf-8')
1252
1253                 # Extract video height and width
1254                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1255                 if mobj is None:
1256                         self._downloader.trouble(u'ERROR: unable to extract video height')
1257                         return
1258                 yv_video_height = mobj.group(1)
1259
1260                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1261                 if mobj is None:
1262                         self._downloader.trouble(u'ERROR: unable to extract video width')
1263                         return
1264                 yv_video_width = mobj.group(1)
1265
1266                 # Retrieve video playlist to extract media URL
1267                 # I'm not completely sure what all these options are, but we
1268                 # seem to need most of them, otherwise the server sends a 401.
1269                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1270                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1271                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1272                                           '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1273                                           '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1274                 try:
1275                         self.report_download_webpage(video_id)
1276                         webpage = urllib2.urlopen(request).read()
1277                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1278                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1279                         return
1280
1281                 # Extract media URL from playlist XML
1282                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1283                 if mobj is None:
1284                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1285                         return
1286                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1287                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1288
1289                 try:
1290                         # Process video information
1291                         self._downloader.process_info({
1292                                 'id':           video_id.decode('utf-8'),
1293                                 'url':          video_url,
1294                                 'uploader':     video_uploader,
1295                                 'title':        video_title,
1296                                 'stitle':       simple_title,
1297                                 'ext':          video_extension.decode('utf-8'),
1298                         })
1299                 except UnavailableFormatError:
1300                         self._downloader.trouble(u'ERROR: format not available for video')
1301
1302
1303 class GenericIE(InfoExtractor):
1304         """Generic last-resort information extractor."""
1305
1306         def __init__(self, downloader=None):
1307                 InfoExtractor.__init__(self, downloader)
1308
1309         @staticmethod
1310         def suitable(url):
1311                 return True
1312
1313         def report_download_webpage(self, video_id):
1314                 """Report webpage download."""
1315                 self._downloader.to_stdout(u'WARNING: Falling back on generic information extractor.')
1316                 self._downloader.to_stdout(u'[generic] %s: Downloading webpage' % video_id)
1317
1318         def report_extraction(self, video_id):
1319                 """Report information extraction."""
1320                 self._downloader.to_stdout(u'[generic] %s: Extracting information' % video_id)
1321
1322         def _real_initialize(self):
1323                 return
1324
1325         def _real_extract(self, url):
1326                 video_id = url.split('/')[-1]
1327                 request = urllib2.Request(url)
1328                 try:
1329                         self.report_download_webpage(video_id)
1330                         webpage = urllib2.urlopen(request).read()
1331                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1332                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1333                         return
1334                 except ValueError, err:
1335                         # since this is the last-resort InfoExtractor, if
1336                         # this error is thrown, it'll be thrown here
1337                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1338                         return
1339
1340                 # Start with something easy: JW Player in SWFObject
1341                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1342                 if mobj is None:
1343                         # Broaden the search a little bit
1344                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1345                 if mobj is None:
1346                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1347                         return
1348
1349                 # It's possible that one of the regexes
1350                 # matched, but returned an empty group:
1351                 if mobj.group(1) is None:
1352                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1353                         return
1354
1355                 video_url = urllib.unquote(mobj.group(1))
1356                 video_id  = os.path.basename(video_url)
1357
1358                 # here's a fun little line of code for you:
1359                 video_extension = os.path.splitext(video_id)[1][1:]
1360                 video_id        = os.path.splitext(video_id)[0]
1361
1362                 # it's tempting to parse this further, but you would
1363                 # have to take into account all the variations like
1364                 #   Video Title - Site Name
1365                 #   Site Name | Video Title
1366                 #   Video Title - Tagline | Site Name
1367                 # and so on and so forth; it's just not practical
1368                 mobj = re.search(r'<title>(.*)</title>', webpage)
1369                 if mobj is None:
1370                         self._downloader.trouble(u'ERROR: unable to extract title')
1371                         return
1372                 video_title = mobj.group(1).decode('utf-8')
1373                 video_title = sanitize_title(video_title)
1374                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1375
1376                 # video uploader is domain name
1377                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1378                 if mobj is None:
1379                         self._downloader.trouble(u'ERROR: unable to extract title')
1380                         return
1381                 video_uploader = mobj.group(1).decode('utf-8')
1382
1383                 try:
1384                         # Process video information
1385                         self._downloader.process_info({
1386                                 'id':           video_id.decode('utf-8'),
1387                                 'url':          video_url.decode('utf-8'),
1388                                 'uploader':     video_uploader,
1389                                 'title':        video_title,
1390                                 'stitle':       simple_title,
1391                                 'ext':          video_extension.decode('utf-8'),
1392                                 'format':       u'NA',
1393                         })
1394                 except UnavailableFormatError:
1395                         self._downloader.trouble(u'ERROR: format not available for video')
1396
1397
1398 class YoutubeSearchIE(InfoExtractor):
1399         """Information Extractor for YouTube search queries."""
1400         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1401         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1402         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1403         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1404         _youtube_ie = None
1405         _max_youtube_results = 1000
1406
1407         def __init__(self, youtube_ie, downloader=None):
1408                 InfoExtractor.__init__(self, downloader)
1409                 self._youtube_ie = youtube_ie
1410
1411         @staticmethod
1412         def suitable(url):
1413                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1414
1415         def report_download_page(self, query, pagenum):
1416                 """Report attempt to download playlist page with given number."""
1417                 query = query.decode(preferredencoding())
1418                 self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1419
1420         def _real_initialize(self):
1421                 self._youtube_ie.initialize()
1422
1423         def _real_extract(self, query):
1424                 mobj = re.match(self._VALID_QUERY, query)
1425                 if mobj is None:
1426                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1427                         return
1428
1429                 prefix, query = query.split(':')
1430                 prefix = prefix[8:]
1431                 query  = query.encode('utf-8')
1432                 if prefix == '':
1433                         self._download_n_results(query, 1)
1434                         return
1435                 elif prefix == 'all':
1436                         self._download_n_results(query, self._max_youtube_results)
1437                         return
1438                 else:
1439                         try:
1440                                 n = long(prefix)
1441                                 if n <= 0:
1442                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1443                                         return
1444                                 elif n > self._max_youtube_results:
1445                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
1446                                         n = self._max_youtube_results
1447                                 self._download_n_results(query, n)
1448                                 return
1449                         except ValueError: # parsing prefix as integer fails
1450                                 self._download_n_results(query, 1)
1451                                 return
1452
1453         def _download_n_results(self, query, n):
1454                 """Downloads a specified number of results for a query"""
1455
1456                 video_ids = []
1457                 already_seen = set()
1458                 pagenum = 1
1459
1460                 while True:
1461                         self.report_download_page(query, pagenum)
1462                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1463                         request = urllib2.Request(result_url, None, std_headers)
1464                         try:
1465                                 page = urllib2.urlopen(request).read()
1466                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1467                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1468                                 return
1469
1470                         # Extract video identifiers
1471                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1472                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1473                                 if video_id not in already_seen:
1474                                         video_ids.append(video_id)
1475                                         already_seen.add(video_id)
1476                                         if len(video_ids) == n:
1477                                                 # Specified n videos reached
1478                                                 for id in video_ids:
1479                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1480                                                 return
1481
1482                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1483                                 for id in video_ids:
1484                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1485                                 return
1486
1487                         pagenum = pagenum + 1
1488
1489 class YoutubePlaylistIE(InfoExtractor):
1490         """Information Extractor for YouTube playlists."""
1491
1492         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*'
1493         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1494         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1495         _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s'
1496         _youtube_ie = None
1497
1498         def __init__(self, youtube_ie, downloader=None):
1499                 InfoExtractor.__init__(self, downloader)
1500                 self._youtube_ie = youtube_ie
1501
1502         @staticmethod
1503         def suitable(url):
1504                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1505
1506         def report_download_page(self, playlist_id, pagenum):
1507                 """Report attempt to download playlist page with given number."""
1508                 self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1509
1510         def _real_initialize(self):
1511                 self._youtube_ie.initialize()
1512
1513         def _real_extract(self, url):
1514                 # Extract playlist id
1515                 mobj = re.match(self._VALID_URL, url)
1516                 if mobj is None:
1517                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1518                         return
1519
1520                 # Download playlist pages
1521                 playlist_id = mobj.group(1)
1522                 video_ids = []
1523                 pagenum = 1
1524
1525                 while True:
1526                         self.report_download_page(playlist_id, pagenum)
1527                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1528                         try:
1529                                 page = urllib2.urlopen(request).read()
1530                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1531                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1532                                 return
1533
1534                         # Extract video identifiers
1535                         ids_in_page = []
1536                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1537                                 if mobj.group(1) not in ids_in_page:
1538                                         ids_in_page.append(mobj.group(1))
1539                         video_ids.extend(ids_in_page)
1540
1541                         if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page:
1542                                 break
1543                         pagenum = pagenum + 1
1544
1545                 for id in video_ids:
1546                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1547                 return
1548
1549 class YoutubeUserIE(InfoExtractor):
1550         """Information Extractor for YouTube users."""
1551
1552         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
1553         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1554         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
1555         _youtube_ie = None
1556
1557         def __init__(self, youtube_ie, downloader=None):
1558                 InfoExtractor.__init__(self, downloader)
1559                 self._youtube_ie = youtube_ie
1560
1561         @staticmethod
1562         def suitable(url):
1563                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
1564
1565         def report_download_page(self, username):
1566                 """Report attempt to download user page."""
1567                 self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))
1568
1569         def _real_initialize(self):
1570                 self._youtube_ie.initialize()
1571
1572         def _real_extract(self, url):
1573                 # Extract username
1574                 mobj = re.match(self._VALID_URL, url)
1575                 if mobj is None:
1576                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1577                         return
1578
1579                 # Download user page
1580                 username = mobj.group(1)
1581                 video_ids = []
1582                 pagenum = 1
1583
1584                 self.report_download_page(username)
1585                 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
1586                 try:
1587                         page = urllib2.urlopen(request).read()
1588                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1589                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1590                         return
1591
1592                 # Extract video identifiers
1593                 ids_in_page = []
1594
1595                 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1596                         if mobj.group(1) not in ids_in_page:
1597                                 ids_in_page.append(mobj.group(1))
1598                 video_ids.extend(ids_in_page)
1599
1600                 for id in video_ids:
1601                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1602                 return
1603
class PostProcessor(object):
        """Base class for post processors.

        A downloader keeps a chain of PostProcessor objects, registered
        through its add_post_processor() method. After a successful
        download it calls run() on each of them in turn: the first one
        receives an initial argument and every following one receives
        whatever the previous run() returned.

        The chain ends when a run() call returns None or when there are
        no more objects left in it.

        As with InfoExtractor objects, registration is mutual: the post
        processor keeps a reference to the downloader it belongs to.
        """

        # Downloader this post processor is attached to, if any
        _downloader = None

        def __init__(self, downloader=None):
                self._downloader = downloader

        def set_downloader(self, downloader):
                """Attach this post processor to the given downloader."""
                self._downloader = downloader

        def run(self, information):
                """Process one downloaded file.

                "information" is a dictionary like the ones InfoExtractors
                build, extended with a "filepath" field that points to the
                downloaded file.

                Returning None stops the postprocessing chain. Returning an
                information dictionary (the received one, possibly with
                some fields changed) passes it on to the next post
                processor in the chain.

                A PostProcessingError may also be raised; the calling
                downloader will take it into account.
                """
                # The base implementation hands the dictionary on untouched
                return information
1649
1650 ### MAIN PROGRAM ###
if __name__ == '__main__':
        # Command line entry point: parse the options, build the downloader
        # with every information extractor registered and download all the
        # URLs given as arguments and/or listed in the batch file.
        try:
                # Modules needed only when running the main program
                import getpass
                import optparse

                # Function to update the program file with the latest version from bitbucket.org
                def update_self(downloader, filename):
                        """Overwrite filename with the latest stable program version.

                        The downloader is only used to print progress messages.
                        Exits the program if filename is not writable.
                        """
                        if not os.access(filename, os.W_OK):
                                sys.exit('ERROR: no write permissions on %s' % filename)

                        downloader.to_stdout('Updating to latest stable version...')
                        latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
                        latest_version = urllib.urlopen(latest_url).read().strip()
                        prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
                        # Fetch everything before touching the file, so a failed
                        # download does not truncate the installed program
                        newcontent = urllib.urlopen(prog_url).read()
                        # Binary mode writes the downloaded bytes verbatim (no
                        # newline translation); the file is closed even if the
                        # write fails
                        stream = open(filename, 'wb')
                        try:
                                stream.write(newcontent)
                        finally:
                                stream.close()
                        downloader.to_stdout('Updated to version %s' % latest_version)

                # General configuration
                # A single opener with both handlers: installing two openers
                # in a row would just make the second one replace the first
                urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), urllib2.HTTPCookieProcessor()))
                socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

                # Parse command line
                parser = optparse.OptionParser(
                        usage='Usage: %prog [options] url...',
                        version='2010.03.13',
                        conflict_handler='resolve',
                )

                parser.add_option('-h', '--help',
                                action='help', help='print this help text and exit')
                parser.add_option('-v', '--version',
                                action='version', help='print program version and exit')
                parser.add_option('-U', '--update',
                                action='store_true', dest='update_self', help='update this program to latest stable version')
                parser.add_option('-i', '--ignore-errors',
                                action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
                parser.add_option('-r', '--rate-limit',
                                dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')

                authentication = optparse.OptionGroup(parser, 'Authentication Options')
                authentication.add_option('-u', '--username',
                                dest='username', metavar='UN', help='account username')
                authentication.add_option('-p', '--password',
                                dest='password', metavar='PW', help='account password')
                authentication.add_option('-n', '--netrc',
                                action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
                parser.add_option_group(authentication)

                # All the format shortcuts store into the same "format" destination
                video_format = optparse.OptionGroup(parser, 'Video Format Options')
                video_format.add_option('-f', '--format',
                                action='store', dest='format', metavar='FMT', help='video format code')
                video_format.add_option('-b', '--best-quality',
                                action='store_const', dest='format', help='download the best quality video possible', const='0')
                video_format.add_option('-m', '--mobile-version',
                                action='store_const', dest='format', help='alias for -f 17', const='17')
                video_format.add_option('-d', '--high-def',
                                action='store_const', dest='format', help='alias for -f 22', const='22')
                video_format.add_option('--all-formats',
                                action='store_const', dest='format', help='download all available video formats', const='-1')
                parser.add_option_group(video_format)

                verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
                verbosity.add_option('-q', '--quiet',
                                action='store_true', dest='quiet', help='activates quiet mode', default=False)
                verbosity.add_option('-s', '--simulate',
                                action='store_true', dest='simulate', help='do not download video', default=False)
                verbosity.add_option('-g', '--get-url',
                                action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
                verbosity.add_option('-e', '--get-title',
                                action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
                verbosity.add_option('--no-progress',
                                action='store_true', dest='noprogress', help='do not print progress bar', default=False)
                parser.add_option_group(verbosity)

                filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
                filesystem.add_option('-t', '--title',
                                action='store_true', dest='usetitle', help='use title in file name', default=False)
                filesystem.add_option('-l', '--literal',
                                action='store_true', dest='useliteral', help='use literal title in file name', default=False)
                filesystem.add_option('-o', '--output',
                                dest='outtmpl', metavar='TPL', help='output filename template')
                filesystem.add_option('-a', '--batch-file',
                                dest='batchfile', metavar='F', help='file containing URLs to download')
                filesystem.add_option('-w', '--no-overwrites',
                                action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
                filesystem.add_option('-c', '--continue',
                                action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
                parser.add_option_group(filesystem)

                (opts, args) = parser.parse_args()

                # Batch file verification: one URL per line, blank lines ignored
                batchurls = []
                if opts.batchfile is not None:
                        try:
                                batchfd = open(opts.batchfile, 'r')
                                try:
                                        batchurls = batchfd.readlines()
                                finally:
                                        batchfd.close()
                                batchurls = [x.strip() for x in batchurls]
                                batchurls = [x for x in batchurls if len(x) > 0]
                        except IOError:
                                sys.exit(u'ERROR: batch file could not be read')
                all_urls = batchurls + args

                # Conflicting, missing and erroneous options
                if opts.usenetrc and (opts.username is not None or opts.password is not None):
                        parser.error(u'using .netrc conflicts with giving username/password')
                if opts.password is not None and opts.username is None:
                        parser.error(u'account username missing')
                if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
                        parser.error(u'using output template conflicts with using title or literal title')
                if opts.usetitle and opts.useliteral:
                        parser.error(u'using title conflicts with using literal title')
                if opts.username is not None and opts.password is None:
                        # Username without password: ask for it interactively
                        opts.password = getpass.getpass(u'Type account password and press return:')
                if opts.ratelimit is not None:
                        numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
                        if numeric_limit is None:
                                parser.error(u'invalid rate limit specified')
                        opts.ratelimit = numeric_limit

                # Information extractors
                youtube_ie = YoutubeIE()
                metacafe_ie = MetacafeIE(youtube_ie)
                youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
                youtube_user_ie = YoutubeUserIE(youtube_ie)
                youtube_search_ie = YoutubeSearchIE(youtube_ie)
                google_ie = GoogleIE()
                photobucket_ie = PhotobucketIE()
                yahoo_ie = YahooIE()
                generic_ie = GenericIE()

                # File downloader
                fd = FileDownloader({
                        'usenetrc': opts.usenetrc,
                        'username': opts.username,
                        'password': opts.password,
                        # --get-url and --get-title imply both quiet and simulate
                        'quiet': (opts.quiet or opts.geturl or opts.gettitle),
                        'forceurl': opts.geturl,
                        'forcetitle': opts.gettitle,
                        'simulate': (opts.simulate or opts.geturl or opts.gettitle),
                        'format': opts.format,
                        # The first alternative that applies wins: explicit template,
                        # then the "all formats" templates, then the title based
                        # ones, and finally the plain id
                        'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
                                or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
                                or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
                                or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
                                or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
                                or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
                                or u'%(id)s.%(ext)s'),
                        'ignoreerrors': opts.ignoreerrors,
                        'ratelimit': opts.ratelimit,
                        'nooverwrites': opts.nooverwrites,
                        'continuedl': opts.continue_dl,
                        'noprogress': opts.noprogress,
                        })
                fd.add_info_extractor(youtube_search_ie)
                fd.add_info_extractor(youtube_pl_ie)
                fd.add_info_extractor(youtube_user_ie)
                fd.add_info_extractor(metacafe_ie)
                fd.add_info_extractor(youtube_ie)
                fd.add_info_extractor(google_ie)
                fd.add_info_extractor(photobucket_ie)
                fd.add_info_extractor(yahoo_ie)

                # This must come last since it's the
                # fallback if none of the others work
                fd.add_info_extractor(generic_ie)

                # Update version
                if opts.update_self:
                        update_self(fd, sys.argv[0])

                # Maybe do nothing: updating alone is a valid invocation
                if len(all_urls) < 1:
                        if not opts.update_self:
                                parser.error(u'you must provide at least one URL')
                        else:
                                sys.exit()
                retcode = fd.download(all_urls)
                sys.exit(retcode)

        except DownloadError:
                sys.exit(1)
        except SameFileError:
                sys.exit(u'ERROR: fixed output name but more than one file to download')
        except KeyboardInterrupt:
                sys.exit(u'\nERROR: Interrupted by user')