youtube-dl

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 # Author: Ricardo Garcia Gonzalez
   4 # Author: Danny Colligan
   5 # License: Public domain code
   6 import htmlentitydefs
   7 import httplib
   8 import locale
   9 import math
  10 import netrc
  11 import os
  12 import os.path
  13 import re
  14 import socket
  15 import string
  16 import sys
  17 import time
  18 import urllib
  19 import urllib2
  20
  21 std_headers = {
  22         'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2',
  23         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
  24         'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
  25         'Accept-Language': 'en-us,en;q=0.5',
  26 }
  27
  28 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
  29
  30 def preferredencoding():
  31         """Get preferred encoding.
  32
  33         Returns the best encoding scheme for the system, based on
  34         locale.getpreferredencoding() and some further tweaks.
  35         """
  36         def yield_preferredencoding():
  37                 try:
  38                         pref = locale.getpreferredencoding()
  39                         u'TEST'.encode(pref)
  40                 except:
  41                         pref = 'UTF-8'
  42                 while True:
  43                         yield pref
  44         return yield_preferredencoding().next()
  45
  46 class DownloadError(Exception):
  47         """Download Error exception.
  48
  49         This exception may be thrown by FileDownloader objects if they are not
  50         configured to continue on errors. They will contain the appropriate
  51         error message.
  52         """
  53         pass
  54
  55 class SameFileError(Exception):
  56         """Same File exception.
  57
  58         This exception will be thrown by FileDownloader objects if they detect
  59         multiple files would have to be downloaded to the same file on disk.
  60         """
  61         pass
  62
  63 class PostProcessingError(Exception):
  64         """Post Processing exception.
  65
  66         This exception may be raised by PostProcessor's .run() method to
  67         indicate an error in the postprocessing task.
  68         """
  69         pass
  70
  71 class UnavailableFormatError(Exception):
  72         """Unavailable Format exception.
  73
  74         This exception will be thrown when a video is requested
  75         in a format that is not available for that video.
  76         """
  77         pass
  78
  79 class ContentTooShortError(Exception):
  80         """Content Too Short exception.
  81
  82         This exception may be raised by FileDownloader objects when a file they
  83         download is too small for what the server announced first, indicating
  84         the connection was probably interrupted.
  85         """
  86         # Both in bytes
  87         downloaded = None
  88         expected = None
  89
  90         def __init__(self, downloaded, expected):
  91                 self.downloaded = downloaded
  92                 self.expected = expected
  93
  94 class FileDownloader(object):
  95         """File Downloader class.
  96
  97         File downloader objects are the ones responsible for downloading the
  98         actual video file and writing it to disk if the user has requested
  99         it, among some other tasks. In most cases there should be one per
 100         program. As, given a video URL, the downloader doesn't know how to
 101         extract all the needed information, task that InfoExtractors do, it
 102         has to pass the URL to one of them.
 103
 104         For this, file downloader objects have a method that allows
 105         InfoExtractors to be registered in a given order. When it is passed
 106         a URL, the file downloader hands it to the first InfoExtractor it
 107         finds that reports being able to handle it. The InfoExtractor extracts
 108         all the information about the video or videos the URL refers to, and
 109         asks the FileDownloader to process the video information, possibly
 110         downloading the video.
 111
 112         File downloaders accept a lot of parameters. In order not to saturate
 113         the object constructor with arguments, it receives a dictionary of
 114         options instead. These options are available through the params
 115         attribute for the InfoExtractors to use. The FileDownloader also
 116         registers itself as the downloader in charge for the InfoExtractors
 117         that are added to it, so this is a "mutual registration".
 118
 119         Available options:
 120
 121         username:       Username for authentication purposes.
 122         password:       Password for authentication purposes.
 123         usenetrc:       Use netrc for authentication instead.
 124         quiet:          Do not print messages to stdout.
 125         forceurl:       Force printing final URL.
 126         forcetitle:     Force printing title.
 127         simulate:       Do not download the video files.
 128         format:         Video format code.
 129         outtmpl:        Template for output names.
 130         ignoreerrors:   Do not stop on download errors.
 131         ratelimit:      Download speed limit, in bytes/sec.
 132         nooverwrites:   Prevent overwriting files.
 133         continuedl:     Try to continue downloads if possible.
 134         """
 135
 136         params = None
 137         _ies = []
 138         _pps = []
 139         _download_retcode = None
 140
 141         def __init__(self, params):
 142                 """Create a FileDownloader object with the given options."""
 143                 self._ies = []
 144                 self._pps = []
 145                 self._download_retcode = 0
 146                 self.params = params
 147
 148         @staticmethod
 149         def pmkdir(filename):
 150                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
 151                 components = filename.split(os.sep)
 152                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
 153                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
 154                 for dir in aggregate:
 155                         if not os.path.exists(dir):
 156                                 os.mkdir(dir)
 157
 158         @staticmethod
 159         def format_bytes(bytes):
 160                 if bytes is None:
 161                         return 'N/A'
 162                 if type(bytes) is str:
 163                         bytes = float(bytes)
 164                 if bytes == 0.0:
 165                         exponent = 0
 166                 else:
 167                         exponent = long(math.log(bytes, 1024.0))
 168                 suffix = 'bkMGTPEZY'[exponent]
 169                 converted = float(bytes) / float(1024**exponent)
 170                 return '%.2f%s' % (converted, suffix)
 171
 172         @staticmethod
 173         def calc_percent(byte_counter, data_len):
 174                 if data_len is None:
 175                         return '---.-%'
 176                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 177
 178         @staticmethod
 179         def calc_eta(start, now, total, current):
 180                 if total is None:
 181                         return '--:--'
 182                 dif = now - start
 183                 if current == 0 or dif < 0.001: # One millisecond
 184                         return '--:--'
 185                 rate = float(current) / dif
 186                 eta = long((float(total) - float(current)) / rate)
 187                 (eta_mins, eta_secs) = divmod(eta, 60)
 188                 if eta_mins > 99:
 189                         return '--:--'
 190                 return '%02d:%02d' % (eta_mins, eta_secs)
 191
 192         @staticmethod
 193         def calc_speed(start, now, bytes):
 194                 dif = now - start
 195                 if bytes == 0 or dif < 0.001: # One millisecond
 196                         return '%10s' % '---b/s'
 197                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 198
 199         @staticmethod
 200         def best_block_size(elapsed_time, bytes):
 201                 new_min = max(bytes / 2.0, 1.0)
 202                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 203                 if elapsed_time < 0.001:
 204                         return long(new_max)
 205                 rate = bytes / elapsed_time
 206                 if rate > new_max:
 207                         return long(new_max)
 208                 if rate < new_min:
 209                         return long(new_min)
 210                 return long(rate)
 211
 212         @staticmethod
 213         def parse_bytes(bytestr):
 214                 """Parse a string indicating a byte quantity into a long integer."""
 215                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 216                 if matchobj is None:
 217                         return None
 218                 number = float(matchobj.group(1))
 219                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 220                 return long(round(number * multiplier))
 221
 222         @staticmethod
 223         def verify_url(url):
 224                 """Verify a URL is valid and data could be downloaded. Return real data URL."""
 225                 request = urllib2.Request(url, None, std_headers)
 226                 data = urllib2.urlopen(request)
 227                 data.read(1)
 228                 url = data.geturl()
 229                 data.close()
 230                 return url
 231
 232         def add_info_extractor(self, ie):
 233                 """Add an InfoExtractor object to the end of the list."""
 234                 self._ies.append(ie)
 235                 ie.set_downloader(self)
 236
 237         def add_post_processor(self, pp):
 238                 """Add a PostProcessor object to the end of the chain."""
 239                 self._pps.append(pp)
 240                 pp.set_downloader(self)
 241
 242         def to_stdout(self, message, skip_eol=False):
 243                 """Print message to stdout if not in quiet mode."""
 244                 if not self.params.get('quiet', False):
 245                         print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
 246                         sys.stdout.flush()
 247
 248         def to_stderr(self, message):
 249                 """Print message to stderr."""
 250                 print >>sys.stderr, message.encode(preferredencoding())
 251
 252         def fixed_template(self):
 253                 """Checks if the output template is fixed."""
 254                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
 255
 256         def trouble(self, message=None):
 257                 """Determine action to take when a download problem appears.
 258
 259                 Depending on if the downloader has been configured to ignore
 260                 download errors or not, this method may throw an exception or
 261                 not when errors are found, after printing the message.
 262                 """
 263                 if message is not None:
 264                         self.to_stderr(message)
 265                 if not self.params.get('ignoreerrors', False):
 266                         raise DownloadError(message)
 267                 self._download_retcode = 1
 268
 269         def slow_down(self, start_time, byte_counter):
 270                 """Sleep if the download speed is over the rate limit."""
 271                 rate_limit = self.params.get('ratelimit', None)
 272                 if rate_limit is None or byte_counter == 0:
 273                         return
 274                 now = time.time()
 275                 elapsed = now - start_time
 276                 if elapsed <= 0.0:
 277                         return
 278                 speed = float(byte_counter) / elapsed
 279                 if speed > rate_limit:
 280                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 281
 282         def report_destination(self, filename):
 283                 """Report destination filename."""
 284                 self.to_stdout(u'[download] Destination: %s' % filename)
 285
 286         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 287                 """Report download progress."""
 288                 self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
 289                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 290
 291         def report_resuming_byte(self, resume_len):
 292                 """Report attemtp to resume at given byte."""
 293                 self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
 294
 295         def report_file_already_downloaded(self, file_name):
 296                 """Report file has already been fully downloaded."""
 297                 self.to_stdout(u'[download] %s has already been downloaded' % file_name)
 298
 299         def report_unable_to_resume(self):
 300                 """Report it was impossible to resume download."""
 301                 self.to_stdout(u'[download] Unable to resume')
 302
 303         def report_finish(self):
 304                 """Report download finished."""
 305                 self.to_stdout(u'')
 306
 307         def process_info(self, info_dict):
 308                 """Process a single dictionary returned by an InfoExtractor."""
 309                 # Do nothing else if in simulate mode
 310                 if self.params.get('simulate', False):
 311                         try:
 312                                 info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
 313                         except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
 314                                 raise UnavailableFormatError
 315
 316                         # Forced printings
 317                         if self.params.get('forcetitle', False):
 318                                 print info_dict['title'].encode(preferredencoding())
 319                         if self.params.get('forceurl', False):
 320                                 print info_dict['url'].encode(preferredencoding())
 321
 322                         return
 323
 324                 try:
 325                         template_dict = dict(info_dict)
 326                         template_dict['epoch'] = unicode(long(time.time()))
 327                         filename = self.params['outtmpl'] % template_dict
 328                 except (ValueError, KeyError), err:
 329                         self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
 330                 if self.params.get('nooverwrites', False) and os.path.exists(filename):
 331                         self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
 332                         return
 333
 334                 try:
 335                         self.pmkdir(filename)
 336                 except (OSError, IOError), err:
 337                         self.trouble('ERROR: unable to create directories: %s' % str(err))
 338                         return
 339
 340                 try:
 341                         success = self._do_download(filename, info_dict['url'].encode('utf-8'))
 342                 except (OSError, IOError), err:
 343                         raise UnavailableFormatError
 344                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 345                         self.trouble('ERROR: unable to download video data: %s' % str(err))
 346                         return
 347                 except (ContentTooShortError, ), err:
 348                         self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 349                         return
 350
 351                 if success:
 352                         try:
 353                                 self.post_process(filename, info_dict)
 354                         except (PostProcessingError), err:
 355                                 self.trouble('ERROR: postprocessing: %s' % str(err))
 356                                 return
 357
 358         def download(self, url_list):
 359                 """Download a given list of URLs."""
 360                 if len(url_list) > 1 and self.fixed_template():
 361                         raise SameFileError(self.params['outtmpl'])
 362
 363                 for url in url_list:
 364                         suitable_found = False
 365                         for ie in self._ies:
 366                                 # Go to next InfoExtractor if not suitable
 367                                 if not ie.suitable(url):
 368                                         continue
 369
 370                                 # Suitable InfoExtractor found
 371                                 suitable_found = True
 372
 373                                 # Extract information from URL and process it
 374                                 ie.extract(url)
 375
 376                                 # Suitable InfoExtractor had been found; go to next URL
 377                                 break
 378
 379                         if not suitable_found:
 380                                 self.trouble('ERROR: no suitable InfoExtractor: %s' % url)
 381
 382                 return self._download_retcode
 383
 384         def post_process(self, filename, ie_info):
 385                 """Run the postprocessing chain on the given file."""
 386                 info = dict(ie_info)
 387                 info['filepath'] = filename
 388                 for pp in self._pps:
 389                         info = pp.run(info)
 390                         if info is None:
 391                                 break
 392
 393         def _do_download(self, filename, url):
 394                 stream = None
 395                 open_mode = 'ab'
 396
 397                 basic_request = urllib2.Request(url, None, std_headers)
 398                 request = urllib2.Request(url, None, std_headers)
 399
 400                 # Attempt to resume download with "continuedl" option
 401                 if os.path.isfile(filename):
 402                         resume_len = os.path.getsize(filename)
 403                 else:
 404                         resume_len = 0
 405                 if self.params.get('continuedl', False) and resume_len != 0:
 406                         self.report_resuming_byte(resume_len)
 407                         request.add_header('Range','bytes=%d-' % resume_len)
 408
 409                 # Establish connection
 410                 try:
 411                         data = urllib2.urlopen(request)
 412                 except (urllib2.HTTPError, ), err:
 413                         if err.code != 416: #  416 is 'Requested range not satisfiable'
 414                                 raise
 415                         data = urllib2.urlopen(basic_request)
 416                         content_length = data.info()['Content-Length']
 417                         if content_length is not None and long(content_length) == resume_len:
 418                                 self.report_file_already_downloaded(filename)
 419                                 return True
 420                         else:
 421                                 self.report_unable_to_resume()
 422                                 open_mode = 'wb'
 423
 424                 data_len = data.info().get('Content-length', None)
 425                 data_len_str = self.format_bytes(data_len)
 426                 byte_counter = 0
 427                 block_size = 1024
 428                 start = time.time()
 429                 while True:
 430                         # Download and write
 431                         before = time.time()
 432                         data_block = data.read(block_size)
 433                         after = time.time()
 434                         data_block_len = len(data_block)
 435                         if data_block_len == 0:
 436                                 break
 437                         byte_counter += data_block_len
 438
 439                         # Open file just in time
 440                         if stream is None:
 441                                 try:
 442                                         stream = open(filename, open_mode)
 443                                         self.report_destination(filename)
 444                                 except (OSError, IOError), err:
 445                                         self.trouble('ERROR: unable to open for writing: %s' % str(err))
 446                                         return False
 447                         stream.write(data_block)
 448                         block_size = self.best_block_size(after - before, data_block_len)
 449
 450                         # Progress message
 451                         percent_str = self.calc_percent(byte_counter, data_len)
 452                         eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
 453                         speed_str = self.calc_speed(start, time.time(), byte_counter)
 454                         self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 455
 456                         # Apply rate limit
 457                         self.slow_down(start, byte_counter)
 458
 459                 self.report_finish()
 460                 if data_len is not None and str(byte_counter) != data_len:
 461                         raise ContentTooShortError(byte_counter, long(data_len))
 462                 return True
 463
 464 class InfoExtractor(object):
 465         """Information Extractor class.
 466
 467         Information extractors are the classes that, given a URL, extract
 468         information from the video (or videos) the URL refers to. This
 469         information includes the real video URL, the video title and simplified
 470         title, author and others. The information is stored in a dictionary
 471         which is then passed to the FileDownloader. The FileDownloader
 472         processes this information possibly downloading the video to the file
 473         system, among other possible outcomes. The dictionaries must include
 474         the following fields:
 475
 476         id:             Video identifier.
 477         url:            Final video URL.
 478         uploader:       Nickname of the video uploader.
 479         title:          Literal title.
 480         stitle:         Simplified title.
 481         ext:            Video filename extension.
 482
 483         Subclasses of this one should re-define the _real_initialize() and
 484         _real_extract() methods, as well as the suitable() static method.
 485         Probably, they should also be instantiated and added to the main
 486         downloader.
 487         """
 488
 489         _ready = False
 490         _downloader = None
 491
 492         def __init__(self, downloader=None):
 493                 """Constructor. Receives an optional downloader."""
 494                 self._ready = False
 495                 self.set_downloader(downloader)
 496
 497         @staticmethod
 498         def suitable(url):
 499                 """Receives a URL and returns True if suitable for this IE."""
 500                 return False
 501
 502         def initialize(self):
 503                 """Initializes an instance (authentication, etc)."""
 504                 if not self._ready:
 505                         self._real_initialize()
 506                         self._ready = True
 507
 508         def extract(self, url):
 509                 """Extracts URL information and returns it in list of dicts."""
 510                 self.initialize()
 511                 return self._real_extract(url)
 512
 513         def set_downloader(self, downloader):
 514                 """Sets the downloader for this IE."""
 515                 self._downloader = downloader
 516
 517         def _real_initialize(self):
 518                 """Real initialization process. Redefine in subclasses."""
 519                 pass
 520
 521         def _real_extract(self, url):
 522                 """Real extraction process. Redefine in subclasses."""
 523                 pass
 524
 525 class YoutubeIE(InfoExtractor):
 526         """Information extractor for youtube.com."""
 527
 528         _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
 529         _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
 530         _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
 531         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
 532         _NETRC_MACHINE = 'youtube'
 533         _available_formats = ['37', '22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag
 534         _video_extensions = {
 535                 '13': '3gp',
 536                 '17': 'mp4',
 537                 '18': 'mp4',
 538                 '22': 'mp4',
 539                 '37': 'mp4',
 540         }
 541
 542         @staticmethod
 543         def suitable(url):
 544                 return (re.match(YoutubeIE._VALID_URL, url) is not None)
 545
 546         @staticmethod
 547         def htmlentity_transform(matchobj):
 548                 """Transforms an HTML entity to a Unicode character."""
 549                 entity = matchobj.group(1)
 550
 551                 # Known non-numeric HTML entity
 552                 if entity in htmlentitydefs.name2codepoint:
 553                         return unichr(htmlentitydefs.name2codepoint[entity])
 554
 555                 # Unicode character
 556                 mobj = re.match(ur'(?u)#(x?\d+)', entity)
 557                 if mobj is not None:
 558                         numstr = mobj.group(1)
 559                         if numstr.startswith(u'x'):
 560                                 base = 16
 561                                 numstr = u'0%s' % numstr
 562                         else:
 563                                 base = 10
 564                         return unichr(long(numstr, base))
 565
 566                 # Unknown entity in name, return its literal representation
 567                 return (u'&%s;' % entity)
 568
 569         def report_lang(self):
 570                 """Report attempt to set language."""
 571                 self._downloader.to_stdout(u'[youtube] Setting language')
 572
 573         def report_login(self):
 574                 """Report attempt to log in."""
 575                 self._downloader.to_stdout(u'[youtube] Logging in')
 576
 577         def report_age_confirmation(self):
 578                 """Report attempt to confirm age."""
 579                 self._downloader.to_stdout(u'[youtube] Confirming age')
 580
 581         def report_video_info_webpage_download(self, video_id):
 582                 """Report attempt to download video info webpage."""
 583                 self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
 584
 585         def report_information_extraction(self, video_id):
 586                 """Report attempt to extract video information."""
 587                 self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
 588
 589         def report_unavailable_format(self, video_id, format):
 590                 """Report extracted video URL."""
 591                 self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
 592
 593         def _real_initialize(self):
 594                 if self._downloader is None:
 595                         return
 596
 597                 username = None
 598                 password = None
 599                 downloader_params = self._downloader.params
 600
 601                 # Attempt to use provided username and password or .netrc data
 602                 if downloader_params.get('username', None) is not None:
 603                         username = downloader_params['username']
 604                         password = downloader_params['password']
 605                 elif downloader_params.get('usenetrc', False):
 606                         try:
 607                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
 608                                 if info is not None:
 609                                         username = info[0]
 610                                         password = info[2]
 611                                 else:
 612                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
 613                         except (IOError, netrc.NetrcParseError), err:
 614                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
 615                                 return
 616
 617                 # Set language
 618                 request = urllib2.Request(self._LANG_URL, None, std_headers)
 619                 try:
 620                         self.report_lang()
 621                         urllib2.urlopen(request).read()
 622                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 623                         self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
 624                         return
 625
 626                 # No authentication to be performed
 627                 if username is None:
 628                         return
 629
 630                 # Log in
 631                 login_form = {
 632                                 'current_form': 'loginForm',
 633                                 'next':         '/',
 634                                 'action_login': 'Log In',
 635                                 'username':     username,
 636                                 'password':     password,
 637                                 }
 638                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
 639                 try:
 640                         self.report_login()
 641                         login_results = urllib2.urlopen(request).read()
 642                         if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
 643                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
 644                                 return
 645                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 646                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
 647                         return
 648
 649                 # Confirm age
 650                 age_form = {
 651                                 'next_url':             '/',
 652                                 'action_confirm':       'Confirm',
 653                                 }
 654                 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
 655                 try:
 656                         self.report_age_confirmation()
 657                         age_results = urllib2.urlopen(request).read()
 658                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 659                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
 660                         return
 661
 662         def _real_extract(self, url):
 663                 # Extract video id from URL
 664                 mobj = re.match(self._VALID_URL, url)
 665                 if mobj is None:
 666                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
 667                         return
 668                 video_id = mobj.group(2)
 669
 670                 # Downloader parameters
 671                 best_quality = False
 672                 format_param = None
 673                 quality_index = 0
 674                 if self._downloader is not None:
 675                         params = self._downloader.params
 676                         format_param = params.get('format', None)
 677                         if format_param == '0':
 678                                 format_param = self._available_formats[quality_index]
 679                                 best_quality = True
 680
 681                 while True:
 682                         # Extension
 683                         video_extension = self._video_extensions.get(format_param, 'flv')
 684
 685                         # Get video info
 686                         video_info_url = 'http://www.youtube.com/get_video_info?&video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en' % video_id
 687                         request = urllib2.Request(video_info_url, None, std_headers)
 688                         try:
 689                                 self.report_video_info_webpage_download(video_id)
 690                                 video_info_webpage = urllib2.urlopen(request).read()
 691                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 692                                 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
 693                                 return
 694                         self.report_information_extraction(video_id)
 695
 696                         # "t" param
 697                         mobj = re.search(r'(?m)&token=([^&]+)(?:&|$)', video_info_webpage)
 698                         if mobj is None:
 699                                 # Attempt to see if YouTube has issued an error message
 700                                 mobj = re.search(r'(?m)&reason=([^&]+)(?:&|$)', video_info_webpage)
 701                                 if mobj is None:
 702                                         self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason')
 703                                         stream = open('reportme-ydl-%s.dat' % time.time(), 'wb')
 704                                         stream.write(video_info_webpage)
 705                                         stream.close()
 706                                 else:
 707                                         reason = urllib.unquote_plus(mobj.group(1))
 708                                         self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8'))
 709                                 return
 710                         token = urllib.unquote(mobj.group(1))
 711                         video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
 712                         if format_param is not None:
 713                                 video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
 714
 715                         # uploader
 716                         mobj = re.search(r'(?m)&author=([^&]+)(?:&|$)', video_info_webpage)
 717                         if mobj is None:
 718                                 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
 719                                 return
 720                         video_uploader = urllib.unquote(mobj.group(1))
 721
 722                         # title
 723                         mobj = re.search(r'(?m)&title=([^&]*)(?:&|$)', video_info_webpage)
 724                         if mobj is None:
 725                                 self._downloader.trouble(u'ERROR: unable to extract video title')
 726                                 return
 727                         video_title = urllib.unquote_plus(mobj.group(1))
 728                         video_title = video_title.decode('utf-8')
 729                         video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
 730                         video_title = video_title.replace(os.sep, u'%')
 731
 732                         # simplified title
 733                         simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
 734                         simple_title = simple_title.strip(ur'_')
 735
 736                         try:
 737                                 # Process video information
 738                                 self._downloader.process_info({
 739                                         'id':           video_id.decode('utf-8'),
 740                                         'url':          video_real_url.decode('utf-8'),
 741                                         'uploader':     video_uploader.decode('utf-8'),
 742                                         'title':        video_title,
 743                                         'stitle':       simple_title,
 744                                         'ext':          video_extension.decode('utf-8'),
 745                                 })
 746
 747                                 return
 748
 749                         except UnavailableFormatError, err:
 750                                 if best_quality:
 751                                         if quality_index == len(self._available_formats) - 1:
 752                                                 # I don't ever expect this to happen
 753                                                 self._downloader.trouble(u'ERROR: no known formats available for video')
 754                                                 return
 755                                         else:
 756                                                 self.report_unavailable_format(video_id, format_param)
 757                                                 quality_index += 1
 758                                                 format_param = self._available_formats[quality_index]
 759                                                 continue
 760                                 else:
 761                                         self._downloader.trouble('ERROR: format not available for video')
 762                                         return
 763
 764
 765 class MetacafeIE(InfoExtractor):
 766         """Information Extractor for metacafe.com."""
 767
 768         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
 769         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
 770         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
 771         _youtube_ie = None
 772
 773         def __init__(self, youtube_ie, downloader=None):
 774                 InfoExtractor.__init__(self, downloader)
 775                 self._youtube_ie = youtube_ie
 776
 777         @staticmethod
 778         def suitable(url):
 779                 return (re.match(MetacafeIE._VALID_URL, url) is not None)
 780
 781         def report_disclaimer(self):
 782                 """Report disclaimer retrieval."""
 783                 self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
 784
 785         def report_age_confirmation(self):
 786                 """Report attempt to confirm age."""
 787                 self._downloader.to_stdout(u'[metacafe] Confirming age')
 788
 789         def report_download_webpage(self, video_id):
 790                 """Report webpage download."""
 791                 self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
 792
 793         def report_extraction(self, video_id):
 794                 """Report information extraction."""
 795                 self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
 796
 797         def _real_initialize(self):
 798                 # Retrieve disclaimer
 799                 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
 800                 try:
 801                         self.report_disclaimer()
 802                         disclaimer = urllib2.urlopen(request).read()
 803                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 804                         self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
 805                         return
 806
 807                 # Confirm age
 808                 disclaimer_form = {
 809                         'filters': '0',
 810                         'submit': "Continue - I'm over 18",
 811                         }
 812                 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
 813                 try:
 814                         self.report_age_confirmation()
 815                         disclaimer = urllib2.urlopen(request).read()
 816                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 817                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
 818                         return
 819
 820         def _real_extract(self, url):
 821                 # Extract id and simplified title from URL
 822                 mobj = re.match(self._VALID_URL, url)
 823                 if mobj is None:
 824                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
 825                         return
 826
 827                 video_id = mobj.group(1)
 828
 829                 # Check if video comes from YouTube
 830                 mobj2 = re.match(r'^yt-(.*)$', video_id)
 831                 if mobj2 is not None:
 832                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
 833                         return
 834
 835                 simple_title = mobj.group(2).decode('utf-8')
 836                 video_extension = 'flv'
 837
 838                 # Retrieve video webpage to extract further information
 839                 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
 840                 try:
 841                         self.report_download_webpage(video_id)
 842                         webpage = urllib2.urlopen(request).read()
 843                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 844                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
 845                         return
 846
 847                 # Extract URL, uploader and title from webpage
 848                 self.report_extraction(video_id)
 849                 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
 850                 if mobj is None:
 851                         self._downloader.trouble(u'ERROR: unable to extract media URL')
 852                         return
 853                 mediaURL = urllib.unquote(mobj.group(1))
 854
 855                 #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
 856                 #if mobj is None:
 857                 #       self._downloader.trouble(u'ERROR: unable to extract gdaKey')
 858                 #       return
 859                 #gdaKey = mobj.group(1)
 860                 #
 861                 #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
 862
 863                 video_url = mediaURL
 864
 865                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
 866                 if mobj is None:
 867                         self._downloader.trouble(u'ERROR: unable to extract title')
 868                         return
 869                 video_title = mobj.group(1).decode('utf-8')
 870
 871                 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
 872                 if mobj is None:
 873                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
 874                         return
 875                 video_uploader = mobj.group(1)
 876
 877                 try:
 878                         # Process video information
 879                         self._downloader.process_info({
 880                                 'id':           video_id.decode('utf-8'),
 881                                 'url':          video_url.decode('utf-8'),
 882                                 'uploader':     video_uploader.decode('utf-8'),
 883                                 'title':        video_title,
 884                                 'stitle':       simple_title,
 885                                 'ext':          video_extension.decode('utf-8'),
 886                         })
 887                 except UnavailableFormatError:
 888                         self._downloader.trouble(u'ERROR: format not available for video')
 889
 890
 891 class YoutubeSearchIE(InfoExtractor):
 892         """Information Extractor for YouTube search queries."""
 893         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
 894         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
 895         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
 896         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
 897         _youtube_ie = None
 898         _max_youtube_results = 1000
 899
 900         def __init__(self, youtube_ie, downloader=None):
 901                 InfoExtractor.__init__(self, downloader)
 902                 self._youtube_ie = youtube_ie
 903
 904         @staticmethod
 905         def suitable(url):
 906                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
 907
 908         def report_download_page(self, query, pagenum):
 909                 """Report attempt to download playlist page with given number."""
 910                 self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
 911
 912         def _real_initialize(self):
 913                 self._youtube_ie.initialize()
 914
 915         def _real_extract(self, query):
 916                 mobj = re.match(self._VALID_QUERY, query)
 917                 if mobj is None:
 918                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
 919                         return
 920
 921                 prefix, query = query.split(':')
 922                 prefix = prefix[8:]
 923                 if prefix == '':
 924                         self._download_n_results(query, 1)
 925                         return
 926                 elif prefix == 'all':
 927                         self._download_n_results(query, self._max_youtube_results)
 928                         return
 929                 else:
 930                         try:
 931                                 n = long(prefix)
 932                                 if n <= 0:
 933                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
 934                                         return
 935                                 elif n > self._max_youtube_results:
 936                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
 937                                         n = self._max_youtube_results
 938                                 self._download_n_results(query, n)
 939                                 return
 940                         except ValueError: # parsing prefix as integer fails
 941                                 self._download_n_results(query, 1)
 942                                 return
 943
 944         def _download_n_results(self, query, n):
 945                 """Downloads a specified number of results for a query"""
 946
 947                 video_ids = []
 948                 already_seen = set()
 949                 pagenum = 1
 950
 951                 while True:
 952                         self.report_download_page(query, pagenum)
 953                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
 954                         request = urllib2.Request(result_url, None, std_headers)
 955                         try:
 956                                 page = urllib2.urlopen(request).read()
 957                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 958                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
 959                                 return
 960
 961                         # Extract video identifiers
 962                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
 963                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
 964                                 if video_id not in already_seen:
 965                                         video_ids.append(video_id)
 966                                         already_seen.add(video_id)
 967                                         if len(video_ids) == n:
 968                                                 # Specified n videos reached
 969                                                 for id in video_ids:
 970                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
 971                                                 return
 972
 973                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
 974                                 for id in video_ids:
 975                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
 976                                 return
 977
 978                         pagenum = pagenum + 1
 979
 980 class YoutubePlaylistIE(InfoExtractor):
 981         """Information Extractor for YouTube playlists."""
 982
 983         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*'
 984         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
 985         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
 986         _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s'
 987         _youtube_ie = None
 988
 989         def __init__(self, youtube_ie, downloader=None):
 990                 InfoExtractor.__init__(self, downloader)
 991                 self._youtube_ie = youtube_ie
 992
 993         @staticmethod
 994         def suitable(url):
 995                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
 996
 997         def report_download_page(self, playlist_id, pagenum):
 998                 """Report attempt to download playlist page with given number."""
 999                 self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1000
1001         def _real_initialize(self):
1002                 self._youtube_ie.initialize()
1003
1004         def _real_extract(self, url):
1005                 # Extract playlist id
1006                 mobj = re.match(self._VALID_URL, url)
1007                 if mobj is None:
1008                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1009                         return
1010
1011                 # Download playlist pages
1012                 playlist_id = mobj.group(1)
1013                 video_ids = []
1014                 pagenum = 1
1015
1016                 while True:
1017                         self.report_download_page(playlist_id, pagenum)
1018                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1019                         try:
1020                                 page = urllib2.urlopen(request).read()
1021                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1022                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1023                                 return
1024
1025                         # Extract video identifiers
1026                         ids_in_page = []
1027                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1028                                 if mobj.group(1) not in ids_in_page:
1029                                         ids_in_page.append(mobj.group(1))
1030                         video_ids.extend(ids_in_page)
1031
1032                         if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page:
1033                                 break
1034                         pagenum = pagenum + 1
1035
1036                 for id in video_ids:
1037                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1038                 return
1039
1040 class YoutubeUserIE(InfoExtractor):
1041         """Information Extractor for YouTube users."""
1042
1043         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
1044         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1045         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)'
1046         _youtube_ie = None
1047
1048         def __init__(self, youtube_ie, downloader=None):
1049                 InfoExtractor.__init__(self, downloader)
1050                 self._youtube_ie = youtube_ie
1051
1052         @staticmethod
1053         def suitable(url):
1054                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
1055
1056         def report_download_page(self, username):
1057                 """Report attempt to download user page."""
1058                 self._downloader.to_stdout(u'[youtube] USR %s: Downloading page ' % (username))
1059
1060         def _real_initialize(self):
1061                 self._youtube_ie.initialize()
1062
1063         def _real_extract(self, url):
1064                 # Extract username
1065                 mobj = re.match(self._VALID_URL, url)
1066                 if mobj is None:
1067                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1068                         return
1069
1070                 # Download user page
1071                 username = mobj.group(1)
1072                 video_ids = []
1073                 pagenum = 1
1074
1075                 self.report_download_page(username)
1076                 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
1077                 try:
1078                         page = urllib2.urlopen(request).read()
1079                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1080                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1081                         return
1082
1083                 # Extract video identifiers
1084                 ids_in_page = []
1085
1086                 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1087                         print mobj.group(1)
1088                         if mobj.group(1) not in ids_in_page:
1089                                 ids_in_page.append(mobj.group(1))
1090                 video_ids.extend(ids_in_page)
1091
1092                 for id in video_ids:
1093                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1094                 return
1095
class PostProcessor(object):
        """Base class for post processing steps.

        A downloader keeps a chain of PostProcessor objects, registered
        through its add_post_processor() method. After a successful download
        it calls run() on each of them in turn: the first one receives the
        initial information and every following one receives whatever the
        previous one returned.

        The chain ends when the last PostProcessor has run or as soon as one
        of them returns None.

        As with InfoExtractor objects, registration is mutual: the
        PostProcessor also holds a reference to its downloader.
        """

        # Downloader this PostProcessor is attached to, if any.
        _downloader = None

        def __init__(self, downloader=None):
                """Create the PostProcessor, optionally attached to a downloader."""
                self._downloader = downloader

        def set_downloader(self, downloader):
                """Attach this PostProcessor to the given downloader."""
                self._downloader = downloader

        def run(self, information):
                """Process a finished download.

                "information" is a dictionary like the ones built by the
                InfoExtractors, with an additional "filepath" field pointing
                to the downloaded file.

                The returned value is passed to the next PostProcessor in the
                chain; it may be the received dictionary with some fields
                changed. Returning None stops the chain.

                A PostProcessingError exception may be raised, in which case
                the calling downloader will take it into account.

                This base implementation returns the information untouched.
                """
                unchanged = information
                return unchanged
1141
1142 ### MAIN PROGRAM ###
1143 if __name__ == '__main__':
1144         try:
1145                 # Modules needed only when running the main program
1146                 import getpass
1147                 import optparse
1148
1149                 # Function to update the program file with the latest version from bitbucket.org
1150                 def update_self(downloader, filename):
1151                         # Note: downloader only used for options
1152                         if not os.access (filename, os.W_OK):
1153                                 sys.exit('ERROR: no write permissions on %s' % filename)
1154
1155                         downloader.to_stdout('Updating to latest stable version...')
1156                         latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
1157                         latest_version = urllib.urlopen(latest_url).read().strip()
1158                         prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
1159                         newcontent = urllib.urlopen(prog_url).read()
1160                         stream = open(filename, 'w')
1161                         stream.write(newcontent)
1162                         stream.close()
1163                         downloader.to_stdout('Updated to version %s' % latest_version)
1164
1165                 # General configuration
1166                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
1167                 urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
1168                 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
1169
1170                 # Parse command line
1171                 parser = optparse.OptionParser(
1172                         usage='Usage: %prog [options] url...',
1173                         version='INTERNAL',
1174                         conflict_handler='resolve',
1175                 )
1176
1177                 parser.add_option('-h', '--help',
1178                                 action='help', help='print this help text and exit')
1179                 parser.add_option('-v', '--version',
1180                                 action='version', help='print program version and exit')
1181                 parser.add_option('-U', '--update',
1182                                 action='store_true', dest='update_self', help='update this program to latest stable version')
1183                 parser.add_option('-i', '--ignore-errors',
1184                                 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
1185                 parser.add_option('-r', '--rate-limit',
1186                                 dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
1187
1188                 authentication = optparse.OptionGroup(parser, 'Authentication Options')
1189                 authentication.add_option('-u', '--username',
1190                                 dest='username', metavar='UN', help='account username')
1191                 authentication.add_option('-p', '--password',
1192                                 dest='password', metavar='PW', help='account password')
1193                 authentication.add_option('-n', '--netrc',
1194                                 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
1195                 parser.add_option_group(authentication)
1196
1197                 video_format = optparse.OptionGroup(parser, 'Video Format Options')
1198                 video_format.add_option('-f', '--format',
1199                                 action='store', dest='format', metavar='FMT', help='video format code')
1200                 video_format.add_option('-b', '--best-quality',
1201                                 action='store_const', dest='format', help='download the best quality video possible', const='0')
1202                 video_format.add_option('-m', '--mobile-version',
1203                                 action='store_const', dest='format', help='alias for -f 17', const='17')
1204                 video_format.add_option('-d', '--high-def',
1205                                 action='store_const', dest='format', help='alias for -f 22', const='22')
1206                 parser.add_option_group(video_format)
1207
1208                 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
1209                 verbosity.add_option('-q', '--quiet',
1210                                 action='store_true', dest='quiet', help='activates quiet mode', default=False)
1211                 verbosity.add_option('-s', '--simulate',
1212                                 action='store_true', dest='simulate', help='do not download video', default=False)
1213                 verbosity.add_option('-g', '--get-url',
1214                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
1215                 verbosity.add_option('-e', '--get-title',
1216                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
1217                 parser.add_option_group(verbosity)
1218
1219                 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
1220                 filesystem.add_option('-t', '--title',
1221                                 action='store_true', dest='usetitle', help='use title in file name', default=False)
1222                 filesystem.add_option('-l', '--literal',
1223                                 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
1224                 filesystem.add_option('-o', '--output',
1225                                 dest='outtmpl', metavar='TPL', help='output filename template')
1226                 filesystem.add_option('-a', '--batch-file',
1227                                 dest='batchfile', metavar='F', help='file containing URLs to download')
1228                 filesystem.add_option('-w', '--no-overwrites',
1229                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
1230                 filesystem.add_option('-c', '--continue',
1231                                 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
1232                 parser.add_option_group(filesystem)
1233
1234                 (opts, args) = parser.parse_args()
1235
1236                 # Batch file verification
1237                 batchurls = []
1238                 if opts.batchfile is not None:
1239                         try:
1240                                 batchurls = open(opts.batchfile, 'r').readlines()
1241                                 batchurls = [x.strip() for x in batchurls]
1242                                 batchurls = [x for x in batchurls if len(x) > 0]
1243                         except IOError:
1244                                 sys.exit(u'ERROR: batch file could not be read')
1245                 all_urls = batchurls + args
1246
1247                 # Conflicting, missing and erroneous options
1248                 if opts.usenetrc and (opts.username is not None or opts.password is not None):
1249                         parser.error(u'using .netrc conflicts with giving username/password')
1250                 if opts.password is not None and opts.username is None:
1251                         parser.error(u'account username missing')
1252                 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
1253                         parser.error(u'using output template conflicts with using title or literal title')
1254                 if opts.usetitle and opts.useliteral:
1255                         parser.error(u'using title conflicts with using literal title')
1256                 if opts.username is not None and opts.password is None:
1257                         opts.password = getpass.getpass(u'Type account password and press return:')
1258                 if opts.ratelimit is not None:
1259                         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
1260                         if numeric_limit is None:
1261                                 parser.error(u'invalid rate limit specified')
1262                         opts.ratelimit = numeric_limit
1263
1264                 # Information extractors
1265                 youtube_ie = YoutubeIE()
1266                 metacafe_ie = MetacafeIE(youtube_ie)
1267                 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
1268                 youtube_user_ie = YoutubeUserIE(youtube_ie)
1269                 youtube_search_ie = YoutubeSearchIE(youtube_ie)
1270
1271                 # File downloader
1272                 fd = FileDownloader({
1273                         'usenetrc': opts.usenetrc,
1274                         'username': opts.username,
1275                         'password': opts.password,
1276                         'quiet': (opts.quiet or opts.geturl or opts.gettitle),
1277                         'forceurl': opts.geturl,
1278                         'forcetitle': opts.gettitle,
1279                         'simulate': (opts.simulate or opts.geturl or opts.gettitle),
1280                         'format': opts.format,
1281                         'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
1282                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
1283                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
1284                                 or u'%(id)s.%(ext)s'),
1285                         'ignoreerrors': opts.ignoreerrors,
1286                         'ratelimit': opts.ratelimit,
1287                         'nooverwrites': opts.nooverwrites,
1288                         'continuedl': opts.continue_dl,
1289                         })
1290                 fd.add_info_extractor(youtube_search_ie)
1291                 fd.add_info_extractor(youtube_pl_ie)
1292                 fd.add_info_extractor(youtube_user_ie)
1293                 fd.add_info_extractor(metacafe_ie)
1294                 fd.add_info_extractor(youtube_ie)
1295
1296                 # Update version
1297                 if opts.update_self:
1298                         update_self(fd, sys.argv[0])
1299
1300                 # Maybe do nothing
1301                 if len(all_urls) < 1:
1302                         if not opts.update_self:
1303                                 parser.error(u'you must provide at least one URL')
1304                         else:
1305                                 sys.exit()
1306                 retcode = fd.download(all_urls)
1307                 sys.exit(retcode)
1308
1309         except DownloadError:
1310                 sys.exit(1)
1311         except SameFileError:
1312                 sys.exit(u'ERROR: fixed output name but more than one file to download')
1313         except KeyboardInterrupt:
1314                 sys.exit(u'\nERROR: Interrupted by user')