2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # License: Public domain code
22 # parse_qs was moved from the cgi module to the urlparse module recently.
24 from urlparse import parse_qs
26 from cgi import parse_qs
# NOTE(review): the `std_headers = {` opener (and closer) are elided in this
# excerpt; these entries are the default HTTP headers sent with every request.
	'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
	'Accept-Language': 'en-us,en;q=0.5',
35 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
def preferredencoding():
	"""Get preferred encoding.

	Returns the best encoding scheme for the system, based on
	locale.getpreferredencoding() and some further tweaks.
	"""
	def yield_preferredencoding():
		pref = locale.getpreferredencoding()
		# [generator body elided in this excerpt — presumably yields `pref`,
		# with a fallback when the locale lookup is unusable; TODO confirm]
	# Python 2 generator protocol: take the first (only) yielded value.
	return yield_preferredencoding().next()
class DownloadError(Exception):
	"""Download Error exception.

	This exception may be thrown by FileDownloader objects if they are not
	configured to continue on errors. They will contain the appropriate
	error message.
	"""
class SameFileError(Exception):
	"""Same File exception.

	This exception will be thrown by FileDownloader objects if they detect
	multiple files would have to be downloaded to the same file on disk.
	"""
class PostProcessingError(Exception):
	"""Post Processing exception.

	This exception may be raised by PostProcessor's .run() method to
	indicate an error in the postprocessing task.
	"""
class UnavailableFormatError(Exception):
	"""Unavailable Format exception.

	This exception will be thrown when a video is requested
	in a format that is not available for that video.
	"""
class ContentTooShortError(Exception):
	"""Content Too Short exception.

	This exception may be raised by FileDownloader objects when a file they
	download is too small for what the server announced first, indicating
	the connection was probably interrupted.
	"""
97 def __init__(self, downloaded, expected):
98 self.downloaded = downloaded
99 self.expected = expected
class FileDownloader(object):
	"""File Downloader class.

	File downloader objects are the ones responsible of downloading the
	actual video file and writing it to disk if the user has requested
	it, among some other tasks. In most cases there should be one per
	program. As, given a video URL, the downloader doesn't know how to
	extract all the needed information, task that InfoExtractors do, it
	has to pass the URL to one of them.

	For this, file downloader objects have a method that allows
	InfoExtractors to be registered in a given order. When it is passed
	a URL, the file downloader handles it to the first InfoExtractor it
	finds that reports being able to handle it. The InfoExtractor extracts
	all the information about the video or videos the URL refers to, and
	asks the FileDownloader to process the video information, possibly
	downloading the video.

	File downloaders accept a lot of parameters. In order not to saturate
	the object constructor with arguments, it receives a dictionary of
	options instead. These options are available through the params
	attribute for the InfoExtractors to use. The FileDownloader also
	registers itself as the downloader in charge for the InfoExtractors
	that are added to it, so this is a "mutual registration".

	Available options:

	username: Username for authentication purposes.
	password: Password for authentication purposes.
	usenetrc: Use netrc for authentication instead.
	quiet: Do not print messages to stdout.
	forceurl: Force printing final URL.
	forcetitle: Force printing title.
	simulate: Do not download the video files.
	format: Video format code.
	outtmpl: Template for output names.
	ignoreerrors: Do not stop on download errors.
	ratelimit: Download speed limit, in bytes/sec.
	nooverwrites: Prevent overwriting files.
	continuedl: Try to continue downloads if possible.
	"""

	# Process exit status reported by download(); set to 1 by trouble()
	# when an error is ignored.
	_download_retcode = None

	def __init__(self, params):
		"""Create a FileDownloader object with the given options."""
		# [initialization of the extractor/post-processor lists is elided in
		# this excerpt — TODO confirm]
		self._download_retcode = 0
	def pmkdir(filename):
		"""Create directory components in filename. Similar to Unix "mkdir -p"."""
		components = filename.split(os.sep)
		# Build the list of cumulative path prefixes, excluding the leaf name.
		aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
		aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
		for dir in aggregate:
			if not os.path.exists(dir):
				# [mkdir call elided in this excerpt]
	def format_bytes(bytes):
		"""Format a byte count as a human-readable string, e.g. '1.25M'."""
		# [None-handling elided in this excerpt]
		if type(bytes) is str:
			# [string-input conversion elided in this excerpt]
		# Pick the 1024-based magnitude and its one-letter suffix.
		exponent = long(math.log(bytes, 1024.0))
		suffix = 'bkMGTPEZY'[exponent]
		converted = float(bytes) / float(1024**exponent)
		return '%.2f%s' % (converted, suffix)
	def calc_percent(byte_counter, data_len):
		"""Return download progress as a right-aligned percentage string, e.g. ' 42.0%'."""
		# [guard for unknown data_len elided in this excerpt]
		return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
	def calc_eta(start, now, total, current):
		"""Estimate remaining download time as an 'MM:SS' string."""
		# [elapsed-time computation (`dif`) elided in this excerpt]
		if current == 0 or dif < 0.001: # One millisecond
			# [unknown-ETA return elided in this excerpt]
		rate = float(current) / dif
		eta = long((float(total) - float(current)) / rate)
		(eta_mins, eta_secs) = divmod(eta, 60)
		# [overflow guard for very long ETAs elided in this excerpt]
		return '%02d:%02d' % (eta_mins, eta_secs)
	def calc_speed(start, now, bytes):
		"""Format the average download speed since `start` as a padded string."""
		# [elapsed-time computation (`dif`) elided in this excerpt]
		if bytes == 0 or dif < 0.001: # One millisecond
			return '%10s' % '---b/s'
		return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
	def best_block_size(elapsed_time, bytes):
		"""Choose the next read size, adapting to the observed throughput."""
		new_min = max(bytes / 2.0, 1.0)
		new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
		if elapsed_time < 0.001:
			# [fast-path return elided in this excerpt]
		rate = bytes / elapsed_time
		# [clamping of `rate` between new_min and new_max is elided in this
		# excerpt — TODO confirm]
	def parse_bytes(bytestr):
		"""Parse a string indicating a byte quantity into a long integer."""
		matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
		# [no-match handling elided in this excerpt]
		number = float(matchobj.group(1))
		# Suffix position in 'bkmgtpezy' gives the 1024-exponent ('' matches 'b' -> 0).
		multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
		return long(round(number * multiplier))
		# NOTE(review): the `def verify_url(...)` line is elided in this excerpt;
		# only the body below survives.
		"""Verify a URL is valid and data could be downloaded. Return real data URL."""
		request = urllib2.Request(url, None, std_headers)
		data = urllib2.urlopen(request)
		# [extraction of the final (post-redirect) URL and cleanup elided]
	def add_info_extractor(self, ie):
		"""Add an InfoExtractor object to the end of the list."""
		# [append to the internal extractor list elided in this excerpt]
		# Mutual registration: the extractor reports through this downloader.
		ie.set_downloader(self)
	def add_post_processor(self, pp):
		"""Add a PostProcessor object to the end of the chain."""
		# [append to the internal post-processor chain elided in this excerpt]
		pp.set_downloader(self)
249 def to_stdout(self, message, skip_eol=False):
250 """Print message to stdout if not in quiet mode."""
251 if not self.params.get('quiet', False):
252 print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
255 def to_stderr(self, message):
256 """Print message to stderr."""
257 print >>sys.stderr, message.encode(preferredencoding())
259 def fixed_template(self):
260 """Checks if the output template is fixed."""
261 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
	def trouble(self, message=None):
		"""Determine action to take when a download problem appears.

		Depending on if the downloader has been configured to ignore
		download errors or not, this method may throw an exception or
		not when errors are found, after printing the message.
		"""
		if message is not None:
			self.to_stderr(message)
		if not self.params.get('ignoreerrors', False):
			raise DownloadError(message)
		# Error was ignored: remember a non-zero status for download()'s return.
		self._download_retcode = 1
	def slow_down(self, start_time, byte_counter):
		"""Sleep if the download speed is over the rate limit."""
		rate_limit = self.params.get('ratelimit', None)
		if rate_limit is None or byte_counter == 0:
			# [early return elided in this excerpt]
		# [capture of the current time (`now`) elided in this excerpt]
		elapsed = now - start_time
		speed = float(byte_counter) / elapsed
		if speed > rate_limit:
			# Sleep just long enough to bring the average speed back under the cap.
			time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
289 def report_destination(self, filename):
290 """Report destination filename."""
291 self.to_stdout(u'[download] Destination: %s' % filename)
293 def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
294 """Report download progress."""
295 self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
296 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
298 def report_resuming_byte(self, resume_len):
299 """Report attemtp to resume at given byte."""
300 self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
302 def report_file_already_downloaded(self, file_name):
303 """Report file has already been fully downloaded."""
304 self.to_stdout(u'[download] %s has already been downloaded' % file_name)
306 def report_unable_to_resume(self):
307 """Report it was impossible to resume download."""
308 self.to_stdout(u'[download] Unable to resume')
	def report_finish(self):
		"""Report download finished."""
		# [final output (closing the progress line) elided in this excerpt]
	def process_info(self, info_dict):
		"""Process a single dictionary returned by an InfoExtractor."""
		# Do nothing else if in simulate mode
		if self.params.get('simulate', False):
			# Verify the URL is downloadable before printing anything.
			# [a `try:` line is elided in this excerpt]
			info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
			except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
				raise UnavailableFormatError

			# Forced printings: emit title/URL and skip the download.
			if self.params.get('forcetitle', False):
				print info_dict['title'].encode(preferredencoding())
			if self.params.get('forceurl', False):
				print info_dict['url'].encode(preferredencoding())
			# [early return elided in this excerpt]

		# Build the output filename from the template.
		# [a `try:` line is elided in this excerpt]
		template_dict = dict(info_dict)
		# %(epoch)s expands to the current Unix timestamp.
		template_dict['epoch'] = unicode(long(time.time()))
		filename = self.params['outtmpl'] % template_dict
		except (ValueError, KeyError), err:
			self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
		if self.params.get('nooverwrites', False) and os.path.exists(filename):
			self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
			# [early return elided in this excerpt]

		# [a `try:` line is elided in this excerpt]
		self.pmkdir(filename)
		except (OSError, IOError), err:
			self.trouble('ERROR: unable to create directories: %s' % str(err))
			# [return elided in this excerpt]

		# [a `try:` line is elided in this excerpt]
		success = self._do_download(filename, info_dict['url'].encode('utf-8'))
		except (OSError, IOError), err:
			# Treated as a format problem so callers can retry another format.
			raise UnavailableFormatError
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self.trouble('ERROR: unable to download video data: %s' % str(err))
			# [return elided in this excerpt]
		except (ContentTooShortError, ), err:
			self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
			# [return elided in this excerpt]

		# Run the post-processing chain on the downloaded file.
		# [`if success:` / `try:` lines elided in this excerpt — TODO confirm]
		self.post_process(filename, info_dict)
		except (PostProcessingError), err:
			self.trouble('ERROR: postprocessing: %s' % str(err))
	def download(self, url_list):
		"""Download a given list of URLs."""
		# A fixed (field-less) template cannot name more than one output file.
		if len(url_list) > 1 and self.fixed_template():
			raise SameFileError(self.params['outtmpl'])

		# [the `for url in url_list:` / `for ie in ...:` loop headers are
		# elided in this excerpt]
		suitable_found = False
		# Go to next InfoExtractor if not suitable
		if not ie.suitable(url):
			# [continue elided in this excerpt]

		# Suitable InfoExtractor found
		suitable_found = True

		# Extract information from URL and process it
		# [ie.extract(url) call elided in this excerpt]

		# Suitable InfoExtractor had been found; go to next URL
		# [break elided in this excerpt]

		if not suitable_found:
			self.trouble('ERROR: no suitable InfoExtractor: %s' % url)

		return self._download_retcode
	def post_process(self, filename, ie_info):
		"""Run the postprocessing chain on the given file."""
		# [copy of ie_info into `info` elided in this excerpt]
		info['filepath'] = filename
		# [loop over the registered PostProcessors elided in this excerpt]
	def _download_with_rtmpdump(self, filename, url):
		"""Download an RTMP stream by shelling out to the external rtmpdump tool."""
		self.report_destination(filename)

		# Check for rtmpdump first
		# [a `try:` line is elided in this excerpt]
		subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
		except (OSError, IOError):
			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
			# [return elided in this excerpt]

		# Download using rtmpdump. rtmpdump returns exit code 2 when
		# the connection was interrumpted and resuming appears to be
		# possible. This is part of rtmpdump's normal usage, AFAIK.
		# '-e' (resume) is appended only when the continuedl option is set.
		retval = subprocess.call(['rtmpdump', '-q', '-r', url, '-o', filename] + [[], ['-e']][self.params.get('continuedl', False)])
		# [a retry-loop header (presumably `while retval == 2:`) is elided in
		# this excerpt — TODO confirm]
		self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename), skip_eol=True)
		time.sleep(2.0) # This seems to be needed
		retval = subprocess.call(['rtmpdump', '-q', '-e', '-r', url, '-o', filename])
		# [a success check (presumably `if retval == 0:`) is elided — TODO confirm]
		self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
		# [success return elided in this excerpt]
		self.trouble('ERROR: rtmpdump exited with code %d' % retval)
		# [failure return elided in this excerpt]
	def _do_download(self, filename, url):
		"""Download `url` into `filename` over HTTP, resuming when possible."""
		# Attempt to download using rtmpdump
		if url.startswith('rtmp'):
			return self._download_with_rtmpdump(filename, url)

		# Two requests: `basic_request` never carries a Range header; `request`
		# may get one added below for resuming.
		basic_request = urllib2.Request(url, None, std_headers)
		request = urllib2.Request(url, None, std_headers)

		# Establish possible resume length
		if os.path.isfile(filename):
			resume_len = os.path.getsize(filename)
		# [else branch (resume_len = 0) elided in this excerpt]

		# Request parameters in case of being able to resume
		if self.params.get('continuedl', False) and resume_len != 0:
			self.report_resuming_byte(resume_len)
			request.add_header('Range','bytes=%d-' % resume_len)
			# [switch of the file open mode to append is elided — TODO confirm]

		# Establish connection
		# [a `try:` line is elided in this excerpt]
		data = urllib2.urlopen(request)
		except (urllib2.HTTPError, ), err:
			if err.code != 416: # 416 is 'Requested range not satisfiable'
				# [re-raise elided in this excerpt]
			# Unable to resume: retry without the Range header.
			data = urllib2.urlopen(basic_request)
			content_length = data.info()['Content-Length']
			if content_length is not None and long(content_length) == resume_len:
				# Because the file had already been fully downloaded
				self.report_file_already_downloaded(filename)
				# [return elided in this excerpt]
			# Because the server didn't let us
			self.report_unable_to_resume()
			# [reset of the resume state is elided — TODO confirm]

		data_len = data.info().get('Content-length', None)
		data_len_str = self.format_bytes(data_len)
		# [counter/stream initialization and the read-loop header are elided
		# in this excerpt]
		data_block = data.read(block_size)
		# [timing capture (`before`/`after`) elided in this excerpt]
		data_block_len = len(data_block)
		if data_block_len == 0:
			# [loop exit elided in this excerpt]
		byte_counter += data_block_len

		# Open file just in time
		# [`try:` (and presumably a first-iteration check) elided in this excerpt]
		stream = open(filename, open_mode)
		self.report_destination(filename)
		except (OSError, IOError), err:
			self.trouble('ERROR: unable to open for writing: %s' % str(err))
			# [return elided in this excerpt]
		stream.write(data_block)
		# Adapt the next read size to the throughput just observed.
		block_size = self.best_block_size(after - before, data_block_len)

		# Progress message
		percent_str = self.calc_percent(byte_counter, data_len)
		eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
		speed_str = self.calc_speed(start, time.time(), byte_counter)
		self.report_progress(percent_str, data_len_str, speed_str, eta_str)

		# Apply rate limit
		self.slow_down(start, byte_counter)

		# [end of the read loop elided in this excerpt]
		# Note: data_len is the raw header string, so compare against str().
		if data_len is not None and str(byte_counter) != data_len:
			raise ContentTooShortError(byte_counter, long(data_len))
		# [success return elided in this excerpt]
class InfoExtractor(object):
	"""Information Extractor class.

	Information extractors are the classes that, given a URL, extract
	information from the video (or videos) the URL refers to. This
	information includes the real video URL, the video title and simplified
	title, author and others. The information is stored in a dictionary
	which is then passed to the FileDownloader. The FileDownloader
	processes this information possibly downloading the video to the file
	system, among other possible outcomes. The dictionaries must include
	the following fields:

	id: Video identifier.
	url: Final video URL.
	uploader: Nickname of the video uploader.
	title: Literal title.
	stitle: Simplified title.
	ext: Video filename extension.

	Subclasses of this one should re-define the _real_initialize() and
	_real_extract() methods, as well as the suitable() static method.
	Probably, they should also be instantiated and added to the main
	downloader.
	"""
	def __init__(self, downloader=None):
		"""Constructor. Receives an optional downloader."""
		# [a readiness-flag assignment is elided in this excerpt — TODO confirm]
		self.set_downloader(downloader)

	# NOTE(review): the `def suitable(url):` staticmethod line is elided in
	# this excerpt; only its docstring survives below.
		"""Receives a URL and returns True if suitable for this IE."""
	def initialize(self):
		"""Initializes an instance (authentication, etc)."""
		# [a run-once guard around the call is elided in this excerpt — TODO confirm]
		self._real_initialize()
	def extract(self, url):
		"""Extracts URL information and returns it in list of dicts."""
		# [a call ensuring initialization is elided in this excerpt — TODO confirm]
		return self._real_extract(url)
555 def set_downloader(self, downloader):
556 """Sets the downloader for this IE."""
557 self._downloader = downloader
	def _real_initialize(self):
		"""Real initialization process. Redefine in subclasses."""
		# [default no-op body elided in this excerpt]

	def _real_extract(self, url):
		"""Real extraction process. Redefine in subclasses."""
		# [default no-op body elided in this excerpt]
class YoutubeIE(InfoExtractor):
	"""Information extractor for youtube.com."""

	# Group 1: optional URL prefix (/v/ or watch?v= forms); group 2: video id.
	_VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
	_LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
	_LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
	_NETRC_MACHINE = 'youtube'
	_available_formats = ['37', '22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag
	_video_extensions = {
		# [format-code -> extension entries and the closing brace are elided
		# in this excerpt]

	# NOTE(review): the `def suitable(url):` staticmethod line is elided in
	# this excerpt; only its body survives below.
		return (re.match(YoutubeIE._VALID_URL, url) is not None)
	def htmlentity_transform(matchobj):
		"""Transforms an HTML entity to a Unicode character."""
		entity = matchobj.group(1)

		# Known non-numeric HTML entity
		if entity in htmlentitydefs.name2codepoint:
			return unichr(htmlentitydefs.name2codepoint[entity])

		# Numeric character reference, decimal (#160) or hex (#x30C4).
		mobj = re.match(ur'(?u)#(x?\d+)', entity)
		# [an `if mobj is not None:` guard is elided in this excerpt]
		numstr = mobj.group(1)
		if numstr.startswith(u'x'):
			# ['x1F' -> '0x1F' so long(numstr, 16) parses; `base = 16` elided]
			numstr = u'0%s' % numstr
		# [else branch (`base = 10`) elided in this excerpt]
		return unichr(long(numstr, base))

		# Unknown entity in name, return its literal representation
		return (u'&%s;' % entity)
611 def report_lang(self):
612 """Report attempt to set language."""
613 self._downloader.to_stdout(u'[youtube] Setting language')
615 def report_login(self):
616 """Report attempt to log in."""
617 self._downloader.to_stdout(u'[youtube] Logging in')
619 def report_age_confirmation(self):
620 """Report attempt to confirm age."""
621 self._downloader.to_stdout(u'[youtube] Confirming age')
623 def report_video_info_webpage_download(self, video_id):
624 """Report attempt to download video info webpage."""
625 self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
627 def report_information_extraction(self, video_id):
628 """Report attempt to extract video information."""
629 self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
631 def report_unavailable_format(self, video_id, format):
632 """Report extracted video URL."""
633 self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
635 def report_rtmp_download(self):
636 """Indicate the download will use the RTMP protocol."""
637 self._downloader.to_stdout(u'[youtube] RTMP download detected')
	def _real_initialize(self):
		"""Set language, then optionally log in and confirm age on YouTube."""
		if self._downloader is None:
			# [early return elided in this excerpt]

		# [username/password defaults elided in this excerpt]
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			username = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			# [a `try:` line is elided in this excerpt]
			info = netrc.netrc().authenticators(self._NETRC_MACHINE)
			# [unpacking of `info` into username/password elided]
			raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				# [return elided in this excerpt]

		# Set language so scraped pages come back in a predictable locale.
		request = urllib2.Request(self._LANG_URL, None, std_headers)
		# [`try:` / report_lang() lines elided in this excerpt]
		urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
			# [return elided in this excerpt]

		# No authentication to be performed
		# [a no-credentials early return is elided — TODO confirm]

		# NOTE(review): the `login_form = {` opener is elided in this excerpt.
		'current_form': 'loginForm',
		'action_login': 'Log In',
		'username': username,
		'password': password,
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
		# [`try:` / report_login() lines elided in this excerpt]
		login_results = urllib2.urlopen(request).read()
		if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
			# The login form came back: credentials were rejected.
			self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
			# [return elided in this excerpt]
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			# [return elided in this excerpt]

		# NOTE(review): the `age_form = {` opener is elided in this excerpt.
		'action_confirm': 'Confirm',
		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
		# [a `try:` line is elided in this excerpt]
		self.report_age_confirmation()
		age_results = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			# [return elided in this excerpt]
	def _real_extract(self, url):
		"""Fetch video info for a YouTube URL and hand it to the downloader."""
		# Extract video id from URL
		mobj = re.match(self._VALID_URL, url)
		# [an `if mobj is None:` guard line is elided in this excerpt]
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			# [return elided in this excerpt]
		video_id = mobj.group(2)

		# Downloader parameters
		# [quality_index/format_param defaults elided in this excerpt]
		if self._downloader is not None:
			params = self._downloader.params
			format_param = params.get('format', None)
			if format_param == '0':
				# -b flag: start from the best-quality format and fall back
				# through _available_formats on failure.
				format_param = self._available_formats[quality_index]

		# [extension-selection comment elided]
		video_extension = self._video_extensions.get(format_param, 'flv')

		# [a format-retry loop header is elided in this excerpt — TODO confirm]
		video_info_url = 'http://www.youtube.com/get_video_info?&video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en' % video_id
		request = urllib2.Request(video_info_url, None, std_headers)
		# [a `try:` line is elided in this excerpt]
		self.report_video_info_webpage_download(video_id)
		video_info_webpage = urllib2.urlopen(request).read()
		video_info = parse_qs(video_info_webpage)
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
			# [return elided in this excerpt]
		self.report_information_extraction(video_id)

		# The 'token' ("t") parameter is required to build the real video URL.
		if 'token' not in video_info:
			# Attempt to see if YouTube has issued an error message
			if 'reason' not in video_info:
				self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason')
				# Dump the raw server reply so users can attach it to bug reports.
				stream = open('reportme-ydl-%s.dat' % time.time(), 'wb')
				stream.write(video_info_webpage)
				# [stream.close() / return elided in this excerpt]
			reason = urllib.unquote_plus(video_info['reason'][0])
			self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8'))
			# [return elided in this excerpt]
		token = urllib.unquote_plus(video_info['token'][0])
		video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
		if format_param is not None:
			video_real_url = '%s&fmt=%s' % (video_real_url, format_param)

		# Check possible RTMP download
		if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
			self.report_rtmp_download()
			video_real_url = video_info['conn'][0]

		# Uploader nickname.
		if 'author' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			# [return elided in this excerpt]
		video_uploader = urllib.unquote_plus(video_info['author'][0])

		# Title: unquote, decode, expand HTML entities, strip path separators.
		if 'title' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract video title')
			# [return elided in this excerpt]
		video_title = urllib.unquote_plus(video_info['title'][0])
		video_title = video_title.decode('utf-8')
		video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
		video_title = video_title.replace(os.sep, u'%')

		# Simplified title: collapse disallowed character runs to underscores.
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
		simple_title = simple_title.strip(ur'_')

		# [a `try:` line is elided in this excerpt]
		# Process video information
		self._downloader.process_info({
			'id': video_id.decode('utf-8'),
			'url': video_real_url.decode('utf-8'),
			'uploader': video_uploader.decode('utf-8'),
			'title': video_title,
			'stitle': simple_title,
			'ext': video_extension.decode('utf-8'),
		# [closing of the dict/call and a return are elided in this excerpt]
		except UnavailableFormatError, err:
			# Requested format failed: fall back through _available_formats.
			if quality_index == len(self._available_formats) - 1:
				# I don't ever expect this to happen
				self._downloader.trouble(u'ERROR: no known formats available for video')
				# [return elided in this excerpt]
			# [else branch / index increment elided in this excerpt]
			self.report_unavailable_format(video_id, format_param)
			format_param = self._available_formats[quality_index]
			# [loop continuation elided in this excerpt]
		self._downloader.trouble('ERROR: format not available for video')
class MetacafeIE(InfoExtractor):
	"""Information Extractor for metacafe.com."""

	# Group 1: video id (may be 'yt-<id>' for YouTube-hosted videos);
	# group 2: URL-embedded simplified title.
	_VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
	_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
	_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
821 def __init__(self, youtube_ie, downloader=None):
822 InfoExtractor.__init__(self, downloader)
823 self._youtube_ie = youtube_ie
827 return (re.match(MetacafeIE._VALID_URL, url) is not None)
829 def report_disclaimer(self):
830 """Report disclaimer retrieval."""
831 self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
833 def report_age_confirmation(self):
834 """Report attempt to confirm age."""
835 self._downloader.to_stdout(u'[metacafe] Confirming age')
837 def report_download_webpage(self, video_id):
838 """Report webpage download."""
839 self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
841 def report_extraction(self, video_id):
842 """Report information extraction."""
843 self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
	def _real_initialize(self):
		"""Visit the disclaimer page, then POST the over-18 family filter form."""
		# Retrieve disclaimer
		request = urllib2.Request(self._DISCLAIMER, None, std_headers)
		# [a `try:` line is elided in this excerpt]
		self.report_disclaimer()
		disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
			# [return elided in this excerpt]

		# NOTE(review): the `disclaimer_form = {` opener is elided in this excerpt.
		'submit': "Continue - I'm over 18",
		request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
		# [a `try:` line is elided in this excerpt]
		self.report_age_confirmation()
		disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			# [return elided in this excerpt]
	def _real_extract(self, url):
		"""Scrape a metacafe watch page and hand the video info to the downloader."""
		# Extract id and simplified title from URL
		mobj = re.match(self._VALID_URL, url)
		# [an `if mobj is None:` guard is elided in this excerpt]
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			# [return elided in this excerpt]

		video_id = mobj.group(1)

		# Check if video comes from YouTube
		mobj2 = re.match(r'^yt-(.*)$', video_id)
		if mobj2 is not None:
			# Delegate the whole extraction to the YouTube extractor.
			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
			# [return elided in this excerpt]

		simple_title = mobj.group(2).decode('utf-8')
		video_extension = 'flv'

		# Retrieve video webpage to extract further information
		request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
		# [a `try:` line is elided in this excerpt]
		self.report_download_webpage(video_id)
		webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
			# [return elided in this excerpt]

		# Extract URL, uploader and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
		# [an `if mobj is None:` guard is elided in this excerpt]
			self._downloader.trouble(u'ERROR: unable to extract media URL')
			# [return elided in this excerpt]
		mediaURL = urllib.unquote(mobj.group(1))

		# Historical gdaKey handling, kept disabled in the original:
		#mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
		# [more disabled gdaKey lines elided in this excerpt]
		#	self._downloader.trouble(u'ERROR: unable to extract gdaKey')
		#gdaKey = mobj.group(1)
		#video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
		# [the live assignment of video_url is elided — presumably
		# `video_url = mediaURL`; TODO confirm]

		mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
		# [guard elided in this excerpt]
			self._downloader.trouble(u'ERROR: unable to extract title')
			# [return elided in this excerpt]
		video_title = mobj.group(1).decode('utf-8')

		mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
		# [guard elided in this excerpt]
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			# [return elided in this excerpt]
		video_uploader = mobj.group(1)

		# [a `try:` line is elided in this excerpt]
		# Process video information
		self._downloader.process_info({
			'id': video_id.decode('utf-8'),
			'url': video_url.decode('utf-8'),
			'uploader': video_uploader.decode('utf-8'),
			'title': video_title,
			'stitle': simple_title,
			'ext': video_extension.decode('utf-8'),
		# [closing of the dict/call elided in this excerpt]
		except UnavailableFormatError:
			self._downloader.trouble(u'ERROR: format not available for video')
class YoutubeSearchIE(InfoExtractor):
	"""Information Extractor for YouTube search queries."""
	# Query syntax: 'ytsearch:Q' (1 result), 'ytsearchN:Q', 'ytsearchall:Q'.
	_VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
	_TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
	_VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
	_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
	# Hard cap on how many results 'all' (or a large N) may request.
	_max_youtube_results = 1000
948 def __init__(self, youtube_ie, downloader=None):
949 InfoExtractor.__init__(self, downloader)
950 self._youtube_ie = youtube_ie
954 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
956 def report_download_page(self, query, pagenum):
957 """Report attempt to download playlist page with given number."""
958 self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
	def _real_initialize(self):
		# Initialization is delegated to the wrapped YouTube extractor.
		self._youtube_ie.initialize()
	def _real_extract(self, query):
		"""Parse the ytsearch prefix and download the requested number of results."""
		mobj = re.match(self._VALID_QUERY, query)
		# [an `if mobj is None:` guard is elided in this excerpt]
			self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
			# [return elided in this excerpt]

		prefix, query = query.split(':')
		# [an empty-prefix branch is elided in this excerpt — single result]
		self._download_n_results(query, 1)
		# [return elided in this excerpt]
		elif prefix == 'all':
			self._download_n_results(query, self._max_youtube_results)
			# [return elided in this excerpt]
		# [the else branch with a `try: n = long(prefix)` is elided; the lines
		# below handle n <= 0, n too large, and the normal case]
			self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
			# [return elided in this excerpt]
		elif n > self._max_youtube_results:
			self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
			n = self._max_youtube_results
		self._download_n_results(query, n)
		# [return elided in this excerpt]
		except ValueError: # parsing prefix as integer fails
			self._download_n_results(query, 1)
			# [return elided in this excerpt]
	def _download_n_results(self, query, n):
		"""Downloads a specified number of results for a query"""
		# [initialization of video_ids, already_seen, pagenum and the page
		# loop header are elided in this excerpt]

		self.report_download_page(query, pagenum)
		result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
		request = urllib2.Request(result_url, None, std_headers)
		# [a `try:` line is elided in this excerpt]
		page = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
			# [return elided in this excerpt]

		# Extract video identifiers
		for mobj in re.finditer(self._VIDEO_INDICATOR, page):
			# Slice the matched href: 'href="/watch?v=ID"' -> ID ([:-1] drops
			# the trailing quote).
			video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
			if video_id not in already_seen:
				video_ids.append(video_id)
				already_seen.add(video_id)
			if len(video_ids) == n:
				# Specified n videos reached
				for id in video_ids:
					self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
				# [return elided in this excerpt]

		if re.search(self._MORE_PAGES_INDICATOR, page) is None:
			# Last results page: download everything collected and stop.
			for id in video_ids:
				self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
			# [return elided in this excerpt]

		pagenum = pagenum + 1
class YoutubePlaylistIE(InfoExtractor):
	"""Information Extractor for YouTube playlists."""

	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*'
	_TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
	# Used as a %-formatted substring test against the page text, not as a regex.
	_MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s'
1037 def __init__(self, youtube_ie, downloader=None):
1038 InfoExtractor.__init__(self, downloader)
1039 self._youtube_ie = youtube_ie
1043 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1045 def report_download_page(self, playlist_id, pagenum):
1046 """Report attempt to download playlist page with given number."""
1047 self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
	def _real_initialize(self):
		# Initialization is delegated to the wrapped YouTube extractor.
		self._youtube_ie.initialize()
	def _real_extract(self, url):
		"""Collect all video ids from the playlist pages and extract each one."""
		# Extract playlist id
		mobj = re.match(self._VALID_URL, url)
		# [an `if mobj is None:` guard is elided in this excerpt]
			self._downloader.trouble(u'ERROR: invalid url: %s' % url)
			# [return elided in this excerpt]

		# Download playlist pages
		playlist_id = mobj.group(1)
		# [initialization of video_ids/pagenum and the page loop header are
		# elided in this excerpt]
		self.report_download_page(playlist_id, pagenum)
		request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
		# [a `try:` line is elided in this excerpt]
		page = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
			# [return elided in this excerpt]

		# Extract video identifiers
		# [initialization of ids_in_page elided in this excerpt]
		for mobj in re.finditer(self._VIDEO_INDICATOR, page):
			if mobj.group(1) not in ids_in_page:
				ids_in_page.append(mobj.group(1))
		video_ids.extend(ids_in_page)

		# Stop when no link to the next page appears in the page text.
		if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page:
			# [loop exit elided in this excerpt]
		pagenum = pagenum + 1

		for id in video_ids:
			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1088 class YoutubeUserIE(InfoExtractor):
1089 """Information Extractor for YouTube users."""
1091 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
1092 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1093 _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
1096 def __init__(self, youtube_ie, downloader=None):
1097 InfoExtractor.__init__(self, downloader)
1098 self._youtube_ie = youtube_ie
1102 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
1104 def report_download_page(self, username):
1105 """Report attempt to download user page."""
1106 self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))
1108 def _real_initialize(self):
1109 self._youtube_ie.initialize()
1111 def _real_extract(self, url):
1113 mobj = re.match(self._VALID_URL, url)
1115 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1118 # Download user page
1119 username = mobj.group(1)
1123 self.report_download_page(username)
1124 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
1126 page = urllib2.urlopen(request).read()
1127 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1128 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1131 # Extract video identifiers
1134 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1135 if mobj.group(1) not in ids_in_page:
1136 ids_in_page.append(mobj.group(1))
1137 video_ids.extend(ids_in_page)
1139 for id in video_ids:
1140 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
class PostProcessor(object):
	"""Base class for elements of a downloader's post-processing chain.

	PostProcessor objects are registered on a downloader via its
	add_post_processor() method. Once a download finishes successfully,
	the downloader walks its chain of PostProcessors, feeding the first
	one an initial information dictionary and each subsequent one the
	value returned by its predecessor.

	The chain stops as soon as a PostProcessor returns None, or when the
	last element has run.

	Like InfoExtractor objects, PostProcessors take part in a "mutual
	registration" scheme with their downloader.
	"""

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Attach the given downloader as the one this PP belongs to."""
		self._downloader = downloader

	def run(self, information):
		"""Execute this post-processing step.

		The "information" argument is a dictionary shaped like the ones
		produced by InfoExtractors, extended with a "filepath" field
		pointing at the downloaded file.

		Returning None halts the post-processing chain; returning an
		information dictionary (possibly the received one with some
		fields changed) passes it along to the next PP in the chain.
		A PostProcessingError may also be raised to signal failure to
		the downloader.
		"""
		# Base-class behavior: forward the dictionary unchanged.
		return information
### MAIN PROGRAM ###
if __name__ == '__main__':
	try:
		# Modules needed only when running the main program
		import getpass
		import optparse

		# Function to update the program file with the latest version from bitbucket.org
		def update_self(downloader, filename):
			# Note: downloader only used for options
			if not os.access (filename, os.W_OK):
				sys.exit('ERROR: no write permissions on %s' % filename)

			downloader.to_stdout('Updating to latest stable version...')
			latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
			latest_version = urllib.urlopen(latest_url).read().strip()
			prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
			newcontent = urllib.urlopen(prog_url).read()
			stream = open(filename, 'w')
			try:
				stream.write(newcontent)
			finally:
				# Make sure the program file is closed even if write() fails
				stream.close()
			downloader.to_stdout('Updated to version %s' % latest_version)

		# General configuration
		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
		urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
		socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

		# Parse command line
		parser = optparse.OptionParser(
			usage='Usage: %prog [options] url...',
			version='2010.01.05',
			conflict_handler='resolve',
			)

		parser.add_option('-h', '--help',
				action='help', help='print this help text and exit')
		parser.add_option('-v', '--version',
				action='version', help='print program version and exit')
		parser.add_option('-U', '--update',
				action='store_true', dest='update_self', help='update this program to latest stable version')
		parser.add_option('-i', '--ignore-errors',
				action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
		parser.add_option('-r', '--rate-limit',
				dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')

		authentication = optparse.OptionGroup(parser, 'Authentication Options')
		authentication.add_option('-u', '--username',
				dest='username', metavar='UN', help='account username')
		authentication.add_option('-p', '--password',
				dest='password', metavar='PW', help='account password')
		authentication.add_option('-n', '--netrc',
				action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
		parser.add_option_group(authentication)

		video_format = optparse.OptionGroup(parser, 'Video Format Options')
		video_format.add_option('-f', '--format',
				action='store', dest='format', metavar='FMT', help='video format code')
		video_format.add_option('-b', '--best-quality',
				action='store_const', dest='format', help='download the best quality video possible', const='0')
		video_format.add_option('-m', '--mobile-version',
				action='store_const', dest='format', help='alias for -f 17', const='17')
		video_format.add_option('-d', '--high-def',
				action='store_const', dest='format', help='alias for -f 22', const='22')
		parser.add_option_group(video_format)

		verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
		verbosity.add_option('-q', '--quiet',
				action='store_true', dest='quiet', help='activates quiet mode', default=False)
		verbosity.add_option('-s', '--simulate',
				action='store_true', dest='simulate', help='do not download video', default=False)
		verbosity.add_option('-g', '--get-url',
				action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
		verbosity.add_option('-e', '--get-title',
				action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
		parser.add_option_group(verbosity)

		filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
		filesystem.add_option('-t', '--title',
				action='store_true', dest='usetitle', help='use title in file name', default=False)
		filesystem.add_option('-l', '--literal',
				action='store_true', dest='useliteral', help='use literal title in file name', default=False)
		filesystem.add_option('-o', '--output',
				dest='outtmpl', metavar='TPL', help='output filename template')
		filesystem.add_option('-a', '--batch-file',
				dest='batchfile', metavar='F', help='file containing URLs to download')
		filesystem.add_option('-w', '--no-overwrites',
				action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
		filesystem.add_option('-c', '--continue',
				action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
		parser.add_option_group(filesystem)

		(opts, args) = parser.parse_args()

		# Batch file verification
		batchurls = []
		if opts.batchfile is not None:
			try:
				batchfd = open(opts.batchfile, 'r')
				try:
					batchurls = [x.strip() for x in batchfd.readlines()]
					batchurls = [x for x in batchurls if len(x) > 0]
				finally:
					# Close the batch file instead of leaking the handle
					batchfd.close()
			except IOError:
				sys.exit(u'ERROR: batch file could not be read')
		all_urls = batchurls + args

		# Conflicting, missing and erroneous options
		if opts.usenetrc and (opts.username is not None or opts.password is not None):
			parser.error(u'using .netrc conflicts with giving username/password')
		if opts.password is not None and opts.username is None:
			parser.error(u'account username missing')
		if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
			parser.error(u'using output template conflicts with using title or literal title')
		if opts.usetitle and opts.useliteral:
			parser.error(u'using title conflicts with using literal title')
		if opts.username is not None and opts.password is None:
			opts.password = getpass.getpass(u'Type account password and press return:')
		if opts.ratelimit is not None:
			numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
			if numeric_limit is None:
				parser.error(u'invalid rate limit specified')
			opts.ratelimit = numeric_limit

		# Information extractors
		youtube_ie = YoutubeIE()
		metacafe_ie = MetacafeIE(youtube_ie)
		youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
		youtube_user_ie = YoutubeUserIE(youtube_ie)
		youtube_search_ie = YoutubeSearchIE(youtube_ie)

		# File downloader
		fd = FileDownloader({
			'usenetrc': opts.usenetrc,
			'username': opts.username,
			'password': opts.password,
			'quiet': (opts.quiet or opts.geturl or opts.gettitle),
			'forceurl': opts.geturl,
			'forcetitle': opts.gettitle,
			'simulate': (opts.simulate or opts.geturl or opts.gettitle),
			'format': opts.format,
			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
				or u'%(id)s.%(ext)s'),
			'ignoreerrors': opts.ignoreerrors,
			'ratelimit': opts.ratelimit,
			'nooverwrites': opts.nooverwrites,
			'continuedl': opts.continue_dl,
			})
		# More specific extractors first: the generic YoutubeIE goes last
		fd.add_info_extractor(youtube_search_ie)
		fd.add_info_extractor(youtube_pl_ie)
		fd.add_info_extractor(youtube_user_ie)
		fd.add_info_extractor(metacafe_ie)
		fd.add_info_extractor(youtube_ie)

		# Update version
		if opts.update_self:
			update_self(fd, sys.argv[0])

		# Maybe do nothing
		if len(all_urls) < 1:
			if not opts.update_self:
				parser.error(u'you must provide at least one URL')
			else:
				sys.exit()

		retcode = fd.download(all_urls)
		# Propagate the download outcome through the process exit status
		sys.exit(retcode)

	except DownloadError:
		sys.exit(1)
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		sys.exit(u'\nERROR: Interrupted by user')