youtube-dl

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 # Author: Ricardo Garcia Gonzalez
   4 # Author: Danny Colligan
   5 # License: Public domain code
   6 import htmlentitydefs
   7 import httplib
   8 import locale
   9 import math
  10 import netrc
  11 import os
  12 import os.path
  13 import re
  14 import socket
  15 import string
  16 import sys
  17 import time
  18 import urllib
  19 import urllib2
  20
  21 std_headers = {
  22         'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2',
  23         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
  24         'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
  25         'Accept-Language': 'en-us,en;q=0.5',
  26 }
  27
  28 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
  29
  30 def preferredencoding():
  31         """Get preferred encoding.
  32
  33         Returns the best encoding scheme for the system, based on
  34         locale.getpreferredencoding() and some further tweaks.
  35         """
  36         def yield_preferredencoding():
  37                 try:
  38                         pref = locale.getpreferredencoding()
  39                         u'TEST'.encode(pref)
  40                 except:
  41                         pref = 'UTF-8'
  42                 while True:
  43                         yield pref
  44         return yield_preferredencoding().next()
  45
  46 class DownloadError(Exception):
  47         """Download Error exception.
  48
  49         This exception may be thrown by FileDownloader objects if they are not
  50         configured to continue on errors. They will contain the appropriate
  51         error message.
  52         """
  53         pass
  54
  55 class SameFileError(Exception):
  56         """Same File exception.
  57
  58         This exception will be thrown by FileDownloader objects if they detect
  59         multiple files would have to be downloaded to the same file on disk.
  60         """
  61         pass
  62
  63 class PostProcessingError(Exception):
  64         """Post Processing exception.
  65
  66         This exception may be raised by PostProcessor's .run() method to
  67         indicate an error in the postprocessing task.
  68         """
  69         pass
  70
  71 class UnavailableFormatError(Exception):
  72         """Unavailable Format exception.
  73
  74         This exception will be thrown when a video is requested
  75         in a format that is not available for that video.
  76         """
  77         pass
  78
  79 class ContentTooShortError(Exception):
  80         """Content Too Short exception.
  81
  82         This exception may be raised by FileDownloader objects when a file they
  83         download is too small for what the server announced first, indicating
  84         the connection was probably interrupted.
  85         """
  86         # Both in bytes
  87         downloaded = None
  88         expected = None
  89
  90         def __init__(self, downloaded, expected):
  91                 self.downloaded = downloaded
  92                 self.expected = expected
  93
  94 class FileDownloader(object):
  95         """File Downloader class.
  96
  97         File downloader objects are the ones responsible of downloading the
  98         actual video file and writing it to disk if the user has requested
  99         it, among some other tasks. In most cases there should be one per
 100         program. As, given a video URL, the downloader doesn't know how to
 101         extract all the needed information, task that InfoExtractors do, it
 102         has to pass the URL to one of them.
 103
 104         For this, file downloader objects have a method that allows
 105         InfoExtractors to be registered in a given order. When it is passed
 106         a URL, the file downloader handles it to the first InfoExtractor it
 107         finds that reports being able to handle it. The InfoExtractor extracts
 108         all the information about the video or videos the URL refers to, and
 109         asks the FileDownloader to process the video information, possibly
 110         downloading the video.
 111
 112         File downloaders accept a lot of parameters. In order not to saturate
 113         the object constructor with arguments, it receives a dictionary of
 114         options instead. These options are available through the params
 115         attribute for the InfoExtractors to use. The FileDownloader also
 116         registers itself as the downloader in charge for the InfoExtractors
 117         that are added to it, so this is a "mutual registration".
 118
 119         Available options:
 120
 121         username:       Username for authentication purposes.
 122         password:       Password for authentication purposes.
 123         usenetrc:       Use netrc for authentication instead.
 124         quiet:          Do not print messages to stdout.
 125         forceurl:       Force printing final URL.
 126         forcetitle:     Force printing title.
 127         simulate:       Do not download the video files.
 128         format:         Video format code.
 129         outtmpl:        Template for output names.
 130         ignoreerrors:   Do not stop on download errors.
 131         ratelimit:      Download speed limit, in bytes/sec.
 132         nooverwrites:   Prevent overwriting files.
 133         continuedl:     Try to continue downloads if possible.
 134         """
 135
        # Class-level defaults. __init__ rebinds all four names on every
        # instance it creates, so instances built through the constructor
        # do not share the two lists below.
        params = None                   # options dictionary (see "Available options")
        _ies = []                       # registered InfoExtractors, in registration order
        _pps = []                       # registered PostProcessors, in chain order
        _download_retcode = None        # value returned by download(); trouble() sets it to 1
 140
 141         def __init__(self, params):
 142                 """Create a FileDownloader object with the given options."""
 143                 self._ies = []
 144                 self._pps = []
 145                 self._download_retcode = 0
 146                 self.params = params
 147
 148         @staticmethod
 149         def pmkdir(filename):
 150                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
 151                 components = filename.split(os.sep)
 152                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
 153                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
 154                 for dir in aggregate:
 155                         if not os.path.exists(dir):
 156                                 os.mkdir(dir)
 157
 158         @staticmethod
 159         def format_bytes(bytes):
 160                 if bytes is None:
 161                         return 'N/A'
 162                 if type(bytes) is str:
 163                         bytes = float(bytes)
 164                 if bytes == 0.0:
 165                         exponent = 0
 166                 else:
 167                         exponent = long(math.log(bytes, 1024.0))
 168                 suffix = 'bkMGTPEZY'[exponent]
 169                 converted = float(bytes) / float(1024**exponent)
 170                 return '%.2f%s' % (converted, suffix)
 171
 172         @staticmethod
 173         def calc_percent(byte_counter, data_len):
 174                 if data_len is None:
 175                         return '---.-%'
 176                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 177
 178         @staticmethod
 179         def calc_eta(start, now, total, current):
 180                 if total is None:
 181                         return '--:--'
 182                 dif = now - start
 183                 if current == 0 or dif < 0.001: # One millisecond
 184                         return '--:--'
 185                 rate = float(current) / dif
 186                 eta = long((float(total) - float(current)) / rate)
 187                 (eta_mins, eta_secs) = divmod(eta, 60)
 188                 if eta_mins > 99:
 189                         return '--:--'
 190                 return '%02d:%02d' % (eta_mins, eta_secs)
 191
 192         @staticmethod
 193         def calc_speed(start, now, bytes):
 194                 dif = now - start
 195                 if bytes == 0 or dif < 0.001: # One millisecond
 196                         return '%10s' % '---b/s'
 197                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 198
 199         @staticmethod
 200         def best_block_size(elapsed_time, bytes):
 201                 new_min = max(bytes / 2.0, 1.0)
 202                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 203                 if elapsed_time < 0.001:
 204                         return long(new_max)
 205                 rate = bytes / elapsed_time
 206                 if rate > new_max:
 207                         return long(new_max)
 208                 if rate < new_min:
 209                         return long(new_min)
 210                 return long(rate)
 211
 212         @staticmethod
 213         def parse_bytes(bytestr):
 214                 """Parse a string indicating a byte quantity into a long integer."""
 215                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 216                 if matchobj is None:
 217                         return None
 218                 number = float(matchobj.group(1))
 219                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 220                 return long(round(number * multiplier))
 221
 222         @staticmethod
 223         def verify_url(url):
 224                 """Verify a URL is valid and data could be downloaded. Return real data URL."""
 225                 request = urllib2.Request(url, None, std_headers)
 226                 data = urllib2.urlopen(request)
 227                 data.read(1)
 228                 url = data.geturl()
 229                 data.close()
 230                 return url
 231
 232         def add_info_extractor(self, ie):
 233                 """Add an InfoExtractor object to the end of the list."""
 234                 self._ies.append(ie)
 235                 ie.set_downloader(self)
 236
 237         def add_post_processor(self, pp):
 238                 """Add a PostProcessor object to the end of the chain."""
 239                 self._pps.append(pp)
 240                 pp.set_downloader(self)
 241
 242         def to_stdout(self, message, skip_eol=False):
 243                 """Print message to stdout if not in quiet mode."""
 244                 if not self.params.get('quiet', False):
 245                         print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
 246                         sys.stdout.flush()
 247
 248         def to_stderr(self, message):
 249                 """Print message to stderr."""
 250                 print >>sys.stderr, message.encode(preferredencoding())
 251
 252         def fixed_template(self):
 253                 """Checks if the output template is fixed."""
 254                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
 255
 256         def trouble(self, message=None):
 257                 """Determine action to take when a download problem appears.
 258
 259                 Depending on if the downloader has been configured to ignore
 260                 download errors or not, this method may throw an exception or
 261                 not when errors are found, after printing the message.
 262                 """
 263                 if message is not None:
 264                         self.to_stderr(message)
 265                 if not self.params.get('ignoreerrors', False):
 266                         raise DownloadError(message)
 267                 self._download_retcode = 1
 268
 269         def slow_down(self, start_time, byte_counter):
 270                 """Sleep if the download speed is over the rate limit."""
 271                 rate_limit = self.params.get('ratelimit', None)
 272                 if rate_limit is None or byte_counter == 0:
 273                         return
 274                 now = time.time()
 275                 elapsed = now - start_time
 276                 if elapsed <= 0.0:
 277                         return
 278                 speed = float(byte_counter) / elapsed
 279                 if speed > rate_limit:
 280                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 281
 282         def report_destination(self, filename):
 283                 """Report destination filename."""
 284                 self.to_stdout(u'[download] Destination: %s' % filename)
 285
 286         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 287                 """Report download progress."""
 288                 self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
 289                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 290
 291         def report_resuming_byte(self, resume_len):
 292                 """Report attemtp to resume at given byte."""
 293                 self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
 294
 295         def report_file_already_downloaded(self, file_name):
 296                 """Report file has already been fully downloaded."""
 297                 self.to_stdout(u'[download] %s has already been downloaded' % file_name)
 298
 299         def report_unable_to_resume(self):
 300                 """Report it was impossible to resume download."""
 301                 self.to_stdout(u'[download] Unable to resume')
 302
 303         def report_finish(self):
 304                 """Report download finished."""
 305                 self.to_stdout(u'')
 306
 307         def process_info(self, info_dict):
 308                 """Process a single dictionary returned by an InfoExtractor."""
 309                 # Do nothing else if in simulate mode
 310                 if self.params.get('simulate', False):
 311                         try:
 312                                 info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
 313                         except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
 314                                 raise UnavailableFormatError
 315
 316                         # Forced printings
 317                         if self.params.get('forcetitle', False):
 318                                 print info_dict['title'].encode(preferredencoding())
 319                         if self.params.get('forceurl', False):
 320                                 print info_dict['url'].encode(preferredencoding())
 321
 322                         return
 323
 324                 try:
 325                         template_dict = dict(info_dict)
 326                         template_dict['epoch'] = unicode(long(time.time()))
 327                         filename = self.params['outtmpl'] % template_dict
 328                 except (ValueError, KeyError), err:
 329                         self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
 330                 if self.params.get('nooverwrites', False) and os.path.exists(filename):
 331                         self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
 332                         return
 333
 334                 try:
 335                         self.pmkdir(filename)
 336                 except (OSError, IOError), err:
 337                         self.trouble('ERROR: unable to create directories: %s' % str(err))
 338                         return
 339
 340                 try:
 341                         success = self._do_download(filename, info_dict['url'].encode('utf-8'))
 342                 except (OSError, IOError), err:
 343                         raise UnavailableFormatError
 344                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 345                         self.trouble('ERROR: unable to download video data: %s' % str(err))
 346                         return
 347                 except (ContentTooShortError, ), err:
 348                         self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 349                         return
 350
 351                 if success:
 352                         try:
 353                                 self.post_process(filename, info_dict)
 354                         except (PostProcessingError), err:
 355                                 self.trouble('ERROR: postprocessing: %s' % str(err))
 356                                 return
 357
 358         def download(self, url_list):
 359                 """Download a given list of URLs."""
 360                 if len(url_list) > 1 and self.fixed_template():
 361                         raise SameFileError(self.params['outtmpl'])
 362
 363                 for url in url_list:
 364                         suitable_found = False
 365                         for ie in self._ies:
 366                                 # Go to next InfoExtractor if not suitable
 367                                 if not ie.suitable(url):
 368                                         continue
 369
 370                                 # Suitable InfoExtractor found
 371                                 suitable_found = True
 372
 373                                 # Extract information from URL and process it
 374                                 ie.extract(url)
 375
 376                                 # Suitable InfoExtractor had been found; go to next URL
 377                                 break
 378
 379                         if not suitable_found:
 380                                 self.trouble('ERROR: no suitable InfoExtractor: %s' % url)
 381
 382                 return self._download_retcode
 383
 384         def post_process(self, filename, ie_info):
 385                 """Run the postprocessing chain on the given file."""
 386                 info = dict(ie_info)
 387                 info['filepath'] = filename
 388                 for pp in self._pps:
 389                         info = pp.run(info)
 390                         if info is None:
 391                                 break
 392
 393         def _do_download(self, filename, url):
 394                 stream = None
 395                 open_mode = 'wb'
 396                 basic_request = urllib2.Request(url, None, std_headers)
 397                 request = urllib2.Request(url, None, std_headers)
 398
 399                 # Establish possible resume length
 400                 if os.path.isfile(filename):
 401                         resume_len = os.path.getsize(filename)
 402                 else:
 403                         resume_len = 0
 404
 405                 # Request parameters in case of being able to resume
 406                 if self.params.get('continuedl', False) and resume_len != 0:
 407                         self.report_resuming_byte(resume_len)
 408                         request.add_header('Range','bytes=%d-' % resume_len)
 409                         open_mode = 'ab'
 410
 411                 # Establish connection
 412                 try:
 413                         data = urllib2.urlopen(request)
 414                 except (urllib2.HTTPError, ), err:
 415                         if err.code != 416: #  416 is 'Requested range not satisfiable'
 416                                 raise
 417                         # Unable to resume
 418                         data = urllib2.urlopen(basic_request)
 419                         content_length = data.info()['Content-Length']
 420
 421                         if content_length is not None and long(content_length) == resume_len:
 422                                 # Because the file had already been fully downloaded
 423                                 self.report_file_already_downloaded(filename)
 424                                 return True
 425                         else:
 426                                 # Because the server didn't let us
 427                                 self.report_unable_to_resume()
 428                                 open_mode = 'wb'
 429
 430                 data_len = data.info().get('Content-length', None)
 431                 data_len_str = self.format_bytes(data_len)
 432                 byte_counter = 0
 433                 block_size = 1024
 434                 start = time.time()
 435                 while True:
 436                         # Download and write
 437                         before = time.time()
 438                         data_block = data.read(block_size)
 439                         after = time.time()
 440                         data_block_len = len(data_block)
 441                         if data_block_len == 0:
 442                                 break
 443                         byte_counter += data_block_len
 444
 445                         # Open file just in time
 446                         if stream is None:
 447                                 try:
 448                                         stream = open(filename, open_mode)
 449                                         self.report_destination(filename)
 450                                 except (OSError, IOError), err:
 451                                         self.trouble('ERROR: unable to open for writing: %s' % str(err))
 452                                         return False
 453                         stream.write(data_block)
 454                         block_size = self.best_block_size(after - before, data_block_len)
 455
 456                         # Progress message
 457                         percent_str = self.calc_percent(byte_counter, data_len)
 458                         eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
 459                         speed_str = self.calc_speed(start, time.time(), byte_counter)
 460                         self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 461
 462                         # Apply rate limit
 463                         self.slow_down(start, byte_counter)
 464
 465                 self.report_finish()
 466                 if data_len is not None and str(byte_counter) != data_len:
 467                         raise ContentTooShortError(byte_counter, long(data_len))
 468                 return True
 469
 470 class InfoExtractor(object):
 471         """Information Extractor class.
 472
 473         Information extractors are the classes that, given a URL, extract
 474         information from the video (or videos) the URL refers to. This
 475         information includes the real video URL, the video title and simplified
 476         title, author and others. The information is stored in a dictionary
 477         which is then passed to the FileDownloader. The FileDownloader
 478         processes this information possibly downloading the video to the file
 479         system, among other possible outcomes. The dictionaries must include
 480         the following fields:
 481
 482         id:             Video identifier.
 483         url:            Final video URL.
 484         uploader:       Nickname of the video uploader.
 485         title:          Literal title.
 486         stitle:         Simplified title.
 487         ext:            Video filename extension.
 488
 489         Subclasses of this one should re-define the _real_initialize() and
 490         _real_extract() methods, as well as the suitable() static method.
 491         Probably, they should also be instantiated and added to the main
 492         downloader.
 493         """
 494
 495         _ready = False
 496         _downloader = None
 497
 498         def __init__(self, downloader=None):
 499                 """Constructor. Receives an optional downloader."""
 500                 self._ready = False
 501                 self.set_downloader(downloader)
 502
 503         @staticmethod
 504         def suitable(url):
 505                 """Receives a URL and returns True if suitable for this IE."""
 506                 return False
 507
 508         def initialize(self):
 509                 """Initializes an instance (authentication, etc)."""
 510                 if not self._ready:
 511                         self._real_initialize()
 512                         self._ready = True
 513
 514         def extract(self, url):
 515                 """Extracts URL information and returns it in list of dicts."""
 516                 self.initialize()
 517                 return self._real_extract(url)
 518
 519         def set_downloader(self, downloader):
 520                 """Sets the downloader for this IE."""
 521                 self._downloader = downloader
 522
 523         def _real_initialize(self):
 524                 """Real initialization process. Redefine in subclasses."""
 525                 pass
 526
 527         def _real_extract(self, url):
 528                 """Real extraction process. Redefine in subclasses."""
 529                 pass
 530
 531 class YoutubeIE(InfoExtractor):
 532         """Information extractor for youtube.com."""
 533
        # Pattern used by suitable() and _real_extract(). Group 1 is the
        # optional youtube.com URL prefix; group 2 is what _real_extract()
        # takes as the video id.
        _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
        # URL fetched by _real_initialize() in its "set language" step.
        _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
        # URL the login form is posted to by _real_initialize().
        _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
        # URL the age confirmation form is posted to by _real_initialize().
        _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
        # Machine name looked up in .netrc when the 'usenetrc' option is set.
        _NETRC_MACHINE = 'youtube'
        _available_formats = ['37', '22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag
        # File extension for each format code; _real_extract() falls back to
        # 'flv' for codes that are not listed here.
        _video_extensions = {
                '13': '3gp',
                '17': 'mp4',
                '18': 'mp4',
                '22': 'mp4',
                '37': 'mp4',
        }
 547
 548         @staticmethod
 549         def suitable(url):
 550                 return (re.match(YoutubeIE._VALID_URL, url) is not None)
 551
 552         @staticmethod
 553         def htmlentity_transform(matchobj):
 554                 """Transforms an HTML entity to a Unicode character."""
 555                 entity = matchobj.group(1)
 556
 557                 # Known non-numeric HTML entity
 558                 if entity in htmlentitydefs.name2codepoint:
 559                         return unichr(htmlentitydefs.name2codepoint[entity])
 560
 561                 # Unicode character
 562                 mobj = re.match(ur'(?u)#(x?\d+)', entity)
 563                 if mobj is not None:
 564                         numstr = mobj.group(1)
 565                         if numstr.startswith(u'x'):
 566                                 base = 16
 567                                 numstr = u'0%s' % numstr
 568                         else:
 569                                 base = 10
 570                         return unichr(long(numstr, base))
 571
 572                 # Unknown entity in name, return its literal representation
 573                 return (u'&%s;' % entity)
 574
 575         def report_lang(self):
 576                 """Report attempt to set language."""
 577                 self._downloader.to_stdout(u'[youtube] Setting language')
 578
 579         def report_login(self):
 580                 """Report attempt to log in."""
 581                 self._downloader.to_stdout(u'[youtube] Logging in')
 582
 583         def report_age_confirmation(self):
 584                 """Report attempt to confirm age."""
 585                 self._downloader.to_stdout(u'[youtube] Confirming age')
 586
 587         def report_video_info_webpage_download(self, video_id):
 588                 """Report attempt to download video info webpage."""
 589                 self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
 590
 591         def report_information_extraction(self, video_id):
 592                 """Report attempt to extract video information."""
 593                 self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
 594
 595         def report_unavailable_format(self, video_id, format):
 596                 """Report extracted video URL."""
 597                 self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
 598
 599         def _real_initialize(self):
 600                 if self._downloader is None:
 601                         return
 602
 603                 username = None
 604                 password = None
 605                 downloader_params = self._downloader.params
 606
 607                 # Attempt to use provided username and password or .netrc data
 608                 if downloader_params.get('username', None) is not None:
 609                         username = downloader_params['username']
 610                         password = downloader_params['password']
 611                 elif downloader_params.get('usenetrc', False):
 612                         try:
 613                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
 614                                 if info is not None:
 615                                         username = info[0]
 616                                         password = info[2]
 617                                 else:
 618                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
 619                         except (IOError, netrc.NetrcParseError), err:
 620                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
 621                                 return
 622
 623                 # Set language
 624                 request = urllib2.Request(self._LANG_URL, None, std_headers)
 625                 try:
 626                         self.report_lang()
 627                         urllib2.urlopen(request).read()
 628                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 629                         self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
 630                         return
 631
 632                 # No authentication to be performed
 633                 if username is None:
 634                         return
 635
 636                 # Log in
 637                 login_form = {
 638                                 'current_form': 'loginForm',
 639                                 'next':         '/',
 640                                 'action_login': 'Log In',
 641                                 'username':     username,
 642                                 'password':     password,
 643                                 }
 644                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
 645                 try:
 646                         self.report_login()
 647                         login_results = urllib2.urlopen(request).read()
 648                         if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
 649                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
 650                                 return
 651                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 652                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
 653                         return
 654
 655                 # Confirm age
 656                 age_form = {
 657                                 'next_url':             '/',
 658                                 'action_confirm':       'Confirm',
 659                                 }
 660                 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
 661                 try:
 662                         self.report_age_confirmation()
 663                         age_results = urllib2.urlopen(request).read()
 664                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 665                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
 666                         return
 667
 668         def _real_extract(self, url):
 669                 # Extract video id from URL
 670                 mobj = re.match(self._VALID_URL, url)
 671                 if mobj is None:
 672                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
 673                         return
 674                 video_id = mobj.group(2)
 675
 676                 # Downloader parameters
 677                 best_quality = False
 678                 format_param = None
 679                 quality_index = 0
 680                 if self._downloader is not None:
 681                         params = self._downloader.params
 682                         format_param = params.get('format', None)
 683                         if format_param == '0':
 684                                 format_param = self._available_formats[quality_index]
 685                                 best_quality = True
 686
 687                 while True:
 688                         # Extension
 689                         video_extension = self._video_extensions.get(format_param, 'flv')
 690
 691                         # Get video info
 692                         video_info_url = 'http://www.youtube.com/get_video_info?&video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en' % video_id
 693                         request = urllib2.Request(video_info_url, None, std_headers)
 694                         try:
 695                                 self.report_video_info_webpage_download(video_id)
 696                                 video_info_webpage = urllib2.urlopen(request).read()
 697                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 698                                 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
 699                                 return
 700                         self.report_information_extraction(video_id)
 701
 702                         # "t" param
 703                         mobj = re.search(r'(?m)&token=([^&]+)(?:&|$)', video_info_webpage)
 704                         if mobj is None:
 705                                 # Attempt to see if YouTube has issued an error message
 706                                 mobj = re.search(r'(?m)&reason=([^&]+)(?:&|$)', video_info_webpage)
 707                                 if mobj is None:
 708                                         self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason')
 709                                         stream = open('reportme-ydl-%s.dat' % time.time(), 'wb')
 710                                         stream.write(video_info_webpage)
 711                                         stream.close()
 712                                 else:
 713                                         reason = urllib.unquote_plus(mobj.group(1))
 714                                         self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8'))
 715                                 return
 716                         token = urllib.unquote(mobj.group(1))
 717                         video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
 718                         if format_param is not None:
 719                                 video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
 720
 721                         # uploader
 722                         mobj = re.search(r'(?m)&author=([^&]+)(?:&|$)', video_info_webpage)
 723                         if mobj is None:
 724                                 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
 725                                 return
 726                         video_uploader = urllib.unquote(mobj.group(1))
 727
 728                         # title
 729                         mobj = re.search(r'(?m)&title=([^&]*)(?:&|$)', video_info_webpage)
 730                         if mobj is None:
 731                                 self._downloader.trouble(u'ERROR: unable to extract video title')
 732                                 return
 733                         video_title = urllib.unquote_plus(mobj.group(1))
 734                         video_title = video_title.decode('utf-8')
 735                         video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
 736                         video_title = video_title.replace(os.sep, u'%')
 737
 738                         # simplified title
 739                         simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
 740                         simple_title = simple_title.strip(ur'_')
 741
 742                         try:
 743                                 # Process video information
 744                                 self._downloader.process_info({
 745                                         'id':           video_id.decode('utf-8'),
 746                                         'url':          video_real_url.decode('utf-8'),
 747                                         'uploader':     video_uploader.decode('utf-8'),
 748                                         'title':        video_title,
 749                                         'stitle':       simple_title,
 750                                         'ext':          video_extension.decode('utf-8'),
 751                                 })
 752
 753                                 return
 754
 755                         except UnavailableFormatError, err:
 756                                 if best_quality:
 757                                         if quality_index == len(self._available_formats) - 1:
 758                                                 # I don't ever expect this to happen
 759                                                 self._downloader.trouble(u'ERROR: no known formats available for video')
 760                                                 return
 761                                         else:
 762                                                 self.report_unavailable_format(video_id, format_param)
 763                                                 quality_index += 1
 764                                                 format_param = self._available_formats[quality_index]
 765                                                 continue
 766                                 else:
 767                                         self._downloader.trouble('ERROR: format not available for video')
 768                                         return
 769
 770
 771 class MetacafeIE(InfoExtractor):
 772         """Information Extractor for metacafe.com."""
 773
 774         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
 775         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
 776         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
 777         _youtube_ie = None
 778
 779         def __init__(self, youtube_ie, downloader=None):
 780                 InfoExtractor.__init__(self, downloader)
 781                 self._youtube_ie = youtube_ie
 782
 783         @staticmethod
 784         def suitable(url):
 785                 return (re.match(MetacafeIE._VALID_URL, url) is not None)
 786
 787         def report_disclaimer(self):
 788                 """Report disclaimer retrieval."""
 789                 self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
 790
 791         def report_age_confirmation(self):
 792                 """Report attempt to confirm age."""
 793                 self._downloader.to_stdout(u'[metacafe] Confirming age')
 794
 795         def report_download_webpage(self, video_id):
 796                 """Report webpage download."""
 797                 self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
 798
 799         def report_extraction(self, video_id):
 800                 """Report information extraction."""
 801                 self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
 802
 803         def _real_initialize(self):
 804                 # Retrieve disclaimer
 805                 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
 806                 try:
 807                         self.report_disclaimer()
 808                         disclaimer = urllib2.urlopen(request).read()
 809                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 810                         self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
 811                         return
 812
 813                 # Confirm age
 814                 disclaimer_form = {
 815                         'filters': '0',
 816                         'submit': "Continue - I'm over 18",
 817                         }
 818                 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
 819                 try:
 820                         self.report_age_confirmation()
 821                         disclaimer = urllib2.urlopen(request).read()
 822                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 823                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
 824                         return
 825
 826         def _real_extract(self, url):
 827                 # Extract id and simplified title from URL
 828                 mobj = re.match(self._VALID_URL, url)
 829                 if mobj is None:
 830                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
 831                         return
 832
 833                 video_id = mobj.group(1)
 834
 835                 # Check if video comes from YouTube
 836                 mobj2 = re.match(r'^yt-(.*)$', video_id)
 837                 if mobj2 is not None:
 838                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
 839                         return
 840
 841                 simple_title = mobj.group(2).decode('utf-8')
 842                 video_extension = 'flv'
 843
 844                 # Retrieve video webpage to extract further information
 845                 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
 846                 try:
 847                         self.report_download_webpage(video_id)
 848                         webpage = urllib2.urlopen(request).read()
 849                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 850                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
 851                         return
 852
 853                 # Extract URL, uploader and title from webpage
 854                 self.report_extraction(video_id)
 855                 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
 856                 if mobj is None:
 857                         self._downloader.trouble(u'ERROR: unable to extract media URL')
 858                         return
 859                 mediaURL = urllib.unquote(mobj.group(1))
 860
 861                 #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
 862                 #if mobj is None:
 863                 #       self._downloader.trouble(u'ERROR: unable to extract gdaKey')
 864                 #       return
 865                 #gdaKey = mobj.group(1)
 866                 #
 867                 #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
 868
 869                 video_url = mediaURL
 870
 871                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
 872                 if mobj is None:
 873                         self._downloader.trouble(u'ERROR: unable to extract title')
 874                         return
 875                 video_title = mobj.group(1).decode('utf-8')
 876
 877                 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
 878                 if mobj is None:
 879                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
 880                         return
 881                 video_uploader = mobj.group(1)
 882
 883                 try:
 884                         # Process video information
 885                         self._downloader.process_info({
 886                                 'id':           video_id.decode('utf-8'),
 887                                 'url':          video_url.decode('utf-8'),
 888                                 'uploader':     video_uploader.decode('utf-8'),
 889                                 'title':        video_title,
 890                                 'stitle':       simple_title,
 891                                 'ext':          video_extension.decode('utf-8'),
 892                         })
 893                 except UnavailableFormatError:
 894                         self._downloader.trouble(u'ERROR: format not available for video')
 895
 896
 897 class YoutubeSearchIE(InfoExtractor):
 898         """Information Extractor for YouTube search queries."""
 899         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
 900         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
 901         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
 902         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
 903         _youtube_ie = None
 904         _max_youtube_results = 1000
 905
 906         def __init__(self, youtube_ie, downloader=None):
 907                 InfoExtractor.__init__(self, downloader)
 908                 self._youtube_ie = youtube_ie
 909
 910         @staticmethod
 911         def suitable(url):
 912                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
 913
 914         def report_download_page(self, query, pagenum):
 915                 """Report attempt to download playlist page with given number."""
 916                 self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
 917
 918         def _real_initialize(self):
 919                 self._youtube_ie.initialize()
 920
 921         def _real_extract(self, query):
 922                 mobj = re.match(self._VALID_QUERY, query)
 923                 if mobj is None:
 924                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
 925                         return
 926
 927                 prefix, query = query.split(':')
 928                 prefix = prefix[8:]
 929                 if prefix == '':
 930                         self._download_n_results(query, 1)
 931                         return
 932                 elif prefix == 'all':
 933                         self._download_n_results(query, self._max_youtube_results)
 934                         return
 935                 else:
 936                         try:
 937                                 n = long(prefix)
 938                                 if n <= 0:
 939                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
 940                                         return
 941                                 elif n > self._max_youtube_results:
 942                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
 943                                         n = self._max_youtube_results
 944                                 self._download_n_results(query, n)
 945                                 return
 946                         except ValueError: # parsing prefix as integer fails
 947                                 self._download_n_results(query, 1)
 948                                 return
 949
 950         def _download_n_results(self, query, n):
 951                 """Downloads a specified number of results for a query"""
 952
 953                 video_ids = []
 954                 already_seen = set()
 955                 pagenum = 1
 956
 957                 while True:
 958                         self.report_download_page(query, pagenum)
 959                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
 960                         request = urllib2.Request(result_url, None, std_headers)
 961                         try:
 962                                 page = urllib2.urlopen(request).read()
 963                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 964                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
 965                                 return
 966
 967                         # Extract video identifiers
 968                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
 969                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
 970                                 if video_id not in already_seen:
 971                                         video_ids.append(video_id)
 972                                         already_seen.add(video_id)
 973                                         if len(video_ids) == n:
 974                                                 # Specified n videos reached
 975                                                 for id in video_ids:
 976                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
 977                                                 return
 978
 979                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
 980                                 for id in video_ids:
 981                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
 982                                 return
 983
 984                         pagenum = pagenum + 1
 985
 986 class YoutubePlaylistIE(InfoExtractor):
 987         """Information Extractor for YouTube playlists."""
 988
 989         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*'
 990         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
 991         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
 992         _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s'
 993         _youtube_ie = None
 994
 995         def __init__(self, youtube_ie, downloader=None):
 996                 InfoExtractor.__init__(self, downloader)
 997                 self._youtube_ie = youtube_ie
 998
 999         @staticmethod
1000         def suitable(url):
1001                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1002
1003         def report_download_page(self, playlist_id, pagenum):
1004                 """Report attempt to download playlist page with given number."""
1005                 self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1006
1007         def _real_initialize(self):
1008                 self._youtube_ie.initialize()
1009
1010         def _real_extract(self, url):
1011                 # Extract playlist id
1012                 mobj = re.match(self._VALID_URL, url)
1013                 if mobj is None:
1014                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1015                         return
1016
1017                 # Download playlist pages
1018                 playlist_id = mobj.group(1)
1019                 video_ids = []
1020                 pagenum = 1
1021
1022                 while True:
1023                         self.report_download_page(playlist_id, pagenum)
1024                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1025                         try:
1026                                 page = urllib2.urlopen(request).read()
1027                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1028                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1029                                 return
1030
1031                         # Extract video identifiers
1032                         ids_in_page = []
1033                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1034                                 if mobj.group(1) not in ids_in_page:
1035                                         ids_in_page.append(mobj.group(1))
1036                         video_ids.extend(ids_in_page)
1037
1038                         if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page:
1039                                 break
1040                         pagenum = pagenum + 1
1041
1042                 for id in video_ids:
1043                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1044                 return
1045
1046 class YoutubeUserIE(InfoExtractor):
1047         """Information Extractor for YouTube users."""
1048
1049         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
1050         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1051         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)'
1052         _youtube_ie = None
1053
1054         def __init__(self, youtube_ie, downloader=None):
1055                 InfoExtractor.__init__(self, downloader)
1056                 self._youtube_ie = youtube_ie
1057
1058         @staticmethod
1059         def suitable(url):
1060                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
1061
1062         def report_download_page(self, username):
1063                 """Report attempt to download user page."""
1064                 self._downloader.to_stdout(u'[youtube] USR %s: Downloading page ' % (username))
1065
1066         def _real_initialize(self):
1067                 self._youtube_ie.initialize()
1068
1069         def _real_extract(self, url):
1070                 # Extract username
1071                 mobj = re.match(self._VALID_URL, url)
1072                 if mobj is None:
1073                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1074                         return
1075
1076                 # Download user page
1077                 username = mobj.group(1)
1078                 video_ids = []
1079                 pagenum = 1
1080
1081                 self.report_download_page(username)
1082                 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
1083                 try:
1084                         page = urllib2.urlopen(request).read()
1085                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1086                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1087                         return
1088
1089                 # Extract video identifiers
1090                 ids_in_page = []
1091
1092                 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1093                         print mobj.group(1)
1094                         if mobj.group(1) not in ids_in_page:
1095                                 ids_in_page.append(mobj.group(1))
1096                 video_ids.extend(ids_in_page)
1097
1098                 for id in video_ids:
1099                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1100                 return
1101
class PostProcessor(object):
        """Base class for post processors.

        A downloader keeps a chain of PostProcessor objects, registered
        through its add_post_processor() method. After a successful
        download it calls run() on each of them in turn: the first one
        receives an initial argument and every following one receives
        whatever the previous run() returned.

        The chain ends when the last PostProcessor has run or as soon as
        one of them returns None.

        Like InfoExtractor objects, PostProcessor objects and their
        downloader register with each other ("mutual registration").
        """

        # Downloader this post processor belongs to, if any
        _downloader = None

        def __init__(self, downloader=None):
                self._downloader = downloader

        def set_downloader(self, downloader):
                """Attach this PP to the given downloader."""
                self._downloader = downloader

        def run(self, information):
                """Process a finished download.

                "information" is a dictionary like the ones built by the
                InfoExtractors, with one additional field, "filepath",
                pointing to the downloaded file.

                Returning None stops the postprocessing chain. Returning
                an information dictionary (possibly the received one with
                some fields changed) passes it on to the next object in
                the chain.

                A PostProcessingError exception may also be raised; the
                calling downloader takes it into account.
                """
                # The base implementation passes the dictionary on untouched
                return information
1147
1148 ### MAIN PROGRAM ###
1149 if __name__ == '__main__':
1150         try:
1151                 # Modules needed only when running the main program
1152                 import getpass
1153                 import optparse
1154
1155                 # Function to update the program file with the latest version from bitbucket.org
1156                 def update_self(downloader, filename):
1157                         # Note: downloader only used for options
1158                         if not os.access (filename, os.W_OK):
1159                                 sys.exit('ERROR: no write permissions on %s' % filename)
1160
1161                         downloader.to_stdout('Updating to latest stable version...')
1162                         latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
1163                         latest_version = urllib.urlopen(latest_url).read().strip()
1164                         prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
1165                         newcontent = urllib.urlopen(prog_url).read()
1166                         stream = open(filename, 'w')
1167                         stream.write(newcontent)
1168                         stream.close()
1169                         downloader.to_stdout('Updated to version %s' % latest_version)
1170
1171                 # General configuration
1172                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
1173                 urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
1174                 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
1175
1176                 # Parse command line
1177                 parser = optparse.OptionParser(
1178                         usage='Usage: %prog [options] url...',
1179                         version='2009.12.26',
1180                         conflict_handler='resolve',
1181                 )
1182
1183                 parser.add_option('-h', '--help',
1184                                 action='help', help='print this help text and exit')
1185                 parser.add_option('-v', '--version',
1186                                 action='version', help='print program version and exit')
1187                 parser.add_option('-U', '--update',
1188                                 action='store_true', dest='update_self', help='update this program to latest stable version')
1189                 parser.add_option('-i', '--ignore-errors',
1190                                 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
1191                 parser.add_option('-r', '--rate-limit',
1192                                 dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
1193
1194                 authentication = optparse.OptionGroup(parser, 'Authentication Options')
1195                 authentication.add_option('-u', '--username',
1196                                 dest='username', metavar='UN', help='account username')
1197                 authentication.add_option('-p', '--password',
1198                                 dest='password', metavar='PW', help='account password')
1199                 authentication.add_option('-n', '--netrc',
1200                                 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
1201                 parser.add_option_group(authentication)
1202
1203                 video_format = optparse.OptionGroup(parser, 'Video Format Options')
1204                 video_format.add_option('-f', '--format',
1205                                 action='store', dest='format', metavar='FMT', help='video format code')
1206                 video_format.add_option('-b', '--best-quality',
1207                                 action='store_const', dest='format', help='download the best quality video possible', const='0')
1208                 video_format.add_option('-m', '--mobile-version',
1209                                 action='store_const', dest='format', help='alias for -f 17', const='17')
1210                 video_format.add_option('-d', '--high-def',
1211                                 action='store_const', dest='format', help='alias for -f 22', const='22')
1212                 parser.add_option_group(video_format)
1213
1214                 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
1215                 verbosity.add_option('-q', '--quiet',
1216                                 action='store_true', dest='quiet', help='activates quiet mode', default=False)
1217                 verbosity.add_option('-s', '--simulate',
1218                                 action='store_true', dest='simulate', help='do not download video', default=False)
1219                 verbosity.add_option('-g', '--get-url',
1220                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
1221                 verbosity.add_option('-e', '--get-title',
1222                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
1223                 parser.add_option_group(verbosity)
1224
1225                 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')  # flags controlling output file naming and on-disk behaviour
1226                 filesystem.add_option('-t', '--title',
1227                                 action='store_true', dest='usetitle', help='use title in file name', default=False)
1228                 filesystem.add_option('-l', '--literal',
1229                                 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
1230                 filesystem.add_option('-o', '--output',
1231                                 dest='outtmpl', metavar='TPL', help='output filename template')  # no default given: stays None unless the flag is passed
1232                 filesystem.add_option('-a', '--batch-file',
1233                                 dest='batchfile', metavar='F', help='file containing URLs to download')  # no default given: stays None unless the flag is passed
1234                 filesystem.add_option('-w', '--no-overwrites',
1235                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
1236                 filesystem.add_option('-c', '--continue',
1237                                 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
1238                 parser.add_option_group(filesystem)
1239
1240                 (opts, args) = parser.parse_args()  # opts: parsed option values; args: leftover positional arguments, used below as URLs
1241
1242                 # Batch file verification
1243                 batchurls = []
1244                 if opts.batchfile is not None:
1245                         try:
1246                                 batchurls = open(opts.batchfile, 'r').readlines()
1247                                 batchurls = [x.strip() for x in batchurls]
1248                                 batchurls = [x for x in batchurls if len(x) > 0]
1249                         except IOError:
1250                                 sys.exit(u'ERROR: batch file could not be read')
1251                 all_urls = batchurls + args
1252
1253                 # Conflicting, missing and erroneous options (parser.error() reports the message and terminates the program)
1254                 if opts.usenetrc and (opts.username is not None or opts.password is not None):
1255                         parser.error(u'using .netrc conflicts with giving username/password')
1256                 if opts.password is not None and opts.username is None:
1257                         parser.error(u'account username missing')
1258                 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
1259                         parser.error(u'using output template conflicts with using title or literal title')
1260                 if opts.usetitle and opts.useliteral:
1261                         parser.error(u'using title conflicts with using literal title')
1262                 if opts.username is not None and opts.password is None:
1263                         opts.password = getpass.getpass(u'Type account password and press return:')  # username given without a password: prompt for it
1264                 if opts.ratelimit is not None:
1265                         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)  # a None result is treated as an invalid value on the next line
1266                         if numeric_limit is None:
1267                                 parser.error(u'invalid rate limit specified')
1268                         opts.ratelimit = numeric_limit  # replace the raw option value with the parsed one
1269
1270                 # Information extractors (the single YoutubeIE instance is handed to each of the other extractors)
1271                 youtube_ie = YoutubeIE()
1272                 metacafe_ie = MetacafeIE(youtube_ie)
1273                 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
1274                 youtube_user_ie = YoutubeUserIE(youtube_ie)
1275                 youtube_search_ie = YoutubeSearchIE(youtube_ie)
1276
1277                 # File downloader, configured from the parsed command-line options
1278                 fd = FileDownloader({
1279                         'usenetrc': opts.usenetrc,
1280                         'username': opts.username,
1281                         'password': opts.password,
1282                         'quiet': (opts.quiet or opts.geturl or opts.gettitle),  # -g and -e imply quiet
1283                         'forceurl': opts.geturl,
1284                         'forcetitle': opts.gettitle,
1285                         'simulate': (opts.simulate or opts.geturl or opts.gettitle),  # -g and -e imply simulate
1286                         'format': opts.format,
1287                         'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))  # precedence: -o template (decoded), then -t, then -l, else the id-only default
1288                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
1289                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
1290                                 or u'%(id)s.%(ext)s'),
1291                         'ignoreerrors': opts.ignoreerrors,
1292                         'ratelimit': opts.ratelimit,
1293                         'nooverwrites': opts.nooverwrites,
1294                         'continuedl': opts.continue_dl,
1295                         })
1296                 fd.add_info_extractor(youtube_search_ie)  # NOTE(review): registration order may be significant -- confirm against FileDownloader
1297                 fd.add_info_extractor(youtube_pl_ie)
1298                 fd.add_info_extractor(youtube_user_ie)
1299                 fd.add_info_extractor(metacafe_ie)
1300                 fd.add_info_extractor(youtube_ie)
1301
1302                 # Update version
1303                 if opts.update_self:
1304                         update_self(fd, sys.argv[0])
1305
1306                 # Maybe do nothing
1307                 if len(all_urls) < 1:
1308                         if not opts.update_self:
1309                                 parser.error(u'you must provide at least one URL')
1310                         else:
1311                                 sys.exit()
1312                 retcode = fd.download(all_urls)
1313                 sys.exit(retcode)
1314
1315         except DownloadError:  # raised by FileDownloader when it is not configured to continue on errors
1316                 sys.exit(1)
1317         except SameFileError:
1318                 sys.exit(u'ERROR: fixed output name but more than one file to download')  # a string passed to sys.exit() is printed to stderr; exit status is 1
1319         except KeyboardInterrupt:
1320                 sys.exit(u'\nERROR: Interrupted by user')  # leading newline starts the message on a fresh line