youtube-dl

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 # Author: Ricardo Garcia Gonzalez
   4 # Author: Danny Colligan
   5 # Author: Benjamin Johnson
   6 # Author: Vasyl' Vavrychuk
   7 # License: Public domain code
   8 import cookielib
   9 import ctypes
  10 import datetime
  11 import htmlentitydefs
  12 import httplib
  13 import locale
  14 import math
  15 import netrc
  16 import os
  17 import os.path
  18 import re
  19 import socket
  20 import string
  21 import subprocess
  22 import sys
  23 import time
  24 import urllib
  25 import urllib2
  26
  27 # parse_qs was moved from the cgi module to the urlparse module recently.
  28 try:
  29         from urlparse import parse_qs
  30 except ImportError:
  31         from cgi import parse_qs
  32
  33 std_headers = {
  34         'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101028 Firefox/3.6.12',
  35         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
  36         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  37         'Accept-Encoding': 'gzip, deflate',
  38         'Accept-Language': 'en-us,en;q=0.5',
  39 }
  40
  41 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
  42
  43 def preferredencoding():
  44         """Get preferred encoding.
  45
  46         Returns the best encoding scheme for the system, based on
  47         locale.getpreferredencoding() and some further tweaks.
  48         """
  49         def yield_preferredencoding():
  50                 try:
  51                         pref = locale.getpreferredencoding()
  52                         u'TEST'.encode(pref)
  53                 except:
  54                         pref = 'UTF-8'
  55                 while True:
  56                         yield pref
  57         return yield_preferredencoding().next()
  58
  59 def htmlentity_transform(matchobj):
  60         """Transforms an HTML entity to a Unicode character.
  61
  62         This function receives a match object and is intended to be used with
  63         the re.sub() function.
  64         """
  65         entity = matchobj.group(1)
  66
  67         # Known non-numeric HTML entity
  68         if entity in htmlentitydefs.name2codepoint:
  69                 return unichr(htmlentitydefs.name2codepoint[entity])
  70
  71         # Unicode character
  72         mobj = re.match(ur'(?u)#(x?\d+)', entity)
  73         if mobj is not None:
  74                 numstr = mobj.group(1)
  75                 if numstr.startswith(u'x'):
  76                         base = 16
  77                         numstr = u'0%s' % numstr
  78                 else:
  79                         base = 10
  80                 return unichr(long(numstr, base))
  81
  82         # Unknown entity in name, return its literal representation
  83         return (u'&%s;' % entity)
  84
  85 def sanitize_title(utitle):
  86         """Sanitizes a video title so it could be used as part of a filename."""
  87         utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
  88         return utitle.replace(unicode(os.sep), u'%')
  89
  90 def sanitize_open(filename, open_mode):
  91         """Try to open the given filename, and slightly tweak it if this fails.
  92
  93         Attempts to open the given filename. If this fails, it tries to change
  94         the filename slightly, step by step, until it's either able to open it
  95         or it fails and raises a final exception, like the standard open()
  96         function.
  97
  98         It returns the tuple (stream, definitive_file_name).
  99         """
 100         try:
 101                 if filename == u'-':
 102                         if sys.platform == 'win32':
 103                                 import msvcrt
 104                                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 105                         return (sys.stdout, filename)
 106                 stream = open(filename, open_mode)
 107                 return (stream, filename)
 108         except (IOError, OSError), err:
 109                 # In case of error, try to remove win32 forbidden chars
 110                 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
 111
 112                 # An exception here should be caught in the caller
 113                 stream = open(filename, open_mode)
 114                 return (stream, filename)
 115
 116 class DownloadError(Exception):
 117         """Download Error exception.
 118
 119         This exception may be thrown by FileDownloader objects if they are not
 120         configured to continue on errors. They will contain the appropriate
 121         error message.
 122         """
 123         pass
 124
 125 class SameFileError(Exception):
 126         """Same File exception.
 127
 128         This exception will be thrown by FileDownloader objects if they detect
 129         multiple files would have to be downloaded to the same file on disk.
 130         """
 131         pass
 132
 133 class PostProcessingError(Exception):
 134         """Post Processing exception.
 135
 136         This exception may be raised by PostProcessor's .run() method to
 137         indicate an error in the postprocessing task.
 138         """
 139         pass
 140
 141 class UnavailableVideoError(Exception):
 142         """Unavailable Format exception.
 143
 144         This exception will be thrown when a video is requested
 145         in a format that is not available for that video.
 146         """
 147         pass
 148
 149 class ContentTooShortError(Exception):
 150         """Content Too Short exception.
 151
 152         This exception may be raised by FileDownloader objects when a file they
 153         download is too small for what the server announced first, indicating
 154         the connection was probably interrupted.
 155         """
 156         # Both in bytes
 157         downloaded = None
 158         expected = None
 159
 160         def __init__(self, downloaded, expected):
 161                 self.downloaded = downloaded
 162                 self.expected = expected
 163
 164 class FileDownloader(object):
 165         """File Downloader class.
 166
 167         File downloader objects are the ones responsible for downloading the
 168         actual video file and writing it to disk if the user has requested
 169         it, among some other tasks. In most cases there should be one per
 170         program. As, given a video URL, the downloader doesn't know how to
 171         extract all the needed information, task that InfoExtractors do, it
 172         has to pass the URL to one of them.
 173
 174         For this, file downloader objects have a method that allows
 175         InfoExtractors to be registered in a given order. When it is passed
 176         a URL, the file downloader hands it to the first InfoExtractor it
 177         finds that reports being able to handle it. The InfoExtractor extracts
 178         all the information about the video or videos the URL refers to, and
 179         asks the FileDownloader to process the video information, possibly
 180         downloading the video.
 181
 182         File downloaders accept a lot of parameters. In order not to saturate
 183         the object constructor with arguments, it receives a dictionary of
 184         options instead. These options are available through the params
 185         attribute for the InfoExtractors to use. The FileDownloader also
 186         registers itself as the downloader in charge for the InfoExtractors
 187         that are added to it, so this is a "mutual registration".
 188
 189         Available options:
 190
 191         username:         Username for authentication purposes.
 192         password:         Password for authentication purposes.
 193         usenetrc:         Use netrc for authentication instead.
 194         quiet:            Do not print messages to stdout.
 195         forceurl:         Force printing final URL.
 196         forcetitle:       Force printing title.
 197         forcethumbnail:   Force printing thumbnail URL.
 198         forcedescription: Force printing description.
 199         simulate:         Do not download the video files.
 200         format:           Video format code.
 201         format_limit:     Highest quality format to try.
 202         outtmpl:          Template for output names.
 203         ignoreerrors:     Do not stop on download errors.
 204         ratelimit:        Download speed limit, in bytes/sec.
 205         nooverwrites:     Prevent overwriting files.
 206         retries:          Number of times to retry for HTTP error 5xx
 207         continuedl:       Try to continue downloads if possible.
 208         noprogress:       Do not print the progress bar.
 209         playliststart:    Playlist item to start at.
 210         playlistend:      Playlist item to end at.
 211         logtostderr:      Log messages to stderr instead of stdout.
 212         consoletitle:     Display progress in console window's titlebar.
 213         nopart:           Do not use temporary .part files.
 214         """
 215
 216         params = None
 217         _ies = []
 218         _pps = []
 219         _download_retcode = None
 220         _num_downloads = None
 221         _screen_file = None
 222
 223         def __init__(self, params):
 224                 """Create a FileDownloader object with the given options."""
 225                 self._ies = []
 226                 self._pps = []
 227                 self._download_retcode = 0
 228                 self._num_downloads = 0
 229                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 230                 self.params = params
 231
 232         @staticmethod
 233         def pmkdir(filename):
 234                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
 235                 components = filename.split(os.sep)
 236                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
 237                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
 238                 for dir in aggregate:
 239                         if not os.path.exists(dir):
 240                                 os.mkdir(dir)
 241
 242         @staticmethod
 243         def format_bytes(bytes):
 244                 if bytes is None:
 245                         return 'N/A'
 246                 if type(bytes) is str:
 247                         bytes = float(bytes)
 248                 if bytes == 0.0:
 249                         exponent = 0
 250                 else:
 251                         exponent = long(math.log(bytes, 1024.0))
 252                 suffix = 'bkMGTPEZY'[exponent]
 253                 converted = float(bytes) / float(1024**exponent)
 254                 return '%.2f%s' % (converted, suffix)
 255
 256         @staticmethod
 257         def calc_percent(byte_counter, data_len):
 258                 if data_len is None:
 259                         return '---.-%'
 260                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 261
 262         @staticmethod
 263         def calc_eta(start, now, total, current):
 264                 if total is None:
 265                         return '--:--'
 266                 dif = now - start
 267                 if current == 0 or dif < 0.001: # One millisecond
 268                         return '--:--'
 269                 rate = float(current) / dif
 270                 eta = long((float(total) - float(current)) / rate)
 271                 (eta_mins, eta_secs) = divmod(eta, 60)
 272                 if eta_mins > 99:
 273                         return '--:--'
 274                 return '%02d:%02d' % (eta_mins, eta_secs)
 275
 276         @staticmethod
 277         def calc_speed(start, now, bytes):
 278                 dif = now - start
 279                 if bytes == 0 or dif < 0.001: # One millisecond
 280                         return '%10s' % '---b/s'
 281                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 282
 283         @staticmethod
 284         def best_block_size(elapsed_time, bytes):
 285                 new_min = max(bytes / 2.0, 1.0)
 286                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 287                 if elapsed_time < 0.001:
 288                         return long(new_max)
 289                 rate = bytes / elapsed_time
 290                 if rate > new_max:
 291                         return long(new_max)
 292                 if rate < new_min:
 293                         return long(new_min)
 294                 return long(rate)
 295
 296         @staticmethod
 297         def parse_bytes(bytestr):
 298                 """Parse a string indicating a byte quantity into a long integer."""
 299                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 300                 if matchobj is None:
 301                         return None
 302                 number = float(matchobj.group(1))
 303                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 304                 return long(round(number * multiplier))
 305
 306         def add_info_extractor(self, ie):
 307                 """Add an InfoExtractor object to the end of the list."""
 308                 self._ies.append(ie)
 309                 ie.set_downloader(self)
 310
 311         def add_post_processor(self, pp):
 312                 """Add a PostProcessor object to the end of the chain."""
 313                 self._pps.append(pp)
 314                 pp.set_downloader(self)
 315
 316         def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
 317                 """Print message to stdout if not in quiet mode."""
 318                 try:
 319                         if not self.params.get('quiet', False):
 320                                 terminator = [u'\n', u''][skip_eol]
 321                                 print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
 322                         self._screen_file.flush()
 323                 except (UnicodeEncodeError), err:
 324                         if not ignore_encoding_errors:
 325                                 raise
 326
 327         def to_stderr(self, message):
 328                 """Print message to stderr."""
 329                 print >>sys.stderr, message.encode(preferredencoding())
 330
 331         def to_cons_title(self, message):
 332                 """Set console/terminal window title to message."""
 333                 if not self.params.get('consoletitle', False):
 334                         return
 335                 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 336                         # c_wchar_p() might not be necessary if `message` is
 337                         # already of type unicode()
 338                         ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 339                 elif 'TERM' in os.environ:
 340                         sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
 341
 342         def fixed_template(self):
 343                 """Checks if the output template is fixed."""
 344                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
 345
 346         def trouble(self, message=None):
 347                 """Determine action to take when a download problem appears.
 348
 349                 Depending on if the downloader has been configured to ignore
 350                 download errors or not, this method may throw an exception or
 351                 not when errors are found, after printing the message.
 352                 """
 353                 if message is not None:
 354                         self.to_stderr(message)
 355                 if not self.params.get('ignoreerrors', False):
 356                         raise DownloadError(message)
 357                 self._download_retcode = 1
 358
 359         def slow_down(self, start_time, byte_counter):
 360                 """Sleep if the download speed is over the rate limit."""
 361                 rate_limit = self.params.get('ratelimit', None)
 362                 if rate_limit is None or byte_counter == 0:
 363                         return
 364                 now = time.time()
 365                 elapsed = now - start_time
 366                 if elapsed <= 0.0:
 367                         return
 368                 speed = float(byte_counter) / elapsed
 369                 if speed > rate_limit:
 370                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 371
 372         def temp_name(self, filename):
 373                 """Returns a temporary filename for the given filename."""
 374                 if self.params.get('nopart', False) or filename == u'-' or \
 375                                 (os.path.exists(filename) and not os.path.isfile(filename)):
 376                         return filename
 377                 return filename + u'.part'
 378
 379         def try_rename(self, old_filename, new_filename):
 380                 try:
 381                         if old_filename == new_filename:
 382                                 return
 383                         os.rename(old_filename, new_filename)
 384                 except (IOError, OSError), err:
 385                         self.trouble(u'ERROR: unable to rename file')
 386
 387         def report_destination(self, filename):
 388                 """Report destination filename."""
 389                 self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
 390
 391         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 392                 """Report download progress."""
 393                 if self.params.get('noprogress', False):
 394                         return
 395                 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
 396                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 397                 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
 398                                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 399
 400         def report_resuming_byte(self, resume_len):
 401                 """Report attempt to resume at given byte."""
 402                 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 403
 404         def report_retry(self, count, retries):
 405                 """Report retry in case of HTTP error 5xx"""
 406                 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 407
 408         def report_file_already_downloaded(self, file_name):
 409                 """Report file has already been fully downloaded."""
 410                 try:
 411                         self.to_screen(u'[download] %s has already been downloaded' % file_name)
 412                 except (UnicodeEncodeError), err:
 413                         self.to_screen(u'[download] The file has already been downloaded')
 414
 415         def report_unable_to_resume(self):
 416                 """Report it was impossible to resume download."""
 417                 self.to_screen(u'[download] Unable to resume')
 418
 419         def report_finish(self):
 420                 """Report download finished."""
 421                 if self.params.get('noprogress', False):
 422                         self.to_screen(u'[download] Download completed')
 423                 else:
 424                         self.to_screen(u'')
 425
 426         def increment_downloads(self):
 427                 """Increment the ordinal that assigns a number to each file."""
 428                 self._num_downloads += 1
 429
 430         def process_info(self, info_dict):
 431                 """Process a single dictionary returned by an InfoExtractor."""
 432                 # Do nothing else if in simulate mode
 433                 if self.params.get('simulate', False):
 434                         # Forced printings
 435                         if self.params.get('forcetitle', False):
 436                                 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
 437                         if self.params.get('forceurl', False):
 438                                 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
 439                         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 440                                 print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
 441                         if self.params.get('forcedescription', False) and 'description' in info_dict:
 442                                 print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
 443
 444                         return
 445
 446                 try:
 447                         template_dict = dict(info_dict)
 448                         template_dict['epoch'] = unicode(long(time.time()))
 449                         template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
 450                         filename = self.params['outtmpl'] % template_dict
 451                 except (ValueError, KeyError), err:
 452                         self.trouble(u'ERROR: invalid system charset or erroneous output template')
 453                         return
 454                 if self.params.get('nooverwrites', False) and os.path.exists(filename):
 455                         self.to_stderr(u'WARNING: file exists and will be skipped')
 456                         return
 457
 458                 try:
 459                         self.pmkdir(filename)
 460                 except (OSError, IOError), err:
 461                         self.trouble(u'ERROR: unable to create directories: %s' % str(err))
 462                         return
 463
 464                 try:
 465                         success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
 466                 except (OSError, IOError), err:
 467                         raise UnavailableVideoError
 468                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 469                         self.trouble(u'ERROR: unable to download video data: %s' % str(err))
 470                         return
 471                 except (ContentTooShortError, ), err:
 472                         self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 473                         return
 474
 475                 if success:
 476                         try:
 477                                 self.post_process(filename, info_dict)
 478                         except (PostProcessingError), err:
 479                                 self.trouble(u'ERROR: postprocessing: %s' % str(err))
 480                                 return
 481
 482         def download(self, url_list):
 483                 """Download a given list of URLs."""
 484                 if len(url_list) > 1 and self.fixed_template():
 485                         raise SameFileError(self.params['outtmpl'])
 486
 487                 for url in url_list:
 488                         suitable_found = False
 489                         for ie in self._ies:
 490                                 # Go to next InfoExtractor if not suitable
 491                                 if not ie.suitable(url):
 492                                         continue
 493
 494                                 # Suitable InfoExtractor found
 495                                 suitable_found = True
 496
 497                                 # Extract information from URL and process it
 498                                 ie.extract(url)
 499
 500                                 # Suitable InfoExtractor had been found; go to next URL
 501                                 break
 502
 503                         if not suitable_found:
 504                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
 505
 506                 return self._download_retcode
 507
 508         def post_process(self, filename, ie_info):
 509                 """Run the postprocessing chain on the given file."""
 510                 info = dict(ie_info)
 511                 info['filepath'] = filename
 512                 for pp in self._pps:
 513                         info = pp.run(info)
 514                         if info is None:
 515                                 break
 516
 517         def _download_with_rtmpdump(self, filename, url, player_url):
 518                 self.report_destination(filename)
 519                 tmpfilename = self.temp_name(filename)
 520
 521                 # Check for rtmpdump first
 522                 try:
 523                         subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 524                 except (OSError, IOError):
 525                         self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
 526                         return False
 527
 528                 # Download using rtmpdump. rtmpdump returns exit code 2 when
 529                 # the connection was interrumpted and resuming appears to be
 530                 # possible. This is part of rtmpdump's normal usage, AFAIK.
 531                 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
 532                 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
 533                 while retval == 2 or retval == 1:
 534                         prevsize = os.path.getsize(tmpfilename)
 535                         self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 536                         time.sleep(5.0) # This seems to be needed
 537                         retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 538                         cursize = os.path.getsize(tmpfilename)
 539                         if prevsize == cursize and retval == 1:
 540                                 break
 541                 if retval == 0:
 542                         self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
 543                         self.try_rename(tmpfilename, filename)
 544                         return True
 545                 else:
 546                         self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
 547                         return False
 548
 549         def _do_download(self, filename, url, player_url):
 550                 # Check file already present
 551                 if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
 552                         self.report_file_already_downloaded(filename)
 553                         return True
 554
 555                 # Attempt to download using rtmpdump
 556                 if url.startswith('rtmp'):
 557                         return self._download_with_rtmpdump(filename, url, player_url)
 558
 559                 tmpfilename = self.temp_name(filename)
 560                 stream = None
 561                 open_mode = 'wb'
 562                 basic_request = urllib2.Request(url, None, std_headers)
 563                 request = urllib2.Request(url, None, std_headers)
 564
 565                 # Establish possible resume length
 566                 if os.path.isfile(tmpfilename):
 567                         resume_len = os.path.getsize(tmpfilename)
 568                 else:
 569                         resume_len = 0
 570
 571                 # Request parameters in case of being able to resume
 572                 if self.params.get('continuedl', False) and resume_len != 0:
 573                         self.report_resuming_byte(resume_len)
 574                         request.add_header('Range','bytes=%d-' % resume_len)
 575                         open_mode = 'ab'
 576
 577                 count = 0
 578                 retries = self.params.get('retries', 0)
 579                 while count <= retries:
 580                         # Establish connection
 581                         try:
 582                                 data = urllib2.urlopen(request)
 583                                 break
 584                         except (urllib2.HTTPError, ), err:
 585                                 if (err.code < 500 or err.code >= 600) and err.code != 416:
 586                                         # Unexpected HTTP error
 587                                         raise
 588                                 elif err.code == 416:
 589                                         # Unable to resume (requested range not satisfiable)
 590                                         try:
 591                                                 # Open the connection again without the range header
 592                                                 data = urllib2.urlopen(basic_request)
 593                                                 content_length = data.info()['Content-Length']
 594                                         except (urllib2.HTTPError, ), err:
 595                                                 if err.code < 500 or err.code >= 600:
 596                                                         raise
 597                                         else:
 598                                                 # Examine the reported length
 599                                                 if (content_length is not None and
 600                                                     (resume_len - 100 < long(content_length) < resume_len + 100)):
 601                                                         # The file had already been fully downloaded.
 602                                                         # Explanation to the above condition: in issue #175 it was revealed that
 603                                                         # YouTube sometimes adds or removes a few bytes from the end of the file,
 604                                                         # changing the file size slightly and causing problems for some users. So
 605                                                         # I decided to implement a suggested change and consider the file
 606                                                         # completely downloaded if the file size differs less than 100 bytes from
 607                                                         # the one in the hard drive.
 608                                                         self.report_file_already_downloaded(filename)
 609                                                         self.try_rename(tmpfilename, filename)
 610                                                         return True
 611                                                 else:
 612                                                         # The length does not match, we start the download over
 613                                                         self.report_unable_to_resume()
 614                                                         open_mode = 'wb'
 615                                                         break
 616                         # Retry
 617                         count += 1
 618                         if count <= retries:
 619                                 self.report_retry(count, retries)
 620
 621                 if count > retries:
 622                         self.trouble(u'ERROR: giving up after %s retries' % retries)
 623                         return False
 624
 625                 data_len = data.info().get('Content-length', None)
 626                 if data_len is not None:
 627                         data_len = long(data_len) + resume_len
 628                 data_len_str = self.format_bytes(data_len)
 629                 byte_counter = 0 + resume_len
 630                 block_size = 1024
 631                 start = time.time()
 632                 while True:
 633                         # Download and write
 634                         before = time.time()
 635                         data_block = data.read(block_size)
 636                         after = time.time()
 637                         if len(data_block) == 0:
 638                                 break
 639                         byte_counter += len(data_block)
 640
 641                         # Open file just in time
 642                         if stream is None:
 643                                 try:
 644                                         (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
 645                                         self.report_destination(filename)
 646                                 except (OSError, IOError), err:
 647                                         self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
 648                                         return False
 649                         try:
 650                                 stream.write(data_block)
 651                         except (IOError, OSError), err:
 652                                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
 653                                 return False
 654                         block_size = self.best_block_size(after - before, len(data_block))
 655
 656                         # Progress message
 657                         percent_str = self.calc_percent(byte_counter, data_len)
 658                         eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
 659                         speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
 660                         self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 661
 662                         # Apply rate limit
 663                         self.slow_down(start, byte_counter - resume_len)
 664
 665                 stream.close()
 666                 self.report_finish()
 667                 if data_len is not None and byte_counter != data_len:
 668                         raise ContentTooShortError(byte_counter, long(data_len))
 669                 self.try_rename(tmpfilename, filename)
 670                 return True
 671
 672 class InfoExtractor(object):
 673         """Information Extractor class.
 674
 675         Information extractors are the classes that, given a URL, extract
 676         information from the video (or videos) the URL refers to. This
 677         information includes the real video URL, the video title and simplified
 678         title, author and others. The information is stored in a dictionary
 679         which is then passed to the FileDownloader. The FileDownloader
 680         processes this information possibly downloading the video to the file
 681         system, among other possible outcomes. The dictionaries must include
 682         the following fields:
 683
 684         id:             Video identifier.
 685         url:            Final video URL.
 686         uploader:       Nickname of the video uploader.
 687         title:          Literal title.
 688         stitle:         Simplified title.
 689         ext:            Video filename extension.
 690         format:         Video format.
 691         player_url:     SWF Player URL (may be None).
 692
 693         The following fields are optional. Their primary purpose is to allow
 694         youtube-dl to serve as the backend for a video search function, such
 695         as the one in youtube2mp3.  They are only used when their respective
 696         forced printing functions are called:
 697
 698         thumbnail:      Full URL to a video thumbnail image.
 699         description:    One-line video description.
 700
 701         Subclasses of this one should re-define the _real_initialize() and
 702         _real_extract() methods, as well as the suitable() static method.
 703         Probably, they should also be instantiated and added to the main
 704         downloader.
 705         """
 706
 707         _ready = False
 708         _downloader = None
 709
 710         def __init__(self, downloader=None):
 711                 """Constructor. Receives an optional downloader."""
 712                 self._ready = False
 713                 self.set_downloader(downloader)
 714
 715         @staticmethod
 716         def suitable(url):
 717                 """Receives a URL and returns True if suitable for this IE."""
 718                 return False
 719
 720         def initialize(self):
 721                 """Initializes an instance (authentication, etc)."""
 722                 if not self._ready:
 723                         self._real_initialize()
 724                         self._ready = True
 725
 726         def extract(self, url):
 727                 """Extracts URL information and returns it in list of dicts."""
 728                 self.initialize()
 729                 return self._real_extract(url)
 730
 731         def set_downloader(self, downloader):
 732                 """Sets the downloader for this IE."""
 733                 self._downloader = downloader
 734
 735         def _real_initialize(self):
 736                 """Real initialization process. Redefine in subclasses."""
 737                 pass
 738
 739         def _real_extract(self, url):
 740                 """Real extraction process. Redefine in subclasses."""
 741                 pass
 742
 743 class YoutubeIE(InfoExtractor):
 744         """Information extractor for youtube.com."""
 745
 746         _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$'
 747         _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
 748         _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
 749         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
 750         _NETRC_MACHINE = 'youtube'
 751         # Listed in order of quality
 752         _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
 753         _video_extensions = {
 754                 '13': '3gp',
 755                 '17': 'mp4',
 756                 '18': 'mp4',
 757                 '22': 'mp4',
 758                 '37': 'mp4',
 759                 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
 760                 '43': 'webm',
 761                 '45': 'webm',
 762         }
 763
 764         @staticmethod
 765         def suitable(url):
 766                 return (re.match(YoutubeIE._VALID_URL, url) is not None)
 767
 768         def report_lang(self):
 769                 """Report attempt to set language."""
 770                 self._downloader.to_screen(u'[youtube] Setting language')
 771
 772         def report_login(self):
 773                 """Report attempt to log in."""
 774                 self._downloader.to_screen(u'[youtube] Logging in')
 775
 776         def report_age_confirmation(self):
 777                 """Report attempt to confirm age."""
 778                 self._downloader.to_screen(u'[youtube] Confirming age')
 779
 780         def report_video_webpage_download(self, video_id):
 781                 """Report attempt to download video webpage."""
 782                 self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
 783
 784         def report_video_info_webpage_download(self, video_id):
 785                 """Report attempt to download video info webpage."""
 786                 self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
 787
 788         def report_information_extraction(self, video_id):
 789                 """Report attempt to extract video information."""
 790                 self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
 791
 792         def report_unavailable_format(self, video_id, format):
 793                 """Report extracted video URL."""
 794                 self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
 795
 796         def report_rtmp_download(self):
 797                 """Indicate the download will use the RTMP protocol."""
 798                 self._downloader.to_screen(u'[youtube] RTMP download detected')
 799
 800         def _real_initialize(self):
 801                 if self._downloader is None:
 802                         return
 803
 804                 username = None
 805                 password = None
 806                 downloader_params = self._downloader.params
 807
 808                 # Attempt to use provided username and password or .netrc data
 809                 if downloader_params.get('username', None) is not None:
 810                         username = downloader_params['username']
 811                         password = downloader_params['password']
 812                 elif downloader_params.get('usenetrc', False):
 813                         try:
 814                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
 815                                 if info is not None:
 816                                         username = info[0]
 817                                         password = info[2]
 818                                 else:
 819                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
 820                         except (IOError, netrc.NetrcParseError), err:
 821                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
 822                                 return
 823
 824                 # Set language
 825                 request = urllib2.Request(self._LANG_URL, None, std_headers)
 826                 try:
 827                         self.report_lang()
 828                         urllib2.urlopen(request).read()
 829                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 830                         self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
 831                         return
 832
 833                 # No authentication to be performed
 834                 if username is None:
 835                         return
 836
 837                 # Log in
 838                 login_form = {
 839                                 'current_form': 'loginForm',
 840                                 'next':         '/',
 841                                 'action_login': 'Log In',
 842                                 'username':     username,
 843                                 'password':     password,
 844                                 }
 845                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
 846                 try:
 847                         self.report_login()
 848                         login_results = urllib2.urlopen(request).read()
 849                         if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
 850                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
 851                                 return
 852                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 853                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
 854                         return
 855
 856                 # Confirm age
 857                 age_form = {
 858                                 'next_url':             '/',
 859                                 'action_confirm':       'Confirm',
 860                                 }
 861                 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
 862                 try:
 863                         self.report_age_confirmation()
 864                         age_results = urllib2.urlopen(request).read()
 865                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 866                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
 867                         return
 868
 869         def _real_extract(self, url):
 870                 # Extract video id from URL
 871                 mobj = re.match(self._VALID_URL, url)
 872                 if mobj is None:
 873                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
 874                         return
 875                 video_id = mobj.group(2)
 876
 877                 # Get video webpage
 878                 self.report_video_webpage_download(video_id)
 879                 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id, None, std_headers)
 880                 try:
 881                         video_webpage = urllib2.urlopen(request).read()
 882                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 883                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
 884                         return
 885
 886                 # Attempt to extract SWF player URL
 887                 mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
 888                 if mobj is not None:
 889                         player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
 890                 else:
 891                         player_url = None
 892
 893                 # Get video info
 894                 self.report_video_info_webpage_download(video_id)
 895                 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
 896                         video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
 897                                            % (video_id, el_type))
 898                         request = urllib2.Request(video_info_url, None, std_headers)
 899                         try:
 900                                 video_info_webpage = urllib2.urlopen(request).read()
 901                                 video_info = parse_qs(video_info_webpage)
 902                                 if 'token' in video_info:
 903                                         break
 904                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 905                                 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
 906                                 return
 907                 if 'token' not in video_info:
 908                         if 'reason' in video_info:
 909                                 self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
 910                         else:
 911                                 self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
 912                         return
 913
 914                 # Start extracting information
 915                 self.report_information_extraction(video_id)
 916
 917                 # uploader
 918                 if 'author' not in video_info:
 919                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
 920                         return
 921                 video_uploader = urllib.unquote_plus(video_info['author'][0])
 922
 923                 # title
 924                 if 'title' not in video_info:
 925                         self._downloader.trouble(u'ERROR: unable to extract video title')
 926                         return
 927                 video_title = urllib.unquote_plus(video_info['title'][0])
 928                 video_title = video_title.decode('utf-8')
 929                 video_title = sanitize_title(video_title)
 930
 931                 # simplified title
 932                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
 933                 simple_title = simple_title.strip(ur'_')
 934
 935                 # thumbnail image
 936                 if 'thumbnail_url' not in video_info:
 937                         self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
 938                         video_thumbnail = ''
 939                 else:   # don't panic if we can't find it
 940                         video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
 941
 942                 # upload date
 943                 upload_date = u'NA'
 944                 mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
 945                 if mobj is not None:
 946                         upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
 947                         format_expressions = ['%d %B %Y', '%B %d %Y']
 948                         for expression in format_expressions:
 949                                 try:
 950                                         upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
 951                                 except:
 952                                         pass
 953
 954                 # description
 955                 video_description = 'No description available.'
 956                 if self._downloader.params.get('forcedescription', False):
 957                         mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
 958                         if mobj is not None:
 959                                 video_description = mobj.group(1)
 960
 961                 # token
 962                 video_token = urllib.unquote_plus(video_info['token'][0])
 963
 964                 # Decide which formats to download
 965                 req_format = self._downloader.params.get('format', None)
 966
 967                 if 'fmt_url_map' in video_info:
 968                         url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
 969                         format_limit = self._downloader.params.get('format_limit', None)
 970                         if format_limit is not None and format_limit in self._available_formats:
 971                                 format_list = self._available_formats[self._available_formats.index(format_limit):]
 972                         else:
 973                                 format_list = self._available_formats
 974                         existing_formats = [x for x in format_list if x in url_map]
 975                         if len(existing_formats) == 0:
 976                                 self._downloader.trouble(u'ERROR: no known formats available for video')
 977                                 return
 978                         if req_format is None:
 979                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
 980                         elif req_format == '-1':
 981                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
 982                         else:
 983                                 # Specific format
 984                                 if req_format not in url_map:
 985                                         self._downloader.trouble(u'ERROR: requested format not available')
 986                                         return
 987                                 video_url_list = [(req_format, url_map[req_format])] # Specific format
 988
 989                 elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
 990                         self.report_rtmp_download()
 991                         video_url_list = [(None, video_info['conn'][0])]
 992
 993                 else:
 994                         self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
 995                         return
 996
 997                 for format_param, video_real_url in video_url_list:
 998                         # At this point we have a new video
 999                         self._downloader.increment_downloads()
1000
1001                         # Extension
1002                         video_extension = self._video_extensions.get(format_param, 'flv')
1003
1004                         # Find the video URL in fmt_url_map or conn paramters
1005                         try:
1006                                 # Process video information
1007                                 self._downloader.process_info({
1008                                         'id':           video_id.decode('utf-8'),
1009                                         'url':          video_real_url.decode('utf-8'),
1010                                         'uploader':     video_uploader.decode('utf-8'),
1011                                         'upload_date':  upload_date,
1012                                         'title':        video_title,
1013                                         'stitle':       simple_title,
1014                                         'ext':          video_extension.decode('utf-8'),
1015                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
1016                                         'thumbnail':    video_thumbnail.decode('utf-8'),
1017                                         'description':  video_description.decode('utf-8'),
1018                                         'player_url':   player_url,
1019                                 })
1020                         except UnavailableVideoError, err:
1021                                 self._downloader.trouble(u'\nERROR: unable to download video')
1022
1023
1024 class MetacafeIE(InfoExtractor):
1025         """Information Extractor for metacafe.com."""
1026
1027         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
1028         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
1029         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
1030         _youtube_ie = None
1031
1032         def __init__(self, youtube_ie, downloader=None):
1033                 InfoExtractor.__init__(self, downloader)
1034                 self._youtube_ie = youtube_ie
1035
1036         @staticmethod
1037         def suitable(url):
1038                 return (re.match(MetacafeIE._VALID_URL, url) is not None)
1039
1040         def report_disclaimer(self):
1041                 """Report disclaimer retrieval."""
1042                 self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
1043
1044         def report_age_confirmation(self):
1045                 """Report attempt to confirm age."""
1046                 self._downloader.to_screen(u'[metacafe] Confirming age')
1047
1048         def report_download_webpage(self, video_id):
1049                 """Report webpage download."""
1050                 self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
1051
1052         def report_extraction(self, video_id):
1053                 """Report information extraction."""
1054                 self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
1055
1056         def _real_initialize(self):
1057                 # Retrieve disclaimer
1058                 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
1059                 try:
1060                         self.report_disclaimer()
1061                         disclaimer = urllib2.urlopen(request).read()
1062                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1063                         self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
1064                         return
1065
1066                 # Confirm age
1067                 disclaimer_form = {
1068                         'filters': '0',
1069                         'submit': "Continue - I'm over 18",
1070                         }
1071                 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
1072                 try:
1073                         self.report_age_confirmation()
1074                         disclaimer = urllib2.urlopen(request).read()
1075                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1076                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
1077                         return
1078
1079         def _real_extract(self, url):
1080                 # Extract id and simplified title from URL
1081                 mobj = re.match(self._VALID_URL, url)
1082                 if mobj is None:
1083                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1084                         return
1085
1086                 video_id = mobj.group(1)
1087
1088                 # Check if video comes from YouTube
1089                 mobj2 = re.match(r'^yt-(.*)$', video_id)
1090                 if mobj2 is not None:
1091                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1092                         return
1093
1094                 # At this point we have a new video
1095                 self._downloader.increment_downloads()
1096
1097                 simple_title = mobj.group(2).decode('utf-8')
1098
1099                 # Retrieve video webpage to extract further information
1100                 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1101                 try:
1102                         self.report_download_webpage(video_id)
1103                         webpage = urllib2.urlopen(request).read()
1104                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1105                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1106                         return
1107
1108                 # Extract URL, uploader and title from webpage
1109                 self.report_extraction(video_id)
1110                 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
1111                 if mobj is not None:
1112                         mediaURL = urllib.unquote(mobj.group(1))
1113                         video_extension = mediaURL[-3:]
1114
1115                         # Extract gdaKey if available
1116                         mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1117                         if mobj is None:
1118                                 video_url = mediaURL
1119                         else:
1120                                 gdaKey = mobj.group(1)
1121                                 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
1122                 else:
1123                         mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
1124                         if mobj is None:
1125                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1126                                 return
1127                         vardict = parse_qs(mobj.group(1))
1128                         if 'mediaData' not in vardict:
1129                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1130                                 return
1131                         mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
1132                         if mobj is None:
1133                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1134                                 return
1135                         mediaURL = mobj.group(1).replace('\\/', '/')
1136                         video_extension = mediaURL[-3:]
1137                         video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
1138
1139                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
1140                 if mobj is None:
1141                         self._downloader.trouble(u'ERROR: unable to extract title')
1142                         return
1143                 video_title = mobj.group(1).decode('utf-8')
1144                 video_title = sanitize_title(video_title)
1145
1146                 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
1147                 if mobj is None:
1148                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1149                         return
1150                 video_uploader = mobj.group(1)
1151
1152                 try:
1153                         # Process video information
1154                         self._downloader.process_info({
1155                                 'id':           video_id.decode('utf-8'),
1156                                 'url':          video_url.decode('utf-8'),
1157                                 'uploader':     video_uploader.decode('utf-8'),
1158                                 'upload_date':  u'NA',
1159                                 'title':        video_title,
1160                                 'stitle':       simple_title,
1161                                 'ext':          video_extension.decode('utf-8'),
1162                                 'format':       u'NA',
1163                                 'player_url':   None,
1164                         })
1165                 except UnavailableVideoError:
1166                         self._downloader.trouble(u'\nERROR: unable to download video')
1167
1168
1169 class DailymotionIE(InfoExtractor):
1170         """Information Extractor for Dailymotion"""
1171
1172         _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1173
1174         def __init__(self, downloader=None):
1175                 InfoExtractor.__init__(self, downloader)
1176
1177         @staticmethod
1178         def suitable(url):
1179                 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1180
1181         def report_download_webpage(self, video_id):
1182                 """Report webpage download."""
1183                 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
1184
1185         def report_extraction(self, video_id):
1186                 """Report information extraction."""
1187                 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
1188
1189         def _real_initialize(self):
1190                 return
1191
1192         def _real_extract(self, url):
1193                 # Extract id and simplified title from URL
1194                 mobj = re.match(self._VALID_URL, url)
1195                 if mobj is None:
1196                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1197                         return
1198
1199                 # At this point we have a new video
1200                 self._downloader.increment_downloads()
1201                 video_id = mobj.group(1)
1202
1203                 simple_title = mobj.group(2).decode('utf-8')
1204                 video_extension = 'flv'
1205
1206                 # Retrieve video webpage to extract further information
1207                 request = urllib2.Request(url)
1208                 try:
1209                         self.report_download_webpage(video_id)
1210                         webpage = urllib2.urlopen(request).read()
1211                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1212                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1213                         return
1214
1215                 # Extract URL, uploader and title from webpage
1216                 self.report_extraction(video_id)
1217                 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1218                 if mobj is None:
1219                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1220                         return
1221                 mediaURL = urllib.unquote(mobj.group(1))
1222
1223                 # if needed add http://www.dailymotion.com/ if relative URL
1224
1225                 video_url = mediaURL
1226
1227                 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1228                 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1229                 if mobj is None:
1230                         self._downloader.trouble(u'ERROR: unable to extract title')
1231                         return
1232                 video_title = mobj.group(1).decode('utf-8')
1233                 video_title = sanitize_title(video_title)
1234
1235                 mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a>', webpage)
1236                 if mobj is None:
1237                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1238                         return
1239                 video_uploader = mobj.group(1)
1240
1241                 try:
1242                         # Process video information
1243                         self._downloader.process_info({
1244                                 'id':           video_id.decode('utf-8'),
1245                                 'url':          video_url.decode('utf-8'),
1246                                 'uploader':     video_uploader.decode('utf-8'),
1247                                 'upload_date':  u'NA',
1248                                 'title':        video_title,
1249                                 'stitle':       simple_title,
1250                                 'ext':          video_extension.decode('utf-8'),
1251                                 'format':       u'NA',
1252                                 'player_url':   None,
1253                         })
1254                 except UnavailableVideoError:
1255                         self._downloader.trouble(u'\nERROR: unable to download video')
1256
1257 class GoogleIE(InfoExtractor):
1258         """Information extractor for video.google.com."""
1259
1260         _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1261
1262         def __init__(self, downloader=None):
1263                 InfoExtractor.__init__(self, downloader)
1264
1265         @staticmethod
1266         def suitable(url):
1267                 return (re.match(GoogleIE._VALID_URL, url) is not None)
1268
1269         def report_download_webpage(self, video_id):
1270                 """Report webpage download."""
1271                 self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
1272
1273         def report_extraction(self, video_id):
1274                 """Report information extraction."""
1275                 self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
1276
1277         def _real_initialize(self):
1278                 return
1279
1280         def _real_extract(self, url):
1281                 # Extract id from URL
1282                 mobj = re.match(self._VALID_URL, url)
1283                 if mobj is None:
1284                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1285                         return
1286
1287                 # At this point we have a new video
1288                 self._downloader.increment_downloads()
1289                 video_id = mobj.group(1)
1290
1291                 video_extension = 'mp4'
1292
1293                 # Retrieve video webpage to extract further information
1294                 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1295                 try:
1296                         self.report_download_webpage(video_id)
1297                         webpage = urllib2.urlopen(request).read()
1298                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1299                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1300                         return
1301
1302                 # Extract URL, uploader, and title from webpage
1303                 self.report_extraction(video_id)
1304                 mobj = re.search(r"download_url:'([^']+)'", webpage)
1305                 if mobj is None:
1306                         video_extension = 'flv'
1307                         mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1308                 if mobj is None:
1309                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1310                         return
1311                 mediaURL = urllib.unquote(mobj.group(1))
1312                 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1313                 mediaURL = mediaURL.replace('\\x26', '\x26')
1314
1315                 video_url = mediaURL
1316
1317                 mobj = re.search(r'<title>(.*)</title>', webpage)
1318                 if mobj is None:
1319                         self._downloader.trouble(u'ERROR: unable to extract title')
1320                         return
1321                 video_title = mobj.group(1).decode('utf-8')
1322                 video_title = sanitize_title(video_title)
1323                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1324
1325                 # Extract video description
1326                 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1327                 if mobj is None:
1328                         self._downloader.trouble(u'ERROR: unable to extract video description')
1329                         return
1330                 video_description = mobj.group(1).decode('utf-8')
1331                 if not video_description:
1332                         video_description = 'No description available.'
1333
1334                 # Extract video thumbnail
1335                 if self._downloader.params.get('forcethumbnail', False):
1336                         request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1337                         try:
1338                                 webpage = urllib2.urlopen(request).read()
1339                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1340                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1341                                 return
1342                         mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1343                         if mobj is None:
1344                                 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1345                                 return
1346                         video_thumbnail = mobj.group(1)
1347                 else:   # we need something to pass to process_info
1348                         video_thumbnail = ''
1349
1350
1351                 try:
1352                         # Process video information
1353                         self._downloader.process_info({
1354                                 'id':           video_id.decode('utf-8'),
1355                                 'url':          video_url.decode('utf-8'),
1356                                 'uploader':     u'NA',
1357                                 'upload_date':  u'NA',
1358                                 'title':        video_title,
1359                                 'stitle':       simple_title,
1360                                 'ext':          video_extension.decode('utf-8'),
1361                                 'format':       u'NA',
1362                                 'player_url':   None,
1363                         })
1364                 except UnavailableVideoError:
1365                         self._downloader.trouble(u'\nERROR: unable to download video')
1366
1367
1368 class PhotobucketIE(InfoExtractor):
1369         """Information extractor for photobucket.com."""
1370
1371         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1372
1373         def __init__(self, downloader=None):
1374                 InfoExtractor.__init__(self, downloader)
1375
1376         @staticmethod
1377         def suitable(url):
1378                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1379
1380         def report_download_webpage(self, video_id):
1381                 """Report webpage download."""
1382                 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1383
1384         def report_extraction(self, video_id):
1385                 """Report information extraction."""
1386                 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1387
1388         def _real_initialize(self):
1389                 return
1390
1391         def _real_extract(self, url):
1392                 # Extract id from URL
1393                 mobj = re.match(self._VALID_URL, url)
1394                 if mobj is None:
1395                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1396                         return
1397
1398                 # At this point we have a new video
1399                 self._downloader.increment_downloads()
1400                 video_id = mobj.group(1)
1401
1402                 video_extension = 'flv'
1403
1404                 # Retrieve video webpage to extract further information
1405                 request = urllib2.Request(url)
1406                 try:
1407                         self.report_download_webpage(video_id)
1408                         webpage = urllib2.urlopen(request).read()
1409                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1410                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1411                         return
1412
1413                 # Extract URL, uploader, and title from webpage
1414                 self.report_extraction(video_id)
1415                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1416                 if mobj is None:
1417                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1418                         return
1419                 mediaURL = urllib.unquote(mobj.group(1))
1420
1421                 video_url = mediaURL
1422
1423                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1424                 if mobj is None:
1425                         self._downloader.trouble(u'ERROR: unable to extract title')
1426                         return
1427                 video_title = mobj.group(1).decode('utf-8')
1428                 video_title = sanitize_title(video_title)
1429                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1430
1431                 video_uploader = mobj.group(2).decode('utf-8')
1432
1433                 try:
1434                         # Process video information
1435                         self._downloader.process_info({
1436                                 'id':           video_id.decode('utf-8'),
1437                                 'url':          video_url.decode('utf-8'),
1438                                 'uploader':     video_uploader,
1439                                 'upload_date':  u'NA',
1440                                 'title':        video_title,
1441                                 'stitle':       simple_title,
1442                                 'ext':          video_extension.decode('utf-8'),
1443                                 'format':       u'NA',
1444                                 'player_url':   None,
1445                         })
1446                 except UnavailableVideoError:
1447                         self._downloader.trouble(u'\nERROR: unable to download video')
1448
1449
1450 class YahooIE(InfoExtractor):
1451         """Information extractor for video.yahoo.com."""
1452
1453         # _VALID_URL matches all Yahoo! Video URLs
1454         # _VPAGE_URL matches only the extractable '/watch/' URLs
1455         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1456         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1457
1458         def __init__(self, downloader=None):
1459                 InfoExtractor.__init__(self, downloader)
1460
1461         @staticmethod
1462         def suitable(url):
1463                 return (re.match(YahooIE._VALID_URL, url) is not None)
1464
1465         def report_download_webpage(self, video_id):
1466                 """Report webpage download."""
1467                 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1468
1469         def report_extraction(self, video_id):
1470                 """Report information extraction."""
1471                 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1472
1473         def _real_initialize(self):
1474                 return
1475
1476         def _real_extract(self, url, new_video=True):
1477                 # Extract ID from URL
1478                 mobj = re.match(self._VALID_URL, url)
1479                 if mobj is None:
1480                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1481                         return
1482
1483                 # At this point we have a new video
1484                 self._downloader.increment_downloads()
1485                 video_id = mobj.group(2)
1486                 video_extension = 'flv'
1487
1488                 # Rewrite valid but non-extractable URLs as
1489                 # extractable English language /watch/ URLs
1490                 if re.match(self._VPAGE_URL, url) is None:
1491                         request = urllib2.Request(url)
1492                         try:
1493                                 webpage = urllib2.urlopen(request).read()
1494                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1495                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1496                                 return
1497
1498                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1499                         if mobj is None:
1500                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1501                                 return
1502                         yahoo_id = mobj.group(1)
1503
1504                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1505                         if mobj is None:
1506                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1507                                 return
1508                         yahoo_vid = mobj.group(1)
1509
1510                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1511                         return self._real_extract(url, new_video=False)
1512
1513                 # Retrieve video webpage to extract further information
1514                 request = urllib2.Request(url)
1515                 try:
1516                         self.report_download_webpage(video_id)
1517                         webpage = urllib2.urlopen(request).read()
1518                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1519                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1520                         return
1521
1522                 # Extract uploader and title from webpage
1523                 self.report_extraction(video_id)
1524                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1525                 if mobj is None:
1526                         self._downloader.trouble(u'ERROR: unable to extract video title')
1527                         return
1528                 video_title = mobj.group(1).decode('utf-8')
1529                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1530
1531                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1532                 if mobj is None:
1533                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1534                         return
1535                 video_uploader = mobj.group(1).decode('utf-8')
1536
1537                 # Extract video thumbnail
1538                 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1539                 if mobj is None:
1540                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1541                         return
1542                 video_thumbnail = mobj.group(1).decode('utf-8')
1543
1544                 # Extract video description
1545                 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1546                 if mobj is None:
1547                         self._downloader.trouble(u'ERROR: unable to extract video description')
1548                         return
1549                 video_description = mobj.group(1).decode('utf-8')
1550                 if not video_description: video_description = 'No description available.'
1551
1552                 # Extract video height and width
1553                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1554                 if mobj is None:
1555                         self._downloader.trouble(u'ERROR: unable to extract video height')
1556                         return
1557                 yv_video_height = mobj.group(1)
1558
1559                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1560                 if mobj is None:
1561                         self._downloader.trouble(u'ERROR: unable to extract video width')
1562                         return
1563                 yv_video_width = mobj.group(1)
1564
1565                 # Retrieve video playlist to extract media URL
1566                 # I'm not completely sure what all these options are, but we
1567                 # seem to need most of them, otherwise the server sends a 401.
1568                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1569                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1570                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1571                                           '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1572                                           '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1573                 try:
1574                         self.report_download_webpage(video_id)
1575                         webpage = urllib2.urlopen(request).read()
1576                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1577                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1578                         return
1579
1580                 # Extract media URL from playlist XML
1581                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1582                 if mobj is None:
1583                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1584                         return
1585                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1586                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1587
1588                 try:
1589                         # Process video information
1590                         self._downloader.process_info({
1591                                 'id':           video_id.decode('utf-8'),
1592                                 'url':          video_url,
1593                                 'uploader':     video_uploader,
1594                                 'upload_date':  u'NA',
1595                                 'title':        video_title,
1596                                 'stitle':       simple_title,
1597                                 'ext':          video_extension.decode('utf-8'),
1598                                 'thumbnail':    video_thumbnail.decode('utf-8'),
1599                                 'description':  video_description,
1600                                 'thumbnail':    video_thumbnail,
1601                                 'description':  video_description,
1602                                 'player_url':   None,
1603                         })
1604                 except UnavailableVideoError:
1605                         self._downloader.trouble(u'\nERROR: unable to download video')
1606
1607
1608 class GenericIE(InfoExtractor):
1609         """Generic last-resort information extractor."""
1610
1611         def __init__(self, downloader=None):
1612                 InfoExtractor.__init__(self, downloader)
1613
1614         @staticmethod
1615         def suitable(url):
1616                 return True
1617
1618         def report_download_webpage(self, video_id):
1619                 """Report webpage download."""
1620                 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
1621                 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
1622
1623         def report_extraction(self, video_id):
1624                 """Report information extraction."""
1625                 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
1626
1627         def _real_initialize(self):
1628                 return
1629
1630         def _real_extract(self, url):
1631                 # At this point we have a new video
1632                 self._downloader.increment_downloads()
1633
1634                 video_id = url.split('/')[-1]
1635                 request = urllib2.Request(url)
1636                 try:
1637                         self.report_download_webpage(video_id)
1638                         webpage = urllib2.urlopen(request).read()
1639                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1640                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1641                         return
1642                 except ValueError, err:
1643                         # since this is the last-resort InfoExtractor, if
1644                         # this error is thrown, it'll be thrown here
1645                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1646                         return
1647
1648                 self.report_extraction(video_id)
1649                 # Start with something easy: JW Player in SWFObject
1650                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1651                 if mobj is None:
1652                         # Broaden the search a little bit
1653                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1654                 if mobj is None:
1655                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1656                         return
1657
1658                 # It's possible that one of the regexes
1659                 # matched, but returned an empty group:
1660                 if mobj.group(1) is None:
1661                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1662                         return
1663
1664                 video_url = urllib.unquote(mobj.group(1))
1665                 video_id  = os.path.basename(video_url)
1666
1667                 # here's a fun little line of code for you:
1668                 video_extension = os.path.splitext(video_id)[1][1:]
1669                 video_id        = os.path.splitext(video_id)[0]
1670
1671                 # it's tempting to parse this further, but you would
1672                 # have to take into account all the variations like
1673                 #   Video Title - Site Name
1674                 #   Site Name | Video Title
1675                 #   Video Title - Tagline | Site Name
1676                 # and so on and so forth; it's just not practical
1677                 mobj = re.search(r'<title>(.*)</title>', webpage)
1678                 if mobj is None:
1679                         self._downloader.trouble(u'ERROR: unable to extract title')
1680                         return
1681                 video_title = mobj.group(1).decode('utf-8')
1682                 video_title = sanitize_title(video_title)
1683                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1684
1685                 # video uploader is domain name
1686                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1687                 if mobj is None:
1688                         self._downloader.trouble(u'ERROR: unable to extract title')
1689                         return
1690                 video_uploader = mobj.group(1).decode('utf-8')
1691
1692                 try:
1693                         # Process video information
1694                         self._downloader.process_info({
1695                                 'id':           video_id.decode('utf-8'),
1696                                 'url':          video_url.decode('utf-8'),
1697                                 'uploader':     video_uploader,
1698                                 'upload_date':  u'NA',
1699                                 'title':        video_title,
1700                                 'stitle':       simple_title,
1701                                 'ext':          video_extension.decode('utf-8'),
1702                                 'format':       u'NA',
1703                                 'player_url':   None,
1704                         })
1705                 except UnavailableVideoError, err:
1706                         self._downloader.trouble(u'\nERROR: unable to download video')
1707
1708
1709 class YoutubeSearchIE(InfoExtractor):
1710         """Information Extractor for YouTube search queries."""
1711         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1712         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1713         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1714         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1715         _youtube_ie = None
1716         _max_youtube_results = 1000
1717
1718         def __init__(self, youtube_ie, downloader=None):
1719                 InfoExtractor.__init__(self, downloader)
1720                 self._youtube_ie = youtube_ie
1721
1722         @staticmethod
1723         def suitable(url):
1724                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1725
1726         def report_download_page(self, query, pagenum):
1727                 """Report attempt to download playlist page with given number."""
1728                 query = query.decode(preferredencoding())
1729                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1730
1731         def _real_initialize(self):
1732                 self._youtube_ie.initialize()
1733
1734         def _real_extract(self, query):
1735                 mobj = re.match(self._VALID_QUERY, query)
1736                 if mobj is None:
1737                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1738                         return
1739
1740                 prefix, query = query.split(':')
1741                 prefix = prefix[8:]
1742                 query  = query.encode('utf-8')
1743                 if prefix == '':
1744                         self._download_n_results(query, 1)
1745                         return
1746                 elif prefix == 'all':
1747                         self._download_n_results(query, self._max_youtube_results)
1748                         return
1749                 else:
1750                         try:
1751                                 n = long(prefix)
1752                                 if n <= 0:
1753                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1754                                         return
1755                                 elif n > self._max_youtube_results:
1756                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
1757                                         n = self._max_youtube_results
1758                                 self._download_n_results(query, n)
1759                                 return
1760                         except ValueError: # parsing prefix as integer fails
1761                                 self._download_n_results(query, 1)
1762                                 return
1763
1764         def _download_n_results(self, query, n):
1765                 """Downloads a specified number of results for a query"""
1766
1767                 video_ids = []
1768                 already_seen = set()
1769                 pagenum = 1
1770
1771                 while True:
1772                         self.report_download_page(query, pagenum)
1773                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1774                         request = urllib2.Request(result_url, None, std_headers)
1775                         try:
1776                                 page = urllib2.urlopen(request).read()
1777                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1778                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1779                                 return
1780
1781                         # Extract video identifiers
1782                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1783                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1784                                 if video_id not in already_seen:
1785                                         video_ids.append(video_id)
1786                                         already_seen.add(video_id)
1787                                         if len(video_ids) == n:
1788                                                 # Specified n videos reached
1789                                                 for id in video_ids:
1790                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1791                                                 return
1792
1793                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1794                                 for id in video_ids:
1795                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1796                                 return
1797
1798                         pagenum = pagenum + 1
1799
1800 class GoogleSearchIE(InfoExtractor):
1801         """Information Extractor for Google Video search queries."""
1802         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1803         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1804         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1805         _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1806         _google_ie = None
1807         _max_google_results = 1000
1808
1809         def __init__(self, google_ie, downloader=None):
1810                 InfoExtractor.__init__(self, downloader)
1811                 self._google_ie = google_ie
1812
1813         @staticmethod
1814         def suitable(url):
1815                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1816
1817         def report_download_page(self, query, pagenum):
1818                 """Report attempt to download playlist page with given number."""
1819                 query = query.decode(preferredencoding())
1820                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1821
1822         def _real_initialize(self):
1823                 self._google_ie.initialize()
1824
1825         def _real_extract(self, query):
1826                 mobj = re.match(self._VALID_QUERY, query)
1827                 if mobj is None:
1828                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1829                         return
1830
1831                 prefix, query = query.split(':')
1832                 prefix = prefix[8:]
1833                 query  = query.encode('utf-8')
1834                 if prefix == '':
1835                         self._download_n_results(query, 1)
1836                         return
1837                 elif prefix == 'all':
1838                         self._download_n_results(query, self._max_google_results)
1839                         return
1840                 else:
1841                         try:
1842                                 n = long(prefix)
1843                                 if n <= 0:
1844                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1845                                         return
1846                                 elif n > self._max_google_results:
1847                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n))
1848                                         n = self._max_google_results
1849                                 self._download_n_results(query, n)
1850                                 return
1851                         except ValueError: # parsing prefix as integer fails
1852                                 self._download_n_results(query, 1)
1853                                 return
1854
1855         def _download_n_results(self, query, n):
1856                 """Downloads a specified number of results for a query"""
1857
1858                 video_ids = []
1859                 already_seen = set()
1860                 pagenum = 1
1861
1862                 while True:
1863                         self.report_download_page(query, pagenum)
1864                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1865                         request = urllib2.Request(result_url, None, std_headers)
1866                         try:
1867                                 page = urllib2.urlopen(request).read()
1868                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1869                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1870                                 return
1871
1872                         # Extract video identifiers
1873                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1874                                 video_id = mobj.group(1)
1875                                 if video_id not in already_seen:
1876                                         video_ids.append(video_id)
1877                                         already_seen.add(video_id)
1878                                         if len(video_ids) == n:
1879                                                 # Specified n videos reached
1880                                                 for id in video_ids:
1881                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1882                                                 return
1883
1884                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1885                                 for id in video_ids:
1886                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1887                                 return
1888
1889                         pagenum = pagenum + 1
1890
1891 class YahooSearchIE(InfoExtractor):
1892         """Information Extractor for Yahoo! Video search queries."""
1893         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
1894         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
1895         _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
1896         _MORE_PAGES_INDICATOR = r'\s*Next'
1897         _yahoo_ie = None
1898         _max_yahoo_results = 1000
1899
1900         def __init__(self, yahoo_ie, downloader=None):
1901                 InfoExtractor.__init__(self, downloader)
1902                 self._yahoo_ie = yahoo_ie
1903
1904         @staticmethod
1905         def suitable(url):
1906                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
1907
1908         def report_download_page(self, query, pagenum):
1909                 """Report attempt to download playlist page with given number."""
1910                 query = query.decode(preferredencoding())
1911                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
1912
1913         def _real_initialize(self):
1914                 self._yahoo_ie.initialize()
1915
1916         def _real_extract(self, query):
1917                 mobj = re.match(self._VALID_QUERY, query)
1918                 if mobj is None:
1919                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1920                         return
1921
1922                 prefix, query = query.split(':')
1923                 prefix = prefix[8:]
1924                 query  = query.encode('utf-8')
1925                 if prefix == '':
1926                         self._download_n_results(query, 1)
1927                         return
1928                 elif prefix == 'all':
1929                         self._download_n_results(query, self._max_yahoo_results)
1930                         return
1931                 else:
1932                         try:
1933                                 n = long(prefix)
1934                                 if n <= 0:
1935                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1936                                         return
1937                                 elif n > self._max_yahoo_results:
1938                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n))
1939                                         n = self._max_yahoo_results
1940                                 self._download_n_results(query, n)
1941                                 return
1942                         except ValueError: # parsing prefix as integer fails
1943                                 self._download_n_results(query, 1)
1944                                 return
1945
1946         def _download_n_results(self, query, n):
1947                 """Downloads a specified number of results for a query"""
1948
1949                 video_ids = []
1950                 already_seen = set()
1951                 pagenum = 1
1952
1953                 while True:
1954                         self.report_download_page(query, pagenum)
1955                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1956                         request = urllib2.Request(result_url, None, std_headers)
1957                         try:
1958                                 page = urllib2.urlopen(request).read()
1959                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1960                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1961                                 return
1962
1963                         # Extract video identifiers
1964                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1965                                 video_id = mobj.group(1)
1966                                 if video_id not in already_seen:
1967                                         video_ids.append(video_id)
1968                                         already_seen.add(video_id)
1969                                         if len(video_ids) == n:
1970                                                 # Specified n videos reached
1971                                                 for id in video_ids:
1972                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1973                                                 return
1974
1975                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1976                                 for id in video_ids:
1977                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1978                                 return
1979
1980                         pagenum = pagenum + 1
1981
1982 class YoutubePlaylistIE(InfoExtractor):
1983         """Information Extractor for YouTube playlists."""
1984
1985         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
1986         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1987         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1988         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1989         _youtube_ie = None
1990
1991         def __init__(self, youtube_ie, downloader=None):
1992                 InfoExtractor.__init__(self, downloader)
1993                 self._youtube_ie = youtube_ie
1994
1995         @staticmethod
1996         def suitable(url):
1997                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1998
1999         def report_download_page(self, playlist_id, pagenum):
2000                 """Report attempt to download playlist page with given number."""
2001                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
2002
2003         def _real_initialize(self):
2004                 self._youtube_ie.initialize()
2005
2006         def _real_extract(self, url):
2007                 # Extract playlist id
2008                 mobj = re.match(self._VALID_URL, url)
2009                 if mobj is None:
2010                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2011                         return
2012
2013                 # Download playlist pages
2014                 playlist_id = mobj.group(1)
2015                 video_ids = []
2016                 pagenum = 1
2017
2018                 while True:
2019                         self.report_download_page(playlist_id, pagenum)
2020                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
2021                         try:
2022                                 page = urllib2.urlopen(request).read()
2023                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2024                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2025                                 return
2026
2027                         # Extract video identifiers
2028                         ids_in_page = []
2029                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2030                                 if mobj.group(1) not in ids_in_page:
2031                                         ids_in_page.append(mobj.group(1))
2032                         video_ids.extend(ids_in_page)
2033
2034                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2035                                 break
2036                         pagenum = pagenum + 1
2037
2038                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2039                 playlistend = self._downloader.params.get('playlistend', -1)
2040                 video_ids = video_ids[playliststart:playlistend]
2041
2042                 for id in video_ids:
2043                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2044                 return
2045
2046 class YoutubeUserIE(InfoExtractor):
2047         """Information Extractor for YouTube users."""
2048
2049         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
2050         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2051         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
2052         _youtube_ie = None
2053
2054         def __init__(self, youtube_ie, downloader=None):
2055                 InfoExtractor.__init__(self, downloader)
2056                 self._youtube_ie = youtube_ie
2057
2058         @staticmethod
2059         def suitable(url):
2060                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2061
2062         def report_download_page(self, username):
2063                 """Report attempt to download user page."""
2064                 self._downloader.to_screen(u'[youtube] user %s: Downloading page ' % (username))
2065
2066         def _real_initialize(self):
2067                 self._youtube_ie.initialize()
2068
2069         def _real_extract(self, url):
2070                 # Extract username
2071                 mobj = re.match(self._VALID_URL, url)
2072                 if mobj is None:
2073                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2074                         return
2075
2076                 # Download user page
2077                 username = mobj.group(1)
2078                 video_ids = []
2079                 pagenum = 1
2080
2081                 self.report_download_page(username)
2082                 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
2083                 try:
2084                         page = urllib2.urlopen(request).read()
2085                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2086                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2087                         return
2088
2089                 # Extract video identifiers
2090                 ids_in_page = []
2091
2092                 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2093                         if mobj.group(1) not in ids_in_page:
2094                                 ids_in_page.append(mobj.group(1))
2095                 video_ids.extend(ids_in_page)
2096
2097                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2098                 playlistend = self._downloader.params.get('playlistend', -1)
2099                 video_ids = video_ids[playliststart:playlistend]
2100
2101                 for id in video_ids:
2102                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2103                 return
2104
2105 class DepositFilesIE(InfoExtractor):
2106         """Information extractor for depositfiles.com"""
2107
2108         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
2109
2110         def __init__(self, downloader=None):
2111                 InfoExtractor.__init__(self, downloader)
2112
2113         @staticmethod
2114         def suitable(url):
2115                 return (re.match(DepositFilesIE._VALID_URL, url) is not None)
2116
2117         def report_download_webpage(self, file_id):
2118                 """Report webpage download."""
2119                 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2120
2121         def report_extraction(self, file_id):
2122                 """Report information extraction."""
2123                 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2124
2125         def _real_initialize(self):
2126                 return
2127
2128         def _real_extract(self, url):
2129                 # At this point we have a new file
2130                 self._downloader.increment_downloads()
2131
2132                 file_id = url.split('/')[-1]
2133                 # Rebuild url in english locale
2134                 url = 'http://depositfiles.com/en/files/' + file_id
2135
2136                 # Retrieve file webpage with 'Free download' button pressed
2137                 free_download_indication = { 'gateway_result' : '1' }
2138                 request = urllib2.Request(url, urllib.urlencode(free_download_indication), std_headers)
2139                 try:
2140                         self.report_download_webpage(file_id)
2141                         webpage = urllib2.urlopen(request).read()
2142                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2143                         self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2144                         return
2145
2146                 # Search for the real file URL
2147                 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2148                 if (mobj is None) or (mobj.group(1) is None):
2149                         # Try to figure out reason of the error.
2150                         mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2151                         if (mobj is not None) and (mobj.group(1) is not None):
2152                                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2153                                 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2154                         else:
2155                                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2156                         return
2157
2158                 file_url = mobj.group(1)
2159                 file_extension = os.path.splitext(file_url)[1][1:]
2160
2161                 # Search for file title
2162                 mobj = re.search(r'<b title="(.*?)">', webpage)
2163                 if mobj is None:
2164                         self._downloader.trouble(u'ERROR: unable to extract title')
2165                         return
2166                 file_title = mobj.group(1).decode('utf-8')
2167
2168                 try:
2169                         # Process file information
2170                         self._downloader.process_info({
2171                                 'id':           file_id.decode('utf-8'),
2172                                 'url':          file_url.decode('utf-8'),
2173                                 'uploader':     u'NA',
2174                                 'upload_date':  u'NA',
2175                                 'title':        file_title,
2176                                 'stitle':       file_title,
2177                                 'ext':          file_extension.decode('utf-8'),
2178                                 'format':       u'NA',
2179                                 'player_url':   None,
2180                         })
2181                 except UnavailableVideoError, err:
2182                         self._downloader.trouble(u'ERROR: unable to download file')
2183
class PostProcessor(object):
        """Base class for post processing steps.

        A downloader keeps a chain of PostProcessor objects, registered
        through its add_post_processor() method. After a successful
        download it calls run() on each of them in turn: the first one
        receives the initial information dictionary and every following
        one receives whatever the previous one returned.

        Processing stops as soon as a run() call returns None, or when
        the last object of the chain has been called.

        As with InfoExtractor objects, registration is mutual: the
        PostProcessor also gets to know the downloader it belongs to.
        """

        # Downloader this object is attached to, if any
        _downloader = None

        def __init__(self, downloader=None):
                self._downloader = downloader

        def set_downloader(self, downloader):
                """Attach this PP to the given downloader."""
                self._downloader = downloader

        def run(self, information):
                """Perform the post processing step.

                "information" is a dictionary like the ones InfoExtractors
                build, with one additional "filepath" field holding the
                location of the downloaded file.

                Return None to stop the post processing chain, or an
                information dictionary (possibly the received one with
                some fields changed) to be given to the next object in
                the chain.

                A PostProcessingError may be raised; the calling
                downloader takes it into account.
                """
                # The base implementation passes the information on untouched
                return information
2229
2230 ### MAIN PROGRAM ###
2231 if __name__ == '__main__':
2232         try:
2233                 # Modules needed only when running the main program
2234                 import getpass
2235                 import optparse
2236
2237                 # Function to update the program file with the latest version from bitbucket.org
2238                 def update_self(downloader, filename):
2239                         # Note: downloader only used for options
2240                         if not os.access (filename, os.W_OK):
2241                                 sys.exit('ERROR: no write permissions on %s' % filename)
2242
2243                         downloader.to_screen('Updating to latest stable version...')
2244                         latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
2245                         latest_version = urllib.urlopen(latest_url).read().strip()
2246                         prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
2247                         newcontent = urllib.urlopen(prog_url).read()
2248                         stream = open(filename, 'w')
2249                         stream.write(newcontent)
2250                         stream.close()
2251                         downloader.to_screen('Updated to version %s' % latest_version)
2252
2253                 # Parse command line
2254                 parser = optparse.OptionParser(
2255                         usage='Usage: %prog [options] url...',
2256                         version='2010.12.09',
2257                         conflict_handler='resolve',
2258                 )
2259
2260                 parser.add_option('-h', '--help',
2261                                 action='help', help='print this help text and exit')
2262                 parser.add_option('-v', '--version',
2263                                 action='version', help='print program version and exit')
2264                 parser.add_option('-U', '--update',
2265                                 action='store_true', dest='update_self', help='update this program to latest stable version')
2266                 parser.add_option('-i', '--ignore-errors',
2267                                 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
2268                 parser.add_option('-r', '--rate-limit',
2269                                 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
2270                 parser.add_option('-R', '--retries',
2271                                 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
2272                 parser.add_option('--playlist-start',
2273                                 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
2274                 parser.add_option('--playlist-end',
2275                                 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
2276                 parser.add_option('--dump-user-agent',
2277                                 action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False)
2278
2279                 authentication = optparse.OptionGroup(parser, 'Authentication Options')
2280                 authentication.add_option('-u', '--username',
2281                                 dest='username', metavar='USERNAME', help='account username')
2282                 authentication.add_option('-p', '--password',
2283                                 dest='password', metavar='PASSWORD', help='account password')
2284                 authentication.add_option('-n', '--netrc',
2285                                 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
2286                 parser.add_option_group(authentication)
2287
2288                 video_format = optparse.OptionGroup(parser, 'Video Format Options')
2289                 video_format.add_option('-f', '--format',
2290                                 action='store', dest='format', metavar='FORMAT', help='video format code')
2291                 video_format.add_option('--all-formats',
2292                                 action='store_const', dest='format', help='download all available video formats', const='-1')
2293                 video_format.add_option('--max-quality',
2294                                 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
2295                 parser.add_option_group(video_format)
2296
2297                 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
2298                 verbosity.add_option('-q', '--quiet',
2299                                 action='store_true', dest='quiet', help='activates quiet mode', default=False)
2300                 verbosity.add_option('-s', '--simulate',
2301                                 action='store_true', dest='simulate', help='do not download video', default=False)
2302                 verbosity.add_option('-g', '--get-url',
2303                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
2304                 verbosity.add_option('-e', '--get-title',
2305                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
2306                 verbosity.add_option('--get-thumbnail',
2307                                 action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
2308                 verbosity.add_option('--get-description',
2309                                 action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
2310                 verbosity.add_option('--no-progress',
2311                                 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
2312                 verbosity.add_option('--console-title',
2313                                 action='store_true', dest='consoletitle', help='display progress in console titlebar', default=False)
2314                 parser.add_option_group(verbosity)
2315
2316                 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
2317                 filesystem.add_option('-t', '--title',
2318                                 action='store_true', dest='usetitle', help='use title in file name', default=False)
2319                 filesystem.add_option('-l', '--literal',
2320                                 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
2321                 filesystem.add_option('-A', '--auto-number',
2322                                 action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False)
2323                 filesystem.add_option('-o', '--output',
2324                                 dest='outtmpl', metavar='TEMPLATE', help='output filename template')
2325                 filesystem.add_option('-a', '--batch-file',
2326                                 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
2327                 filesystem.add_option('-w', '--no-overwrites',
2328                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
2329                 filesystem.add_option('-c', '--continue',
2330                                 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
2331                 filesystem.add_option('--cookies',
2332                                 dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
2333                 filesystem.add_option('--no-part',
2334                                 action='store_true', dest='nopart', help='do not use .part files', default=False)
2335                 parser.add_option_group(filesystem)
2336
2337                 (opts, args) = parser.parse_args()
2338
2339                 # Open appropriate CookieJar
                     # Without a cookie file option a plain CookieJar (not bound to any file)
                     # is used. Otherwise a MozillaCookieJar bound to the given path is
                     # created and, if that file already exists and is readable, its
                     # cookies are loaded.
2340                 if opts.cookiefile is None:
2341                         jar = cookielib.CookieJar()
2342                 else:
2343                         try:
2344                                 jar = cookielib.MozillaCookieJar(opts.cookiefile)
                                     # A missing or unreadable file is not an error here: the jar
                                     # simply starts empty.
2345                                 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
2346                                         jar.load()
2347                         except (IOError, OSError), err:
                                     # err is not used; only a generic message is reported
2348                                 sys.exit(u'ERROR: unable to open cookie file')
2349
2350                 # Dump user agent
                     # Print the User-Agent value from std_headers and exit with status 0;
                     # nothing below this point runs in that case.
2351                 if opts.dump_user_agent:
2352                         print std_headers['User-Agent']
2353                         sys.exit(0)
2354
2355                 # General configuration
2356                 cookie_processor = urllib2.HTTPCookieProcessor(jar)
2357                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
2358                 urllib2.install_opener(urllib2.build_opener(cookie_processor))
2359                 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
2360
2361                 # Batch file verification
                     # Read extra URLs from the batch file ('-' means standard input), one
                     # per line. Surrounding whitespace is stripped; empty lines and lines
                     # starting with '#', '/' or ';' are dropped.
2362                 batchurls = []
2363                 if opts.batchfile is not None:
2364                         try:
2365                                 if opts.batchfile == '-':
2366                                         batchfd = sys.stdin
2367                                 else:
2368                                         batchfd = open(opts.batchfile, 'r')
                                     # NOTE(review): batchfd is never explicitly closed in this block
2369                                 batchurls = batchfd.readlines()
2370                                 batchurls = [x.strip() for x in batchurls]
2371                                 batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
2372                         except IOError:
2373                                 sys.exit(u'ERROR: batch file could not be read')
                     # Batch-file URLs come first in the list, command-line URLs after them
2374                 all_urls = batchurls + args
2375
2376                 # Conflicting, missing and erroneous options
                     # Problems are reported through parser.error(). The checks run top to
                     # bottom; several of them also normalise the option value in place
                     # (rate limit, retries, playlist bounds).
2377                 if opts.usenetrc and (opts.username is not None or opts.password is not None):
2378                         parser.error(u'using .netrc conflicts with giving username/password')
2379                 if opts.password is not None and opts.username is None:
2380                         parser.error(u'account username missing')
2381                 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
2382                         parser.error(u'using output template conflicts with using title, literal title or auto number')
2383                 if opts.usetitle and opts.useliteral:
2384                         parser.error(u'using title conflicts with using literal title')
                     # Username given without a password: ask for the password interactively.
                     # NOTE(review): getpass is not among the imports at the top of the file;
                     # presumably it is imported earlier in this block -- verify.
2385                 if opts.username is not None and opts.password is None:
2386                         opts.password = getpass.getpass(u'Type account password and press return:')
                     # Replace the textual rate limit by the value parse_bytes() returns;
                     # None from parse_bytes() is treated as "could not be parsed".
2387                 if opts.ratelimit is not None:
2388                         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
2389                         if numeric_limit is None:
2390                                 parser.error(u'invalid rate limit specified')
2391                         opts.ratelimit = numeric_limit
                     # NOTE(review): only convertibility to long is checked; a negative
                     # retry count passes this validation.
2392                 if opts.retries is not None:
2393                         try:
2394                                 opts.retries = long(opts.retries)
2395                         except (TypeError, ValueError), err:
2396                                 parser.error(u'invalid retry count specified')
                     # Playlist start must be an integer greater than zero
2397                 try:
2398                         opts.playliststart = long(opts.playliststart)
2399                         if opts.playliststart <= 0:
2400                                 raise ValueError
2401                 except (TypeError, ValueError), err:
2402                         parser.error(u'invalid playlist start number specified')
                     # Playlist end is either -1 (exempt from the range checks; presumably
                     # means "no end" -- confirm against the option default) or a positive
                     # integer not smaller than the playlist start.
2403                 try:
2404                         opts.playlistend = long(opts.playlistend)
2405                         if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
2406                                 raise ValueError
2407                 except (TypeError, ValueError), err:
2408                         parser.error(u'invalid playlist end number specified')
2409
2410                 # Information extractors
                     # One instance per extractor class. Some constructors receive another
                     # extractor instance (youtube_ie, google_ie, yahoo_ie) -- presumably so
                     # they can hand the actual video extraction over to it; confirm in the
                     # class definitions.
2411                 youtube_ie = YoutubeIE()
2412                 metacafe_ie = MetacafeIE(youtube_ie)
2413                 dailymotion_ie = DailymotionIE()
2414                 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
2415                 youtube_user_ie = YoutubeUserIE(youtube_ie)
2416                 youtube_search_ie = YoutubeSearchIE(youtube_ie)
2417                 google_ie = GoogleIE()
2418                 google_search_ie = GoogleSearchIE(google_ie)
2419                 photobucket_ie = PhotobucketIE()
2420                 yahoo_ie = YahooIE()
2421                 yahoo_search_ie = YahooSearchIE(yahoo_ie)
2422                 deposit_files_ie = DepositFilesIE()
2423                 generic_ie = GenericIE()
2424
2425                 # File downloader
                     # Build the FileDownloader from the validated options, then register
                     # the site-specific information extractors on it.
2426                 fd = FileDownloader({
2427                         'usenetrc': opts.usenetrc,
2428                         'username': opts.username,
2429                         'password': opts.password,
                             # Any of the "get" options (URL, title, thumbnail, description)
                             # also switches on 'quiet' and 'simulate'.
2430                         'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
2431                         'forceurl': opts.geturl,
2432                         'forcetitle': opts.gettitle,
2433                         'forcethumbnail': opts.getthumbnail,
2434                         'forcedescription': opts.getdescription,
2435                         'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
2436                         'format': opts.format,
2437                         'format_limit': opts.format_limit,
                             # Output template: the and/or chain yields the first truthy
                             # alternative, in this priority: the explicit template (decoded
                             # with the preferred encoding); the format '-1' variants; title
                             # or literal title combined with auto-number; title or literal
                             # title alone; auto-number alone; finally u'%(id)s.%(ext)s'.
                             # NOTE(review): with format '-1' one of the first three
                             # '-1' alternatives always wins, so auto-number never affects
                             # the template in that case.
                             # NOTE(review): an empty explicit template decodes to u''
                             # (falsy) and therefore falls through to the later alternatives.
2438                         'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
2439                                 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
2440                                 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
2441                                 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
2442                                 or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
2443                                 or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
2444                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
2445                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
2446                                 or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
2447                                 or u'%(id)s.%(ext)s'),
2448                         'ignoreerrors': opts.ignoreerrors,
2449                         'ratelimit': opts.ratelimit,
2450                         'nooverwrites': opts.nooverwrites,
2451                         'retries': opts.retries,
2452                         'continuedl': opts.continue_dl,
2453                         'noprogress': opts.noprogress,
2454                         'playliststart': opts.playliststart,
2455                         'playlistend': opts.playlistend,
                             # True only when the raw output template is exactly '-'
2456                         'logtostderr': opts.outtmpl == '-',
2457                         'consoletitle': opts.consoletitle,
2458                         'nopart': opts.nopart,
2459                         })
                     # Register the extractors on the downloader.
                     # NOTE(review): registration order is presumably significant (the
                     # generic extractor is deliberately added last as the fallback), so
                     # keep this order unless FileDownloader says otherwise.
2460                 fd.add_info_extractor(youtube_search_ie)
2461                 fd.add_info_extractor(youtube_pl_ie)
2462                 fd.add_info_extractor(youtube_user_ie)
2463                 fd.add_info_extractor(metacafe_ie)
2464                 fd.add_info_extractor(dailymotion_ie)
2465                 fd.add_info_extractor(youtube_ie)
2466                 fd.add_info_extractor(google_ie)
2467                 fd.add_info_extractor(google_search_ie)
2468                 fd.add_info_extractor(photobucket_ie)
2469                 fd.add_info_extractor(yahoo_ie)
2470                 fd.add_info_extractor(yahoo_search_ie)
2471                 fd.add_info_extractor(deposit_files_ie)
2472
2473                 # This must come last since it's the
2474                 # fallback if none of the others work
                     # (presumably extractors are tried in the order they were added --
                     # TODO confirm in FileDownloader)
2475                 fd.add_info_extractor(generic_ie)
2476
2477                 # Update version
                     # When self-update was requested, call update_self() with the
                     # downloader and sys.argv[0] (the name this script was invoked as).
2478                 if opts.update_self:
2479                         update_self(fd, sys.argv[0])
2480
2481                 # Maybe do nothing
                     # An empty URL list is a usage error, unless this run was a self-update,
                     # in which case the program just exits.
2482                 if len(all_urls) < 1:
2483                         if not opts.update_self:
2484                                 parser.error(u'you must provide at least one URL')
2485                         else:
2486                                 sys.exit()
                     # Process every URL; the returned code is used as the process exit
                     # status at the end of this block.
2487                 retcode = fd.download(all_urls)
2488
2489                 # Dump cookie jar if requested
                     # With a cookie file option, jar is the MozillaCookieJar created for
                     # that path, so save() is called without an explicit file name.
                     # NOTE(review): this point is only reached when fd.download() returned
                     # normally; if it raised, the jar is not written.
2490                 if opts.cookiefile is not None:
2491                         try:
2492                                 jar.save()
2493                         except (IOError, OSError), err:
                                     # err is not used; only a generic message is reported
2494                                 sys.exit(u'ERROR: unable to save cookie jar')
2495
2496                 sys.exit(retcode) # exit status is the value returned by fd.download()
2497
2498         except DownloadError:
                     # Exit with status 1 and no extra message (presumably the error was
                     # already reported where it occurred -- confirm in FileDownloader)
2499                 sys.exit(1)
2500         except SameFileError:
                     # Passing a string to sys.exit() prints it to stderr and exits with status 1
2501                 sys.exit(u'ERROR: fixed output name but more than one file to download')
2502         except KeyboardInterrupt:
                     # The leading newline moves the message off whatever line was being
                     # written when the user interrupted the program
2503                 sys.exit(u'\nERROR: Interrupted by user')