youtube-dl

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 __author__  = (
   5         "Ricardo Garcia Gonzalez",
   6         "Danny Colligan",
   7         "Benjamin Johnson",
   8         "Vasyl' Vavrychuk",
   9         "Witold Baryluk",
  10         "Paweł Paprota",
  11         "Gergely Imreh",
  12         )
  13
  14 __license__ = "Public Domain"
  15
  16 import cookielib
  17 import ctypes
  18 import datetime
  19 import email.utils
  20 import gzip
  21 import htmlentitydefs
  22 import httplib
  23 import locale
  24 import math
  25 import netrc
  26 import os
  27 import os.path
  28 import re
  29 import socket
  30 import string
  31 import StringIO
  32 import subprocess
  33 import sys
  34 import time
  35 import urllib
  36 import urllib2
  37 import zlib
  38
  39 # parse_qs was moved from the cgi module to the urlparse module recently.
  40 try:
  41         from urlparse import parse_qs
  42 except ImportError:
  43         from cgi import parse_qs
  44
  45 std_headers = {
  46         'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
  47         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
  48         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  49         'Accept-Encoding': 'gzip, deflate',
  50         'Accept-Language': 'en-us,en;q=0.5',
  51 }
  52
  53 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
  54
  55 def preferredencoding():
  56         """Get preferred encoding.
  57
  58         Returns the best encoding scheme for the system, based on
  59         locale.getpreferredencoding() and some further tweaks.
  60         """
  61         def yield_preferredencoding():
  62                 try:
  63                         pref = locale.getpreferredencoding()
  64                         u'TEST'.encode(pref)
  65                 except:
  66                         pref = 'UTF-8'
  67                 while True:
  68                         yield pref
  69         return yield_preferredencoding().next()
  70
  71 def htmlentity_transform(matchobj):
  72         """Transforms an HTML entity to a Unicode character.
  73
  74         This function receives a match object and is intended to be used with
  75         the re.sub() function.
  76         """
  77         entity = matchobj.group(1)
  78
  79         # Known non-numeric HTML entity
  80         if entity in htmlentitydefs.name2codepoint:
  81                 return unichr(htmlentitydefs.name2codepoint[entity])
  82
  83         # Unicode character
  84         mobj = re.match(ur'(?u)#(x?\d+)', entity)
  85         if mobj is not None:
  86                 numstr = mobj.group(1)
  87                 if numstr.startswith(u'x'):
  88                         base = 16
  89                         numstr = u'0%s' % numstr
  90                 else:
  91                         base = 10
  92                 return unichr(long(numstr, base))
  93
  94         # Unknown entity in name, return its literal representation
  95         return (u'&%s;' % entity)
  96
  97 def sanitize_title(utitle):
  98         """Sanitizes a video title so it could be used as part of a filename."""
  99         utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
 100         return utitle.replace(unicode(os.sep), u'%')
 101
 102 def sanitize_open(filename, open_mode):
 103         """Try to open the given filename, and slightly tweak it if this fails.
 104
 105         Attempts to open the given filename. If this fails, it tries to change
 106         the filename slightly, step by step, until it's either able to open it
 107         or it fails and raises a final exception, like the standard open()
 108         function.
 109
 110         It returns the tuple (stream, definitive_file_name).
 111         """
 112         try:
 113                 if filename == u'-':
 114                         if sys.platform == 'win32':
 115                                 import msvcrt
 116                                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 117                         return (sys.stdout, filename)
 118                 stream = open(filename, open_mode)
 119                 return (stream, filename)
 120         except (IOError, OSError), err:
 121                 # In case of error, try to remove win32 forbidden chars
 122                 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
 123
 124                 # An exception here should be caught in the caller
 125                 stream = open(filename, open_mode)
 126                 return (stream, filename)
 127
 128 def timeconvert(timestr):
 129     """Convert RFC 2822 defined time string into system timestamp"""
 130     timestamp = None
 131     timetuple = email.utils.parsedate_tz(timestr)
 132     if timetuple is not None:
 133         timestamp = email.utils.mktime_tz(timetuple)
 134     return timestamp
 135
 136 class DownloadError(Exception):
 137         """Download Error exception.
 138
 139         This exception may be thrown by FileDownloader objects if they are not
 140         configured to continue on errors. They will contain the appropriate
 141         error message.
 142         """
 143         pass
 144
 145 class SameFileError(Exception):
 146         """Same File exception.
 147
 148         This exception will be thrown by FileDownloader objects if they detect
 149         multiple files would have to be downloaded to the same file on disk.
 150         """
 151         pass
 152
 153 class PostProcessingError(Exception):
 154         """Post Processing exception.
 155
 156         This exception may be raised by PostProcessor's .run() method to
 157         indicate an error in the postprocessing task.
 158         """
 159         pass
 160
 161 class UnavailableVideoError(Exception):
 162         """Unavailable Format exception.
 163
 164         This exception will be thrown when a video is requested
 165         in a format that is not available for that video.
 166         """
 167         pass
 168
 169 class ContentTooShortError(Exception):
 170         """Content Too Short exception.
 171
 172         This exception may be raised by FileDownloader objects when a file they
 173         download is too small for what the server announced first, indicating
 174         the connection was probably interrupted.
 175         """
 176         # Both in bytes
 177         downloaded = None
 178         expected = None
 179
 180         def __init__(self, downloaded, expected):
 181                 self.downloaded = downloaded
 182                 self.expected = expected
 183
 184 class YoutubeDLHandler(urllib2.HTTPHandler):
 185         """Handler for HTTP requests and responses.
 186
 187         This class, when installed with an OpenerDirector, automatically adds
 188         the standard headers to every HTTP request and handles gzipped and
 189         deflated responses from web servers. If compression is to be avoided in
 190         a particular request, the original request in the program code only has
 191         to include the HTTP header "Youtubedl-No-Compression", which will be
 192         removed before making the real request.
 193
 194         Part of this code was copied from:
 195
 196           http://techknack.net/python-urllib2-handlers/
 197
 198         Andrew Rowls, the author of that code, agreed to release it to the
 199         public domain.
 200         """
 201
 202         @staticmethod
 203         def deflate(data):
 204                 try:
 205                         return zlib.decompress(data, -zlib.MAX_WBITS)
 206                 except zlib.error:
 207                         return zlib.decompress(data)
 208
 209         @staticmethod
 210         def addinfourl_wrapper(stream, headers, url, code):
 211                 if hasattr(urllib2.addinfourl, 'getcode'):
 212                         return urllib2.addinfourl(stream, headers, url, code)
 213                 ret = urllib2.addinfourl(stream, headers, url)
 214                 ret.code = code
 215                 return ret
 216
 217         def http_request(self, req):
 218                 for h in std_headers:
 219                         if h in req.headers:
 220                                 del req.headers[h]
 221                         req.add_header(h, std_headers[h])
 222                 if 'Youtubedl-no-compression' in req.headers:
 223                         if 'Accept-encoding' in req.headers:
 224                                 del req.headers['Accept-encoding']
 225                         del req.headers['Youtubedl-no-compression']
 226                 return req
 227
 228         def http_response(self, req, resp):
 229                 old_resp = resp
 230                 # gzip
 231                 if resp.headers.get('Content-encoding', '') == 'gzip':
 232                         gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
 233                         resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 234                         resp.msg = old_resp.msg
 235                 # deflate
 236                 if resp.headers.get('Content-encoding', '') == 'deflate':
 237                         gz = StringIO.StringIO(self.deflate(resp.read()))
 238                         resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 239                         resp.msg = old_resp.msg
 240                 return resp
 241
 242 class FileDownloader(object):
 243         """File Downloader class.
 244
 245         File downloader objects are the ones responsible of downloading the
 246         actual video file and writing it to disk if the user has requested
 247         it, among some other tasks. In most cases there should be one per
 248         program. As, given a video URL, the downloader doesn't know how to
 249         extract all the needed information, task that InfoExtractors do, it
 250         has to pass the URL to one of them.
 251
 252         For this, file downloader objects have a method that allows
 253         InfoExtractors to be registered in a given order. When it is passed
 254         a URL, the file downloader handles it to the first InfoExtractor it
 255         finds that reports being able to handle it. The InfoExtractor extracts
 256         all the information about the video or videos the URL refers to, and
 257         asks the FileDownloader to process the video information, possibly
 258         downloading the video.
 259
 260         File downloaders accept a lot of parameters. In order not to saturate
 261         the object constructor with arguments, it receives a dictionary of
 262         options instead. These options are available through the params
 263         attribute for the InfoExtractors to use. The FileDownloader also
 264         registers itself as the downloader in charge for the InfoExtractors
 265         that are added to it, so this is a "mutual registration".
 266
 267         Available options:
 268
 269         username:         Username for authentication purposes.
 270         password:         Password for authentication purposes.
 271         usenetrc:         Use netrc for authentication instead.
 272         quiet:            Do not print messages to stdout.
 273         forceurl:         Force printing final URL.
 274         forcetitle:       Force printing title.
 275         forcethumbnail:   Force printing thumbnail URL.
 276         forcedescription: Force printing description.
 277         forcefilename:    Force printing final filename.
 278         simulate:         Do not download the video files.
 279         format:           Video format code.
 280         format_limit:     Highest quality format to try.
 281         outtmpl:          Template for output names.
 282         ignoreerrors:     Do not stop on download errors.
 283         ratelimit:        Download speed limit, in bytes/sec.
 284         nooverwrites:     Prevent overwriting files.
 285         retries:          Number of times to retry for HTTP error 5xx
 286         continuedl:       Try to continue downloads if possible.
 287         noprogress:       Do not print the progress bar.
 288         playliststart:    Playlist item to start at.
 289         playlistend:      Playlist item to end at.
 290         logtostderr:      Log messages to stderr instead of stdout.
 291         consoletitle:     Display progress in console window's titlebar.
 292         nopart:           Do not use temporary .part files.
 293         updatetime:       Use the Last-modified header to set output file timestamps.
 294         """
 295
 296         params = None
 297         _ies = []
 298         _pps = []
 299         _download_retcode = None
 300         _num_downloads = None
 301         _screen_file = None
 302
 303         def __init__(self, params):
 304                 """Create a FileDownloader object with the given options."""
 305                 self._ies = []
 306                 self._pps = []
 307                 self._download_retcode = 0
 308                 self._num_downloads = 0
 309                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 310                 self.params = params
 311
 312         @staticmethod
 313         def pmkdir(filename):
 314                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
 315                 components = filename.split(os.sep)
 316                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
 317                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
 318                 for dir in aggregate:
 319                         if not os.path.exists(dir):
 320                                 os.mkdir(dir)
 321
 322         @staticmethod
 323         def format_bytes(bytes):
 324                 if bytes is None:
 325                         return 'N/A'
 326                 if type(bytes) is str:
 327                         bytes = float(bytes)
 328                 if bytes == 0.0:
 329                         exponent = 0
 330                 else:
 331                         exponent = long(math.log(bytes, 1024.0))
 332                 suffix = 'bkMGTPEZY'[exponent]
 333                 converted = float(bytes) / float(1024**exponent)
 334                 return '%.2f%s' % (converted, suffix)
 335
 336         @staticmethod
 337         def calc_percent(byte_counter, data_len):
 338                 if data_len is None:
 339                         return '---.-%'
 340                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 341
 342         @staticmethod
 343         def calc_eta(start, now, total, current):
 344                 if total is None:
 345                         return '--:--'
 346                 dif = now - start
 347                 if current == 0 or dif < 0.001: # One millisecond
 348                         return '--:--'
 349                 rate = float(current) / dif
 350                 eta = long((float(total) - float(current)) / rate)
 351                 (eta_mins, eta_secs) = divmod(eta, 60)
 352                 if eta_mins > 99:
 353                         return '--:--'
 354                 return '%02d:%02d' % (eta_mins, eta_secs)
 355
 356         @staticmethod
 357         def calc_speed(start, now, bytes):
 358                 dif = now - start
 359                 if bytes == 0 or dif < 0.001: # One millisecond
 360                         return '%10s' % '---b/s'
 361                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 362
 363         @staticmethod
 364         def best_block_size(elapsed_time, bytes):
 365                 new_min = max(bytes / 2.0, 1.0)
 366                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 367                 if elapsed_time < 0.001:
 368                         return long(new_max)
 369                 rate = bytes / elapsed_time
 370                 if rate > new_max:
 371                         return long(new_max)
 372                 if rate < new_min:
 373                         return long(new_min)
 374                 return long(rate)
 375
 376         @staticmethod
 377         def parse_bytes(bytestr):
 378                 """Parse a string indicating a byte quantity into a long integer."""
 379                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 380                 if matchobj is None:
 381                         return None
 382                 number = float(matchobj.group(1))
 383                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 384                 return long(round(number * multiplier))
 385
 386         def add_info_extractor(self, ie):
 387                 """Add an InfoExtractor object to the end of the list."""
 388                 self._ies.append(ie)
 389                 ie.set_downloader(self)
 390
 391         def add_post_processor(self, pp):
 392                 """Add a PostProcessor object to the end of the chain."""
 393                 self._pps.append(pp)
 394                 pp.set_downloader(self)
 395
 396         def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
 397                 """Print message to stdout if not in quiet mode."""
 398                 try:
 399                         if not self.params.get('quiet', False):
 400                                 terminator = [u'\n', u''][skip_eol]
 401                                 print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
 402                         self._screen_file.flush()
 403                 except (UnicodeEncodeError), err:
 404                         if not ignore_encoding_errors:
 405                                 raise
 406
 407         def to_stderr(self, message):
 408                 """Print message to stderr."""
 409                 print >>sys.stderr, message.encode(preferredencoding())
 410
 411         def to_cons_title(self, message):
 412                 """Set console/terminal window title to message."""
 413                 if not self.params.get('consoletitle', False):
 414                         return
 415                 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 416                         # c_wchar_p() might not be necessary if `message` is
 417                         # already of type unicode()
 418                         ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 419                 elif 'TERM' in os.environ:
 420                         sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
 421
 422         def fixed_template(self):
 423                 """Checks if the output template is fixed."""
 424                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
 425
 426         def trouble(self, message=None):
 427                 """Determine action to take when a download problem appears.
 428
 429                 Depending on if the downloader has been configured to ignore
 430                 download errors or not, this method may throw an exception or
 431                 not when errors are found, after printing the message.
 432                 """
 433                 if message is not None:
 434                         self.to_stderr(message)
 435                 if not self.params.get('ignoreerrors', False):
 436                         raise DownloadError(message)
 437                 self._download_retcode = 1
 438
 439         def slow_down(self, start_time, byte_counter):
 440                 """Sleep if the download speed is over the rate limit."""
 441                 rate_limit = self.params.get('ratelimit', None)
 442                 if rate_limit is None or byte_counter == 0:
 443                         return
 444                 now = time.time()
 445                 elapsed = now - start_time
 446                 if elapsed <= 0.0:
 447                         return
 448                 speed = float(byte_counter) / elapsed
 449                 if speed > rate_limit:
 450                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 451
 452         def temp_name(self, filename):
 453                 """Returns a temporary filename for the given filename."""
 454                 if self.params.get('nopart', False) or filename == u'-' or \
 455                                 (os.path.exists(filename) and not os.path.isfile(filename)):
 456                         return filename
 457                 return filename + u'.part'
 458
 459         def undo_temp_name(self, filename):
 460                 if filename.endswith(u'.part'):
 461                         return filename[:-len(u'.part')]
 462                 return filename
 463
 464         def try_rename(self, old_filename, new_filename):
 465                 try:
 466                         if old_filename == new_filename:
 467                                 return
 468                         os.rename(old_filename, new_filename)
 469                 except (IOError, OSError), err:
 470                         self.trouble(u'ERROR: unable to rename file')
 471
 472         def try_utime(self, filename, last_modified_hdr):
 473                 """Try to set the last-modified time of the given file."""
 474                 if last_modified_hdr is None:
 475                         return
 476                 if not os.path.isfile(filename):
 477                         return
 478                 timestr = last_modified_hdr
 479                 if timestr is None:
 480                         return
 481                 filetime = timeconvert(timestr)
 482                 if filetime is None:
 483                         return
 484                 try:
 485                         os.utime(filename,(time.time(), filetime))
 486                 except:
 487                         pass
 488
 489         def report_destination(self, filename):
 490                 """Report destination filename."""
 491                 self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
 492
 493         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 494                 """Report download progress."""
 495                 if self.params.get('noprogress', False):
 496                         return
 497                 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
 498                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 499                 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
 500                                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 501
 502         def report_resuming_byte(self, resume_len):
 503                 """Report attempt to resume at given byte."""
 504                 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 505
 506         def report_retry(self, count, retries):
 507                 """Report retry in case of HTTP error 5xx"""
 508                 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 509
 510         def report_file_already_downloaded(self, file_name):
 511                 """Report file has already been fully downloaded."""
 512                 try:
 513                         self.to_screen(u'[download] %s has already been downloaded' % file_name)
 514                 except (UnicodeEncodeError), err:
 515                         self.to_screen(u'[download] The file has already been downloaded')
 516
 517         def report_unable_to_resume(self):
 518                 """Report it was impossible to resume download."""
 519                 self.to_screen(u'[download] Unable to resume')
 520
 521         def report_finish(self):
 522                 """Report download finished."""
 523                 if self.params.get('noprogress', False):
 524                         self.to_screen(u'[download] Download completed')
 525                 else:
 526                         self.to_screen(u'')
 527
 528         def increment_downloads(self):
 529                 """Increment the ordinal that assigns a number to each file."""
 530                 self._num_downloads += 1
 531
 532         def prepare_filename(self, info_dict):
 533                 """Generate the output filename."""
 534                 try:
 535                         template_dict = dict(info_dict)
 536                         template_dict['epoch'] = unicode(long(time.time()))
 537                         template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
 538                         filename = self.params['outtmpl'] % template_dict
 539                         return filename
 540                 except (ValueError, KeyError), err:
 541                         self.trouble(u'ERROR: invalid system charset or erroneous output template')
 542                         return None
 543
 544         def process_info(self, info_dict):
 545                 """Process a single dictionary returned by an InfoExtractor."""
 546                 filename = self.prepare_filename(info_dict)
 547                 # Do nothing else if in simulate mode
 548                 if self.params.get('simulate', False):
 549                         # Forced printings
 550                         if self.params.get('forcetitle', False):
 551                                 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
 552                         if self.params.get('forceurl', False):
 553                                 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
 554                         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 555                                 print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
 556                         if self.params.get('forcedescription', False) and 'description' in info_dict:
 557                                 print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
 558                         if self.params.get('forcefilename', False) and filename is not None:
 559                                 print filename.encode(preferredencoding(), 'xmlcharrefreplace')
 560
 561                         return
 562
 563                 if filename is None:
 564                         return
 565                 if self.params.get('nooverwrites', False) and os.path.exists(filename):
 566                         self.to_stderr(u'WARNING: file exists and will be skipped')
 567                         return
 568
 569                 try:
 570                         self.pmkdir(filename)
 571                 except (OSError, IOError), err:
 572                         self.trouble(u'ERROR: unable to create directories: %s' % str(err))
 573                         return
 574
 575                 try:
 576                         success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
 577                 except (OSError, IOError), err:
 578                         raise UnavailableVideoError
 579                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 580                         self.trouble(u'ERROR: unable to download video data: %s' % str(err))
 581                         return
 582                 except (ContentTooShortError, ), err:
 583                         self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 584                         return
 585
 586                 if success:
 587                         try:
 588                                 self.post_process(filename, info_dict)
 589                         except (PostProcessingError), err:
 590                                 self.trouble(u'ERROR: postprocessing: %s' % str(err))
 591                                 return
 592
 593         def download(self, url_list):
 594                 """Download a given list of URLs."""
 595                 if len(url_list) > 1 and self.fixed_template():
 596                         raise SameFileError(self.params['outtmpl'])
 597
 598                 for url in url_list:
 599                         suitable_found = False
 600                         for ie in self._ies:
 601                                 # Go to next InfoExtractor if not suitable
 602                                 if not ie.suitable(url):
 603                                         continue
 604
 605                                 # Suitable InfoExtractor found
 606                                 suitable_found = True
 607
 608                                 # Extract information from URL and process it
 609                                 ie.extract(url)
 610
 611                                 # Suitable InfoExtractor had been found; go to next URL
 612                                 break
 613
 614                         if not suitable_found:
 615                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
 616
 617                 return self._download_retcode
 618
 619         def post_process(self, filename, ie_info):
 620                 """Run the postprocessing chain on the given file."""
 621                 info = dict(ie_info)
 622                 info['filepath'] = filename
 623                 for pp in self._pps:
 624                         info = pp.run(info)
 625                         if info is None:
 626                                 break
 627
 628         def _download_with_rtmpdump(self, filename, url, player_url):
 629                 self.report_destination(filename)
 630                 tmpfilename = self.temp_name(filename)
 631
 632                 # Check for rtmpdump first
 633                 try:
 634                         subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 635                 except (OSError, IOError):
 636                         self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
 637                         return False
 638
 639                 # Download using rtmpdump. rtmpdump returns exit code 2 when
 640                 # the connection was interrumpted and resuming appears to be
 641                 # possible. This is part of rtmpdump's normal usage, AFAIK.
 642                 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
 643                 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
 644                 while retval == 2 or retval == 1:
 645                         prevsize = os.path.getsize(tmpfilename)
 646                         self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 647                         time.sleep(5.0) # This seems to be needed
 648                         retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 649                         cursize = os.path.getsize(tmpfilename)
 650                         if prevsize == cursize and retval == 1:
 651                                 break
 652                 if retval == 0:
 653                         self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
 654                         self.try_rename(tmpfilename, filename)
 655                         return True
 656                 else:
 657                         self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
 658                         return False
 659
 660         def _do_download(self, filename, url, player_url):
 661                 # Check file already present
 662                 if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
 663                         self.report_file_already_downloaded(filename)
 664                         return True
 665
 666                 # Attempt to download using rtmpdump
 667                 if url.startswith('rtmp'):
 668                         return self._download_with_rtmpdump(filename, url, player_url)
 669
 670                 tmpfilename = self.temp_name(filename)
 671                 stream = None
 672                 open_mode = 'wb'
 673
 674                 # Do not include the Accept-Encoding header
 675                 headers = {'Youtubedl-no-compression': 'True'}
 676                 basic_request = urllib2.Request(url, None, headers)
 677                 request = urllib2.Request(url, None, headers)
 678
 679                 # Establish possible resume length
 680                 if os.path.isfile(tmpfilename):
 681                         resume_len = os.path.getsize(tmpfilename)
 682                 else:
 683                         resume_len = 0
 684
 685                 # Request parameters in case of being able to resume
 686                 if self.params.get('continuedl', False) and resume_len != 0:
 687                         self.report_resuming_byte(resume_len)
 688                         request.add_header('Range','bytes=%d-' % resume_len)
 689                         open_mode = 'ab'
 690
 691                 count = 0
 692                 retries = self.params.get('retries', 0)
 693                 while count <= retries:
 694                         # Establish connection
 695                         try:
 696                                 data = urllib2.urlopen(request)
 697                                 break
 698                         except (urllib2.HTTPError, ), err:
 699                                 if (err.code < 500 or err.code >= 600) and err.code != 416:
 700                                         # Unexpected HTTP error
 701                                         raise
 702                                 elif err.code == 416:
 703                                         # Unable to resume (requested range not satisfiable)
 704                                         try:
 705                                                 # Open the connection again without the range header
 706                                                 data = urllib2.urlopen(basic_request)
 707                                                 content_length = data.info()['Content-Length']
 708                                         except (urllib2.HTTPError, ), err:
 709                                                 if err.code < 500 or err.code >= 600:
 710                                                         raise
 711                                         else:
 712                                                 # Examine the reported length
 713                                                 if (content_length is not None and
 714                                                     (resume_len - 100 < long(content_length) < resume_len + 100)):
 715                                                         # The file had already been fully downloaded.
 716                                                         # Explanation to the above condition: in issue #175 it was revealed that
 717                                                         # YouTube sometimes adds or removes a few bytes from the end of the file,
 718                                                         # changing the file size slightly and causing problems for some users. So
 719                                                         # I decided to implement a suggested change and consider the file
 720                                                         # completely downloaded if the file size differs less than 100 bytes from
 721                                                         # the one in the hard drive.
 722                                                         self.report_file_already_downloaded(filename)
 723                                                         self.try_rename(tmpfilename, filename)
 724                                                         return True
 725                                                 else:
 726                                                         # The length does not match, we start the download over
 727                                                         self.report_unable_to_resume()
 728                                                         open_mode = 'wb'
 729                                                         break
 730                         # Retry
 731                         count += 1
 732                         if count <= retries:
 733                                 self.report_retry(count, retries)
 734
 735                 if count > retries:
 736                         self.trouble(u'ERROR: giving up after %s retries' % retries)
 737                         return False
 738
 739                 data_len = data.info().get('Content-length', None)
 740                 if data_len is not None:
 741                         data_len = long(data_len) + resume_len
 742                 data_len_str = self.format_bytes(data_len)
 743                 byte_counter = 0 + resume_len
 744                 block_size = 1024
 745                 start = time.time()
 746                 while True:
 747                         # Download and write
 748                         before = time.time()
 749                         data_block = data.read(block_size)
 750                         after = time.time()
 751                         if len(data_block) == 0:
 752                                 break
 753                         byte_counter += len(data_block)
 754
 755                         # Open file just in time
 756                         if stream is None:
 757                                 try:
 758                                         (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
 759                                         filename = self.undo_temp_name(tmpfilename)
 760                                         self.report_destination(filename)
 761                                 except (OSError, IOError), err:
 762                                         self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
 763                                         return False
 764                         try:
 765                                 stream.write(data_block)
 766                         except (IOError, OSError), err:
 767                                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
 768                                 return False
 769                         block_size = self.best_block_size(after - before, len(data_block))
 770
 771                         # Progress message
 772                         percent_str = self.calc_percent(byte_counter, data_len)
 773                         eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
 774                         speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
 775                         self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 776
 777                         # Apply rate limit
 778                         self.slow_down(start, byte_counter - resume_len)
 779
 780                 stream.close()
 781                 self.report_finish()
 782                 if data_len is not None and byte_counter != data_len:
 783                         raise ContentTooShortError(byte_counter, long(data_len))
 784                 self.try_rename(tmpfilename, filename)
 785
 786                 # Update file modification time
 787                 if self.params.get('updatetime', True):
 788                         self.try_utime(filename, data.info().get('last-modified', None))
 789
 790                 return True
 791
 792 class InfoExtractor(object):
 793         """Information Extractor class.
 794
 795         Information extractors are the classes that, given a URL, extract
 796         information from the video (or videos) the URL refers to. This
 797         information includes the real video URL, the video title and simplified
 798         title, author and others. The information is stored in a dictionary
 799         which is then passed to the FileDownloader. The FileDownloader
 800         processes this information possibly downloading the video to the file
 801         system, among other possible outcomes. The dictionaries must include
 802         the following fields:
 803
 804         id:             Video identifier.
 805         url:            Final video URL.
 806         uploader:       Nickname of the video uploader.
 807         title:          Literal title.
 808         stitle:         Simplified title.
 809         ext:            Video filename extension.
 810         format:         Video format.
 811         player_url:     SWF Player URL (may be None).
 812
 813         The following fields are optional. Their primary purpose is to allow
 814         youtube-dl to serve as the backend for a video search function, such
 815         as the one in youtube2mp3.  They are only used when their respective
 816         forced printing functions are called:
 817
 818         thumbnail:      Full URL to a video thumbnail image.
 819         description:    One-line video description.
 820
 821         Subclasses of this one should re-define the _real_initialize() and
 822         _real_extract() methods, as well as the suitable() static method.
 823         Probably, they should also be instantiated and added to the main
 824         downloader.
 825         """
 826
 827         _ready = False
 828         _downloader = None
 829
 830         def __init__(self, downloader=None):
 831                 """Constructor. Receives an optional downloader."""
 832                 self._ready = False
 833                 self.set_downloader(downloader)
 834
 835         @staticmethod
 836         def suitable(url):
 837                 """Receives a URL and returns True if suitable for this IE."""
 838                 return False
 839
 840         def initialize(self):
 841                 """Initializes an instance (authentication, etc)."""
 842                 if not self._ready:
 843                         self._real_initialize()
 844                         self._ready = True
 845
 846         def extract(self, url):
 847                 """Extracts URL information and returns it in list of dicts."""
 848                 self.initialize()
 849                 return self._real_extract(url)
 850
 851         def set_downloader(self, downloader):
 852                 """Sets the downloader for this IE."""
 853                 self._downloader = downloader
 854
 855         def _real_initialize(self):
 856                 """Real initialization process. Redefine in subclasses."""
 857                 pass
 858
 859         def _real_extract(self, url):
 860                 """Real extraction process. Redefine in subclasses."""
 861                 pass
 862
 863 class YoutubeIE(InfoExtractor):
 864         """Information extractor for youtube.com."""
 865
 866         _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
 867         _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
 868         _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
 869         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
 870         _NETRC_MACHINE = 'youtube'
 871         # Listed in order of quality
 872         _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
 873         _video_extensions = {
 874                 '13': '3gp',
 875                 '17': 'mp4',
 876                 '18': 'mp4',
 877                 '22': 'mp4',
 878                 '37': 'mp4',
 879                 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
 880                 '43': 'webm',
 881                 '45': 'webm',
 882         }
 883
 884         @staticmethod
 885         def suitable(url):
 886                 return (re.match(YoutubeIE._VALID_URL, url) is not None)
 887
 888         def report_lang(self):
 889                 """Report attempt to set language."""
 890                 self._downloader.to_screen(u'[youtube] Setting language')
 891
 892         def report_login(self):
 893                 """Report attempt to log in."""
 894                 self._downloader.to_screen(u'[youtube] Logging in')
 895
 896         def report_age_confirmation(self):
 897                 """Report attempt to confirm age."""
 898                 self._downloader.to_screen(u'[youtube] Confirming age')
 899
 900         def report_video_webpage_download(self, video_id):
 901                 """Report attempt to download video webpage."""
 902                 self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
 903
 904         def report_video_info_webpage_download(self, video_id):
 905                 """Report attempt to download video info webpage."""
 906                 self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
 907
 908         def report_information_extraction(self, video_id):
 909                 """Report attempt to extract video information."""
 910                 self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
 911
 912         def report_unavailable_format(self, video_id, format):
 913                 """Report extracted video URL."""
 914                 self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
 915
 916         def report_rtmp_download(self):
 917                 """Indicate the download will use the RTMP protocol."""
 918                 self._downloader.to_screen(u'[youtube] RTMP download detected')
 919
 920         def _real_initialize(self):
 921                 if self._downloader is None:
 922                         return
 923
 924                 username = None
 925                 password = None
 926                 downloader_params = self._downloader.params
 927
 928                 # Attempt to use provided username and password or .netrc data
 929                 if downloader_params.get('username', None) is not None:
 930                         username = downloader_params['username']
 931                         password = downloader_params['password']
 932                 elif downloader_params.get('usenetrc', False):
 933                         try:
 934                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
 935                                 if info is not None:
 936                                         username = info[0]
 937                                         password = info[2]
 938                                 else:
 939                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
 940                         except (IOError, netrc.NetrcParseError), err:
 941                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
 942                                 return
 943
 944                 # Set language
 945                 request = urllib2.Request(self._LANG_URL)
 946                 try:
 947                         self.report_lang()
 948                         urllib2.urlopen(request).read()
 949                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 950                         self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
 951                         return
 952
 953                 # No authentication to be performed
 954                 if username is None:
 955                         return
 956
 957                 # Log in
 958                 login_form = {
 959                                 'current_form': 'loginForm',
 960                                 'next':         '/',
 961                                 'action_login': 'Log In',
 962                                 'username':     username,
 963                                 'password':     password,
 964                                 }
 965                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
 966                 try:
 967                         self.report_login()
 968                         login_results = urllib2.urlopen(request).read()
 969                         if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
 970                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
 971                                 return
 972                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 973                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
 974                         return
 975
 976                 # Confirm age
 977                 age_form = {
 978                                 'next_url':             '/',
 979                                 'action_confirm':       'Confirm',
 980                                 }
 981                 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
 982                 try:
 983                         self.report_age_confirmation()
 984                         age_results = urllib2.urlopen(request).read()
 985                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 986                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
 987                         return
 988
 989         def _real_extract(self, url):
 990                 # Extract video id from URL
 991                 mobj = re.match(self._VALID_URL, url)
 992                 if mobj is None:
 993                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
 994                         return
 995                 video_id = mobj.group(2)
 996
 997                 # Get video webpage
 998                 self.report_video_webpage_download(video_id)
 999                 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id)
1000                 try:
1001                         video_webpage = urllib2.urlopen(request).read()
1002                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1003                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
1004                         return
1005
1006                 # Attempt to extract SWF player URL
1007                 mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1008                 if mobj is not None:
1009                         player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1010                 else:
1011                         player_url = None
1012
1013                 # Get video info
1014                 self.report_video_info_webpage_download(video_id)
1015                 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1016                         video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1017                                            % (video_id, el_type))
1018                         request = urllib2.Request(video_info_url)
1019                         try:
1020                                 video_info_webpage = urllib2.urlopen(request).read()
1021                                 video_info = parse_qs(video_info_webpage)
1022                                 if 'token' in video_info:
1023                                         break
1024                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1025                                 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
1026                                 return
1027                 if 'token' not in video_info:
1028                         if 'reason' in video_info:
1029                                 self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
1030                         else:
1031                                 self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
1032                         return
1033
1034                 # Start extracting information
1035                 self.report_information_extraction(video_id)
1036
1037                 # uploader
1038                 if 'author' not in video_info:
1039                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1040                         return
1041                 video_uploader = urllib.unquote_plus(video_info['author'][0])
1042
1043                 # title
1044                 if 'title' not in video_info:
1045                         self._downloader.trouble(u'ERROR: unable to extract video title')
1046                         return
1047                 video_title = urllib.unquote_plus(video_info['title'][0])
1048                 video_title = video_title.decode('utf-8')
1049                 video_title = sanitize_title(video_title)
1050
1051                 # simplified title
1052                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1053                 simple_title = simple_title.strip(ur'_')
1054
1055                 # thumbnail image
1056                 if 'thumbnail_url' not in video_info:
1057                         self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
1058                         video_thumbnail = ''
1059                 else:   # don't panic if we can't find it
1060                         video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
1061
1062                 # upload date
1063                 upload_date = u'NA'
1064                 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1065                 if mobj is not None:
1066                         upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1067                         format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
1068                         for expression in format_expressions:
1069                                 try:
1070                                         upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
1071                                 except:
1072                                         pass
1073
1074                 # description
1075                 video_description = 'No description available.'
1076                 if self._downloader.params.get('forcedescription', False):
1077                         mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
1078                         if mobj is not None:
1079                                 video_description = mobj.group(1)
1080
1081                 # token
1082                 video_token = urllib.unquote_plus(video_info['token'][0])
1083
1084                 # Decide which formats to download
1085                 req_format = self._downloader.params.get('format', None)
1086
1087                 if 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1088                         url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
1089                         url_data = [dict(pairStr.split('=') for pairStr in uds.split('&')) for uds in url_data_strs]
1090                         url_map = dict((ud['itag'], urllib.unquote(ud['url'])) for ud in url_data)
1091                         format_limit = self._downloader.params.get('format_limit', None)
1092                         if format_limit is not None and format_limit in self._available_formats:
1093                                 format_list = self._available_formats[self._available_formats.index(format_limit):]
1094                         else:
1095                                 format_list = self._available_formats
1096                         existing_formats = [x for x in format_list if x in url_map]
1097                         if len(existing_formats) == 0:
1098                                 self._downloader.trouble(u'ERROR: no known formats available for video')
1099                                 return
1100                         if req_format is None:
1101                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1102                         elif req_format == '-1':
1103                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1104                         else:
1105                                 # Specific format
1106                                 if req_format not in url_map:
1107                                         self._downloader.trouble(u'ERROR: requested format not available')
1108                                         return
1109                                 video_url_list = [(req_format, url_map[req_format])] # Specific format
1110
1111                 elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1112                         self.report_rtmp_download()
1113                         video_url_list = [(None, video_info['conn'][0])]
1114
1115                 else:
1116                         self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
1117                         return
1118
1119                 for format_param, video_real_url in video_url_list:
1120                         # At this point we have a new video
1121                         self._downloader.increment_downloads()
1122
1123                         # Extension
1124                         video_extension = self._video_extensions.get(format_param, 'flv')
1125
1126                         # Find the video URL in fmt_url_map or conn paramters
1127                         try:
1128                                 # Process video information
1129                                 self._downloader.process_info({
1130                                         'id':           video_id.decode('utf-8'),
1131                                         'url':          video_real_url.decode('utf-8'),
1132                                         'uploader':     video_uploader.decode('utf-8'),
1133                                         'upload_date':  upload_date,
1134                                         'title':        video_title,
1135                                         'stitle':       simple_title,
1136                                         'ext':          video_extension.decode('utf-8'),
1137                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
1138                                         'thumbnail':    video_thumbnail.decode('utf-8'),
1139                                         'description':  video_description.decode('utf-8'),
1140                                         'player_url':   player_url,
1141                                 })
1142                         except UnavailableVideoError, err:
1143                                 self._downloader.trouble(u'\nERROR: unable to download video')
1144
1145
1146 class MetacafeIE(InfoExtractor):
1147         """Information Extractor for metacafe.com."""
1148
1149         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
1150         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
1151         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
1152         _youtube_ie = None
1153
1154         def __init__(self, youtube_ie, downloader=None):
1155                 InfoExtractor.__init__(self, downloader)
1156                 self._youtube_ie = youtube_ie
1157
1158         @staticmethod
1159         def suitable(url):
1160                 return (re.match(MetacafeIE._VALID_URL, url) is not None)
1161
1162         def report_disclaimer(self):
1163                 """Report disclaimer retrieval."""
1164                 self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
1165
1166         def report_age_confirmation(self):
1167                 """Report attempt to confirm age."""
1168                 self._downloader.to_screen(u'[metacafe] Confirming age')
1169
1170         def report_download_webpage(self, video_id):
1171                 """Report webpage download."""
1172                 self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
1173
1174         def report_extraction(self, video_id):
1175                 """Report information extraction."""
1176                 self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
1177
1178         def _real_initialize(self):
1179                 # Retrieve disclaimer
1180                 request = urllib2.Request(self._DISCLAIMER)
1181                 try:
1182                         self.report_disclaimer()
1183                         disclaimer = urllib2.urlopen(request).read()
1184                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1185                         self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
1186                         return
1187
1188                 # Confirm age
1189                 disclaimer_form = {
1190                         'filters': '0',
1191                         'submit': "Continue - I'm over 18",
1192                         }
1193                 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
1194                 try:
1195                         self.report_age_confirmation()
1196                         disclaimer = urllib2.urlopen(request).read()
1197                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1198                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
1199                         return
1200
1201         def _real_extract(self, url):
1202                 # Extract id and simplified title from URL
1203                 mobj = re.match(self._VALID_URL, url)
1204                 if mobj is None:
1205                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1206                         return
1207
1208                 video_id = mobj.group(1)
1209
1210                 # Check if video comes from YouTube
1211                 mobj2 = re.match(r'^yt-(.*)$', video_id)
1212                 if mobj2 is not None:
1213                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1214                         return
1215
1216                 # At this point we have a new video
1217                 self._downloader.increment_downloads()
1218
1219                 simple_title = mobj.group(2).decode('utf-8')
1220
1221                 # Retrieve video webpage to extract further information
1222                 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1223                 try:
1224                         self.report_download_webpage(video_id)
1225                         webpage = urllib2.urlopen(request).read()
1226                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1227                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1228                         return
1229
1230                 # Extract URL, uploader and title from webpage
1231                 self.report_extraction(video_id)
1232                 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
1233                 if mobj is not None:
1234                         mediaURL = urllib.unquote(mobj.group(1))
1235                         video_extension = mediaURL[-3:]
1236
1237                         # Extract gdaKey if available
1238                         mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1239                         if mobj is None:
1240                                 video_url = mediaURL
1241                         else:
1242                                 gdaKey = mobj.group(1)
1243                                 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
1244                 else:
1245                         mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
1246                         if mobj is None:
1247                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1248                                 return
1249                         vardict = parse_qs(mobj.group(1))
1250                         if 'mediaData' not in vardict:
1251                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1252                                 return
1253                         mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
1254                         if mobj is None:
1255                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1256                                 return
1257                         mediaURL = mobj.group(1).replace('\\/', '/')
1258                         video_extension = mediaURL[-3:]
1259                         video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
1260
1261                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
1262                 if mobj is None:
1263                         self._downloader.trouble(u'ERROR: unable to extract title')
1264                         return
1265                 video_title = mobj.group(1).decode('utf-8')
1266                 video_title = sanitize_title(video_title)
1267
1268                 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
1269                 if mobj is None:
1270                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1271                         return
1272                 video_uploader = mobj.group(1)
1273
1274                 try:
1275                         # Process video information
1276                         self._downloader.process_info({
1277                                 'id':           video_id.decode('utf-8'),
1278                                 'url':          video_url.decode('utf-8'),
1279                                 'uploader':     video_uploader.decode('utf-8'),
1280                                 'upload_date':  u'NA',
1281                                 'title':        video_title,
1282                                 'stitle':       simple_title,
1283                                 'ext':          video_extension.decode('utf-8'),
1284                                 'format':       u'NA',
1285                                 'player_url':   None,
1286                         })
1287                 except UnavailableVideoError:
1288                         self._downloader.trouble(u'\nERROR: unable to download video')
1289
1290
1291 class DailymotionIE(InfoExtractor):
1292         """Information Extractor for Dailymotion"""
1293
1294         _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1295
1296         def __init__(self, downloader=None):
1297                 InfoExtractor.__init__(self, downloader)
1298
1299         @staticmethod
1300         def suitable(url):
1301                 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1302
1303         def report_download_webpage(self, video_id):
1304                 """Report webpage download."""
1305                 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
1306
1307         def report_extraction(self, video_id):
1308                 """Report information extraction."""
1309                 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
1310
1311         def _real_initialize(self):
1312                 return
1313
1314         def _real_extract(self, url):
1315                 # Extract id and simplified title from URL
1316                 mobj = re.match(self._VALID_URL, url)
1317                 if mobj is None:
1318                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1319                         return
1320
1321                 # At this point we have a new video
1322                 self._downloader.increment_downloads()
1323                 video_id = mobj.group(1)
1324
1325                 simple_title = mobj.group(2).decode('utf-8')
1326                 video_extension = 'flv'
1327
1328                 # Retrieve video webpage to extract further information
1329                 request = urllib2.Request(url)
1330                 try:
1331                         self.report_download_webpage(video_id)
1332                         webpage = urllib2.urlopen(request).read()
1333                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1334                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1335                         return
1336
1337                 # Extract URL, uploader and title from webpage
1338                 self.report_extraction(video_id)
1339                 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1340                 if mobj is None:
1341                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1342                         return
1343                 mediaURL = urllib.unquote(mobj.group(1))
1344
1345                 # if needed add http://www.dailymotion.com/ if relative URL
1346
1347                 video_url = mediaURL
1348
1349                 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1350                 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1351                 if mobj is None:
1352                         self._downloader.trouble(u'ERROR: unable to extract title')
1353                         return
1354                 video_title = mobj.group(1).decode('utf-8')
1355                 video_title = sanitize_title(video_title)
1356
1357                 mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
1358                 if mobj is None:
1359                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1360                         return
1361                 video_uploader = mobj.group(1)
1362
1363                 try:
1364                         # Process video information
1365                         self._downloader.process_info({
1366                                 'id':           video_id.decode('utf-8'),
1367                                 'url':          video_url.decode('utf-8'),
1368                                 'uploader':     video_uploader.decode('utf-8'),
1369                                 'upload_date':  u'NA',
1370                                 'title':        video_title,
1371                                 'stitle':       simple_title,
1372                                 'ext':          video_extension.decode('utf-8'),
1373                                 'format':       u'NA',
1374                                 'player_url':   None,
1375                         })
1376                 except UnavailableVideoError:
1377                         self._downloader.trouble(u'\nERROR: unable to download video')
1378
1379 class GoogleIE(InfoExtractor):
1380         """Information extractor for video.google.com."""
1381
1382         _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1383
1384         def __init__(self, downloader=None):
1385                 InfoExtractor.__init__(self, downloader)
1386
1387         @staticmethod
1388         def suitable(url):
1389                 return (re.match(GoogleIE._VALID_URL, url) is not None)
1390
1391         def report_download_webpage(self, video_id):
1392                 """Report webpage download."""
1393                 self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
1394
1395         def report_extraction(self, video_id):
1396                 """Report information extraction."""
1397                 self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
1398
1399         def _real_initialize(self):
1400                 return
1401
1402         def _real_extract(self, url):
1403                 # Extract id from URL
1404                 mobj = re.match(self._VALID_URL, url)
1405                 if mobj is None:
1406                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1407                         return
1408
1409                 # At this point we have a new video
1410                 self._downloader.increment_downloads()
1411                 video_id = mobj.group(1)
1412
1413                 video_extension = 'mp4'
1414
1415                 # Retrieve video webpage to extract further information
1416                 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1417                 try:
1418                         self.report_download_webpage(video_id)
1419                         webpage = urllib2.urlopen(request).read()
1420                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1421                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1422                         return
1423
1424                 # Extract URL, uploader, and title from webpage
1425                 self.report_extraction(video_id)
1426                 mobj = re.search(r"download_url:'([^']+)'", webpage)
1427                 if mobj is None:
1428                         video_extension = 'flv'
1429                         mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1430                 if mobj is None:
1431                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1432                         return
1433                 mediaURL = urllib.unquote(mobj.group(1))
1434                 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1435                 mediaURL = mediaURL.replace('\\x26', '\x26')
1436
1437                 video_url = mediaURL
1438
1439                 mobj = re.search(r'<title>(.*)</title>', webpage)
1440                 if mobj is None:
1441                         self._downloader.trouble(u'ERROR: unable to extract title')
1442                         return
1443                 video_title = mobj.group(1).decode('utf-8')
1444                 video_title = sanitize_title(video_title)
1445                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1446
1447                 # Extract video description
1448                 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1449                 if mobj is None:
1450                         self._downloader.trouble(u'ERROR: unable to extract video description')
1451                         return
1452                 video_description = mobj.group(1).decode('utf-8')
1453                 if not video_description:
1454                         video_description = 'No description available.'
1455
1456                 # Extract video thumbnail
1457                 if self._downloader.params.get('forcethumbnail', False):
1458                         request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1459                         try:
1460                                 webpage = urllib2.urlopen(request).read()
1461                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1462                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1463                                 return
1464                         mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1465                         if mobj is None:
1466                                 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1467                                 return
1468                         video_thumbnail = mobj.group(1)
1469                 else:   # we need something to pass to process_info
1470                         video_thumbnail = ''
1471
1472
1473                 try:
1474                         # Process video information
1475                         self._downloader.process_info({
1476                                 'id':           video_id.decode('utf-8'),
1477                                 'url':          video_url.decode('utf-8'),
1478                                 'uploader':     u'NA',
1479                                 'upload_date':  u'NA',
1480                                 'title':        video_title,
1481                                 'stitle':       simple_title,
1482                                 'ext':          video_extension.decode('utf-8'),
1483                                 'format':       u'NA',
1484                                 'player_url':   None,
1485                         })
1486                 except UnavailableVideoError:
1487                         self._downloader.trouble(u'\nERROR: unable to download video')
1488
1489
1490 class PhotobucketIE(InfoExtractor):
1491         """Information extractor for photobucket.com."""
1492
1493         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1494
1495         def __init__(self, downloader=None):
1496                 InfoExtractor.__init__(self, downloader)
1497
1498         @staticmethod
1499         def suitable(url):
1500                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1501
1502         def report_download_webpage(self, video_id):
1503                 """Report webpage download."""
1504                 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1505
1506         def report_extraction(self, video_id):
1507                 """Report information extraction."""
1508                 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1509
1510         def _real_initialize(self):
1511                 return
1512
1513         def _real_extract(self, url):
1514                 # Extract id from URL
1515                 mobj = re.match(self._VALID_URL, url)
1516                 if mobj is None:
1517                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1518                         return
1519
1520                 # At this point we have a new video
1521                 self._downloader.increment_downloads()
1522                 video_id = mobj.group(1)
1523
1524                 video_extension = 'flv'
1525
1526                 # Retrieve video webpage to extract further information
1527                 request = urllib2.Request(url)
1528                 try:
1529                         self.report_download_webpage(video_id)
1530                         webpage = urllib2.urlopen(request).read()
1531                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1532                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1533                         return
1534
1535                 # Extract URL, uploader, and title from webpage
1536                 self.report_extraction(video_id)
1537                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1538                 if mobj is None:
1539                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1540                         return
1541                 mediaURL = urllib.unquote(mobj.group(1))
1542
1543                 video_url = mediaURL
1544
1545                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1546                 if mobj is None:
1547                         self._downloader.trouble(u'ERROR: unable to extract title')
1548                         return
1549                 video_title = mobj.group(1).decode('utf-8')
1550                 video_title = sanitize_title(video_title)
1551                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1552
1553                 video_uploader = mobj.group(2).decode('utf-8')
1554
1555                 try:
1556                         # Process video information
1557                         self._downloader.process_info({
1558                                 'id':           video_id.decode('utf-8'),
1559                                 'url':          video_url.decode('utf-8'),
1560                                 'uploader':     video_uploader,
1561                                 'upload_date':  u'NA',
1562                                 'title':        video_title,
1563                                 'stitle':       simple_title,
1564                                 'ext':          video_extension.decode('utf-8'),
1565                                 'format':       u'NA',
1566                                 'player_url':   None,
1567                         })
1568                 except UnavailableVideoError:
1569                         self._downloader.trouble(u'\nERROR: unable to download video')
1570
1571
1572 class YahooIE(InfoExtractor):
1573         """Information extractor for video.yahoo.com."""
1574
1575         # _VALID_URL matches all Yahoo! Video URLs
1576         # _VPAGE_URL matches only the extractable '/watch/' URLs
1577         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1578         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1579
1580         def __init__(self, downloader=None):
1581                 InfoExtractor.__init__(self, downloader)
1582
1583         @staticmethod
1584         def suitable(url):
1585                 return (re.match(YahooIE._VALID_URL, url) is not None)
1586
1587         def report_download_webpage(self, video_id):
1588                 """Report webpage download."""
1589                 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1590
1591         def report_extraction(self, video_id):
1592                 """Report information extraction."""
1593                 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1594
1595         def _real_initialize(self):
1596                 return
1597
1598         def _real_extract(self, url, new_video=True):
1599                 # Extract ID from URL
1600                 mobj = re.match(self._VALID_URL, url)
1601                 if mobj is None:
1602                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1603                         return
1604
1605                 # At this point we have a new video
1606                 self._downloader.increment_downloads()
1607                 video_id = mobj.group(2)
1608                 video_extension = 'flv'
1609
1610                 # Rewrite valid but non-extractable URLs as
1611                 # extractable English language /watch/ URLs
1612                 if re.match(self._VPAGE_URL, url) is None:
1613                         request = urllib2.Request(url)
1614                         try:
1615                                 webpage = urllib2.urlopen(request).read()
1616                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1617                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1618                                 return
1619
1620                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1621                         if mobj is None:
1622                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1623                                 return
1624                         yahoo_id = mobj.group(1)
1625
1626                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1627                         if mobj is None:
1628                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1629                                 return
1630                         yahoo_vid = mobj.group(1)
1631
1632                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1633                         return self._real_extract(url, new_video=False)
1634
1635                 # Retrieve video webpage to extract further information
1636                 request = urllib2.Request(url)
1637                 try:
1638                         self.report_download_webpage(video_id)
1639                         webpage = urllib2.urlopen(request).read()
1640                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1641                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1642                         return
1643
1644                 # Extract uploader and title from webpage
1645                 self.report_extraction(video_id)
1646                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1647                 if mobj is None:
1648                         self._downloader.trouble(u'ERROR: unable to extract video title')
1649                         return
1650                 video_title = mobj.group(1).decode('utf-8')
1651                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1652
1653                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1654                 if mobj is None:
1655                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1656                         return
1657                 video_uploader = mobj.group(1).decode('utf-8')
1658
1659                 # Extract video thumbnail
1660                 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1661                 if mobj is None:
1662                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1663                         return
1664                 video_thumbnail = mobj.group(1).decode('utf-8')
1665
1666                 # Extract video description
1667                 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1668                 if mobj is None:
1669                         self._downloader.trouble(u'ERROR: unable to extract video description')
1670                         return
1671                 video_description = mobj.group(1).decode('utf-8')
1672                 if not video_description: video_description = 'No description available.'
1673
1674                 # Extract video height and width
1675                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1676                 if mobj is None:
1677                         self._downloader.trouble(u'ERROR: unable to extract video height')
1678                         return
1679                 yv_video_height = mobj.group(1)
1680
1681                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1682                 if mobj is None:
1683                         self._downloader.trouble(u'ERROR: unable to extract video width')
1684                         return
1685                 yv_video_width = mobj.group(1)
1686
1687                 # Retrieve video playlist to extract media URL
1688                 # I'm not completely sure what all these options are, but we
1689                 # seem to need most of them, otherwise the server sends a 401.
1690                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1691                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1692                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1693                                           '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1694                                           '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1695                 try:
1696                         self.report_download_webpage(video_id)
1697                         webpage = urllib2.urlopen(request).read()
1698                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1699                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1700                         return
1701
1702                 # Extract media URL from playlist XML
1703                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1704                 if mobj is None:
1705                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1706                         return
1707                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1708                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1709
1710                 try:
1711                         # Process video information
1712                         self._downloader.process_info({
1713                                 'id':           video_id.decode('utf-8'),
1714                                 'url':          video_url,
1715                                 'uploader':     video_uploader,
1716                                 'upload_date':  u'NA',
1717                                 'title':        video_title,
1718                                 'stitle':       simple_title,
1719                                 'ext':          video_extension.decode('utf-8'),
1720                                 'thumbnail':    video_thumbnail.decode('utf-8'),
1721                                 'description':  video_description,
1722                                 'thumbnail':    video_thumbnail,
1723                                 'description':  video_description,
1724                                 'player_url':   None,
1725                         })
1726                 except UnavailableVideoError:
1727                         self._downloader.trouble(u'\nERROR: unable to download video')
1728
1729
1730 class GenericIE(InfoExtractor):
1731         """Generic last-resort information extractor."""
1732
1733         def __init__(self, downloader=None):
1734                 InfoExtractor.__init__(self, downloader)
1735
1736         @staticmethod
1737         def suitable(url):
1738                 return True
1739
1740         def report_download_webpage(self, video_id):
1741                 """Report webpage download."""
1742                 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
1743                 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
1744
1745         def report_extraction(self, video_id):
1746                 """Report information extraction."""
1747                 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
1748
1749         def _real_initialize(self):
1750                 return
1751
1752         def _real_extract(self, url):
1753                 # At this point we have a new video
1754                 self._downloader.increment_downloads()
1755
1756                 video_id = url.split('/')[-1]
1757                 request = urllib2.Request(url)
1758                 try:
1759                         self.report_download_webpage(video_id)
1760                         webpage = urllib2.urlopen(request).read()
1761                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1762                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1763                         return
1764                 except ValueError, err:
1765                         # since this is the last-resort InfoExtractor, if
1766                         # this error is thrown, it'll be thrown here
1767                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1768                         return
1769
1770                 self.report_extraction(video_id)
1771                 # Start with something easy: JW Player in SWFObject
1772                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1773                 if mobj is None:
1774                         # Broaden the search a little bit
1775                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1776                 if mobj is None:
1777                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1778                         return
1779
1780                 # It's possible that one of the regexes
1781                 # matched, but returned an empty group:
1782                 if mobj.group(1) is None:
1783                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1784                         return
1785
1786                 video_url = urllib.unquote(mobj.group(1))
1787                 video_id  = os.path.basename(video_url)
1788
1789                 # here's a fun little line of code for you:
1790                 video_extension = os.path.splitext(video_id)[1][1:]
1791                 video_id        = os.path.splitext(video_id)[0]
1792
1793                 # it's tempting to parse this further, but you would
1794                 # have to take into account all the variations like
1795                 #   Video Title - Site Name
1796                 #   Site Name | Video Title
1797                 #   Video Title - Tagline | Site Name
1798                 # and so on and so forth; it's just not practical
1799                 mobj = re.search(r'<title>(.*)</title>', webpage)
1800                 if mobj is None:
1801                         self._downloader.trouble(u'ERROR: unable to extract title')
1802                         return
1803                 video_title = mobj.group(1).decode('utf-8')
1804                 video_title = sanitize_title(video_title)
1805                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1806
1807                 # video uploader is domain name
1808                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1809                 if mobj is None:
1810                         self._downloader.trouble(u'ERROR: unable to extract title')
1811                         return
1812                 video_uploader = mobj.group(1).decode('utf-8')
1813
1814                 try:
1815                         # Process video information
1816                         self._downloader.process_info({
1817                                 'id':           video_id.decode('utf-8'),
1818                                 'url':          video_url.decode('utf-8'),
1819                                 'uploader':     video_uploader,
1820                                 'upload_date':  u'NA',
1821                                 'title':        video_title,
1822                                 'stitle':       simple_title,
1823                                 'ext':          video_extension.decode('utf-8'),
1824                                 'format':       u'NA',
1825                                 'player_url':   None,
1826                         })
1827                 except UnavailableVideoError, err:
1828                         self._downloader.trouble(u'\nERROR: unable to download video')
1829
1830
1831 class YoutubeSearchIE(InfoExtractor):
1832         """Information Extractor for YouTube search queries."""
1833         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1834         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1835         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1836         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1837         _youtube_ie = None
1838         _max_youtube_results = 1000
1839
1840         def __init__(self, youtube_ie, downloader=None):
1841                 InfoExtractor.__init__(self, downloader)
1842                 self._youtube_ie = youtube_ie
1843
1844         @staticmethod
1845         def suitable(url):
1846                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1847
1848         def report_download_page(self, query, pagenum):
1849                 """Report attempt to download playlist page with given number."""
1850                 query = query.decode(preferredencoding())
1851                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1852
1853         def _real_initialize(self):
1854                 self._youtube_ie.initialize()
1855
1856         def _real_extract(self, query):
1857                 mobj = re.match(self._VALID_QUERY, query)
1858                 if mobj is None:
1859                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1860                         return
1861
1862                 prefix, query = query.split(':')
1863                 prefix = prefix[8:]
1864                 query  = query.encode('utf-8')
1865                 if prefix == '':
1866                         self._download_n_results(query, 1)
1867                         return
1868                 elif prefix == 'all':
1869                         self._download_n_results(query, self._max_youtube_results)
1870                         return
1871                 else:
1872                         try:
1873                                 n = long(prefix)
1874                                 if n <= 0:
1875                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1876                                         return
1877                                 elif n > self._max_youtube_results:
1878                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
1879                                         n = self._max_youtube_results
1880                                 self._download_n_results(query, n)
1881                                 return
1882                         except ValueError: # parsing prefix as integer fails
1883                                 self._download_n_results(query, 1)
1884                                 return
1885
1886         def _download_n_results(self, query, n):
1887                 """Downloads a specified number of results for a query"""
1888
1889                 video_ids = []
1890                 already_seen = set()
1891                 pagenum = 1
1892
1893                 while True:
1894                         self.report_download_page(query, pagenum)
1895                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1896                         request = urllib2.Request(result_url)
1897                         try:
1898                                 page = urllib2.urlopen(request).read()
1899                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1900                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1901                                 return
1902
1903                         # Extract video identifiers
1904                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1905                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1906                                 if video_id not in already_seen:
1907                                         video_ids.append(video_id)
1908                                         already_seen.add(video_id)
1909                                         if len(video_ids) == n:
1910                                                 # Specified n videos reached
1911                                                 for id in video_ids:
1912                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1913                                                 return
1914
1915                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1916                                 for id in video_ids:
1917                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1918                                 return
1919
1920                         pagenum = pagenum + 1
1921
1922 class GoogleSearchIE(InfoExtractor):
1923         """Information Extractor for Google Video search queries."""
1924         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1925         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1926         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1927         _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1928         _google_ie = None
1929         _max_google_results = 1000
1930
1931         def __init__(self, google_ie, downloader=None):
1932                 InfoExtractor.__init__(self, downloader)
1933                 self._google_ie = google_ie
1934
1935         @staticmethod
1936         def suitable(url):
1937                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1938
1939         def report_download_page(self, query, pagenum):
1940                 """Report attempt to download playlist page with given number."""
1941                 query = query.decode(preferredencoding())
1942                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1943
1944         def _real_initialize(self):
1945                 self._google_ie.initialize()
1946
1947         def _real_extract(self, query):
1948                 mobj = re.match(self._VALID_QUERY, query)
1949                 if mobj is None:
1950                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1951                         return
1952
1953                 prefix, query = query.split(':')
1954                 prefix = prefix[8:]
1955                 query  = query.encode('utf-8')
1956                 if prefix == '':
1957                         self._download_n_results(query, 1)
1958                         return
1959                 elif prefix == 'all':
1960                         self._download_n_results(query, self._max_google_results)
1961                         return
1962                 else:
1963                         try:
1964                                 n = long(prefix)
1965                                 if n <= 0:
1966                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1967                                         return
1968                                 elif n > self._max_google_results:
1969                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n))
1970                                         n = self._max_google_results
1971                                 self._download_n_results(query, n)
1972                                 return
1973                         except ValueError: # parsing prefix as integer fails
1974                                 self._download_n_results(query, 1)
1975                                 return
1976
1977         def _download_n_results(self, query, n):
1978                 """Downloads a specified number of results for a query"""
1979
1980                 video_ids = []
1981                 already_seen = set()
1982                 pagenum = 1
1983
1984                 while True:
1985                         self.report_download_page(query, pagenum)
1986                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1987                         request = urllib2.Request(result_url)
1988                         try:
1989                                 page = urllib2.urlopen(request).read()
1990                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1991                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1992                                 return
1993
1994                         # Extract video identifiers
1995                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1996                                 video_id = mobj.group(1)
1997                                 if video_id not in already_seen:
1998                                         video_ids.append(video_id)
1999                                         already_seen.add(video_id)
2000                                         if len(video_ids) == n:
2001                                                 # Specified n videos reached
2002                                                 for id in video_ids:
2003                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2004                                                 return
2005
2006                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2007                                 for id in video_ids:
2008                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2009                                 return
2010
2011                         pagenum = pagenum + 1
2012
2013 class YahooSearchIE(InfoExtractor):
2014         """Information Extractor for Yahoo! Video search queries."""
2015         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
2016         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
2017         _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
2018         _MORE_PAGES_INDICATOR = r'\s*Next'
2019         _yahoo_ie = None
2020         _max_yahoo_results = 1000
2021
2022         def __init__(self, yahoo_ie, downloader=None):
2023                 InfoExtractor.__init__(self, downloader)
2024                 self._yahoo_ie = yahoo_ie
2025
2026         @staticmethod
2027         def suitable(url):
2028                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
2029
2030         def report_download_page(self, query, pagenum):
2031                 """Report attempt to download playlist page with given number."""
2032                 query = query.decode(preferredencoding())
2033                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
2034
2035         def _real_initialize(self):
2036                 self._yahoo_ie.initialize()
2037
2038         def _real_extract(self, query):
2039                 mobj = re.match(self._VALID_QUERY, query)
2040                 if mobj is None:
2041                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2042                         return
2043
2044                 prefix, query = query.split(':')
2045                 prefix = prefix[8:]
2046                 query  = query.encode('utf-8')
2047                 if prefix == '':
2048                         self._download_n_results(query, 1)
2049                         return
2050                 elif prefix == 'all':
2051                         self._download_n_results(query, self._max_yahoo_results)
2052                         return
2053                 else:
2054                         try:
2055                                 n = long(prefix)
2056                                 if n <= 0:
2057                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2058                                         return
2059                                 elif n > self._max_yahoo_results:
2060                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n))
2061                                         n = self._max_yahoo_results
2062                                 self._download_n_results(query, n)
2063                                 return
2064                         except ValueError: # parsing prefix as integer fails
2065                                 self._download_n_results(query, 1)
2066                                 return
2067
2068         def _download_n_results(self, query, n):
2069                 """Downloads a specified number of results for a query"""
2070
2071                 video_ids = []
2072                 already_seen = set()
2073                 pagenum = 1
2074
2075                 while True:
2076                         self.report_download_page(query, pagenum)
2077                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2078                         request = urllib2.Request(result_url)
2079                         try:
2080                                 page = urllib2.urlopen(request).read()
2081                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2082                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2083                                 return
2084
2085                         # Extract video identifiers
2086                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2087                                 video_id = mobj.group(1)
2088                                 if video_id not in already_seen:
2089                                         video_ids.append(video_id)
2090                                         already_seen.add(video_id)
2091                                         if len(video_ids) == n:
2092                                                 # Specified n videos reached
2093                                                 for id in video_ids:
2094                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2095                                                 return
2096
2097                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2098                                 for id in video_ids:
2099                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2100                                 return
2101
2102                         pagenum = pagenum + 1
2103
2104 class YoutubePlaylistIE(InfoExtractor):
2105         """Information Extractor for YouTube playlists."""
2106
2107         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
2108         _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
2109         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2110         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2111         _youtube_ie = None
2112
2113         def __init__(self, youtube_ie, downloader=None):
2114                 InfoExtractor.__init__(self, downloader)
2115                 self._youtube_ie = youtube_ie
2116
2117         @staticmethod
2118         def suitable(url):
2119                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
2120
2121         def report_download_page(self, playlist_id, pagenum):
2122                 """Report attempt to download playlist page with given number."""
2123                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
2124
2125         def _real_initialize(self):
2126                 self._youtube_ie.initialize()
2127
2128         def _real_extract(self, url):
2129                 # Extract playlist id
2130                 mobj = re.match(self._VALID_URL, url)
2131                 if mobj is None:
2132                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2133                         return
2134
2135                 # Single video case
2136                 if mobj.group(3) is not None:
2137                         self._youtube_ie.extract(mobj.group(3))
2138                         return
2139
2140                 # Download playlist pages
2141                 # prefix is 'p' as default for playlists but there are other types that need extra care
2142                 playlist_prefix = mobj.group(1)
2143                 if playlist_prefix == 'a':
2144                         playlist_access = 'artist'
2145                 else:
2146                         playlist_prefix = 'p'
2147                         playlist_access = 'view_play_list'
2148                 playlist_id = mobj.group(2)
2149                 video_ids = []
2150                 pagenum = 1
2151
2152                 while True:
2153                         self.report_download_page(playlist_id, pagenum)
2154                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
2155                         try:
2156                                 page = urllib2.urlopen(request).read()
2157                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2158                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2159                                 return
2160
2161                         # Extract video identifiers
2162                         ids_in_page = []
2163                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2164                                 if mobj.group(1) not in ids_in_page:
2165                                         ids_in_page.append(mobj.group(1))
2166                         video_ids.extend(ids_in_page)
2167
2168                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2169                                 break
2170                         pagenum = pagenum + 1
2171
2172                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2173                 playlistend = self._downloader.params.get('playlistend', -1)
2174                 video_ids = video_ids[playliststart:playlistend]
2175
2176                 for id in video_ids:
2177                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2178                 return
2179
2180 class YoutubeUserIE(InfoExtractor):
2181         """Information Extractor for YouTube users."""
2182
2183         _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
2184         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2185         _GDATA_PAGE_SIZE = 50
2186         _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
2187         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2188         _youtube_ie = None
2189
2190         def __init__(self, youtube_ie, downloader=None):
2191                 InfoExtractor.__init__(self, downloader)
2192                 self._youtube_ie = youtube_ie
2193
2194         @staticmethod
2195         def suitable(url):
2196                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2197
2198         def report_download_page(self, username, start_index):
2199                 """Report attempt to download user page."""
2200                 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
2201                                            (username, start_index, start_index + self._GDATA_PAGE_SIZE))
2202
2203         def _real_initialize(self):
2204                 self._youtube_ie.initialize()
2205
2206         def _real_extract(self, url):
2207                 # Extract username
2208                 mobj = re.match(self._VALID_URL, url)
2209                 if mobj is None:
2210                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2211                         return
2212
2213                 username = mobj.group(1)
2214
2215                 # Download video ids using YouTube Data API. Result size per
2216                 # query is limited (currently to 50 videos) so we need to query
2217                 # page by page until there are no video ids - it means we got
2218                 # all of them.
2219
2220                 video_ids = []
2221                 pagenum = 0
2222
2223                 while True:
2224                         start_index = pagenum * self._GDATA_PAGE_SIZE + 1
2225                         self.report_download_page(username, start_index)
2226
2227                         request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
2228
2229                         try:
2230                                 page = urllib2.urlopen(request).read()
2231                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2232                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2233                                 return
2234
2235                         # Extract video identifiers
2236                         ids_in_page = []
2237
2238                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2239                                 if mobj.group(1) not in ids_in_page:
2240                                         ids_in_page.append(mobj.group(1))
2241
2242                         video_ids.extend(ids_in_page)
2243
2244                         # A little optimization - if current page is not
2245                         # "full", ie. does not contain PAGE_SIZE video ids then
2246                         # we can assume that this page is the last one - there
2247                         # are no more ids on further pages - no need to query
2248                         # again.
2249
2250                         if len(ids_in_page) < self._GDATA_PAGE_SIZE:
2251                                 break
2252
2253                         pagenum += 1
2254
2255                 all_ids_count = len(video_ids)
2256                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2257                 playlistend = self._downloader.params.get('playlistend', -1)
2258
2259                 if playlistend == -1:
2260                         video_ids = video_ids[playliststart:]
2261                 else:
2262                         video_ids = video_ids[playliststart:playlistend]
2263
2264                 self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
2265                                            (username, all_ids_count, len(video_ids)))
2266
2267                 for video_id in video_ids:
2268                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
2269
2270
2271 class DepositFilesIE(InfoExtractor):
2272         """Information extractor for depositfiles.com"""
2273
2274         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
2275
2276         def __init__(self, downloader=None):
2277                 InfoExtractor.__init__(self, downloader)
2278
2279         @staticmethod
2280         def suitable(url):
2281                 return (re.match(DepositFilesIE._VALID_URL, url) is not None)
2282
2283         def report_download_webpage(self, file_id):
2284                 """Report webpage download."""
2285                 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2286
2287         def report_extraction(self, file_id):
2288                 """Report information extraction."""
2289                 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2290
2291         def _real_initialize(self):
2292                 return
2293
2294         def _real_extract(self, url):
2295                 # At this point we have a new file
2296                 self._downloader.increment_downloads()
2297
2298                 file_id = url.split('/')[-1]
2299                 # Rebuild url in english locale
2300                 url = 'http://depositfiles.com/en/files/' + file_id
2301
2302                 # Retrieve file webpage with 'Free download' button pressed
2303                 free_download_indication = { 'gateway_result' : '1' }
2304                 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
2305                 try:
2306                         self.report_download_webpage(file_id)
2307                         webpage = urllib2.urlopen(request).read()
2308                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2309                         self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2310                         return
2311
2312                 # Search for the real file URL
2313                 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2314                 if (mobj is None) or (mobj.group(1) is None):
2315                         # Try to figure out reason of the error.
2316                         mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2317                         if (mobj is not None) and (mobj.group(1) is not None):
2318                                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2319                                 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2320                         else:
2321                                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2322                         return
2323
2324                 file_url = mobj.group(1)
2325                 file_extension = os.path.splitext(file_url)[1][1:]
2326
2327                 # Search for file title
2328                 mobj = re.search(r'<b title="(.*?)">', webpage)
2329                 if mobj is None:
2330                         self._downloader.trouble(u'ERROR: unable to extract title')
2331                         return
2332                 file_title = mobj.group(1).decode('utf-8')
2333
2334                 try:
2335                         # Process file information
2336                         self._downloader.process_info({
2337                                 'id':           file_id.decode('utf-8'),
2338                                 'url':          file_url.decode('utf-8'),
2339                                 'uploader':     u'NA',
2340                                 'upload_date':  u'NA',
2341                                 'title':        file_title,
2342                                 'stitle':       file_title,
2343                                 'ext':          file_extension.decode('utf-8'),
2344                                 'format':       u'NA',
2345                                 'player_url':   None,
2346                         })
2347                 except UnavailableVideoError, err:
2348                         self._downloader.trouble(u'ERROR: unable to download file')
2349
2350 class FacebookIE(InfoExtractor):
2351         """Information Extractor for Facebook"""
2352
2353         _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
2354         _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
2355         _NETRC_MACHINE = 'facebook'
2356         _available_formats = ['highqual', 'lowqual']
2357         _video_extensions = {
2358                 'highqual': 'mp4',
2359                 'lowqual': 'mp4',
2360         }
2361
2362         def __init__(self, downloader=None):
2363                 InfoExtractor.__init__(self, downloader)
2364
2365         @staticmethod
2366         def suitable(url):
2367                 return (re.match(FacebookIE._VALID_URL, url) is not None)
2368
2369         def _reporter(self, message):
2370                 """Add header and report message."""
2371                 self._downloader.to_screen(u'[facebook] %s' % message)
2372
2373         def report_login(self):
2374                 """Report attempt to log in."""
2375                 self._reporter(u'Logging in')
2376
2377         def report_video_webpage_download(self, video_id):
2378                 """Report attempt to download video webpage."""
2379                 self._reporter(u'%s: Downloading video webpage' % video_id)
2380
2381         def report_information_extraction(self, video_id):
2382                 """Report attempt to extract video information."""
2383                 self._reporter(u'%s: Extracting video information' % video_id)
2384
2385         def _parse_page(self, video_webpage):
2386                 """Extract video information from page"""
2387                 # General data
2388                 data = {'title': r'class="video_title datawrap">(.*?)</',
2389                         'description': r'<div class="datawrap">(.*?)</div>',
2390                         'owner': r'\("video_owner_name", "(.*?)"\)',
2391                         'upload_date': r'data-date="(.*?)"',
2392                         'thumbnail':  r'\("thumb_url", "(?P<THUMB>.*?)"\)',
2393                         }
2394                 video_info = {}
2395                 for piece in data.keys():
2396                         mobj = re.search(data[piece], video_webpage)
2397                         if mobj is not None:
2398                                 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2399
2400                 # Video urls
2401                 video_urls = {}
2402                 for fmt in self._available_formats:
2403                         mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
2404                         if mobj is not None:
2405                                 # URL is in a Javascript segment inside an escaped Unicode format within
2406                                 # the generally utf-8 page
2407                                 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2408                 video_info['video_urls'] = video_urls
2409
2410                 return video_info
2411
2412         def _real_initialize(self):
2413                 if self._downloader is None:
2414                         return
2415
2416                 useremail = None
2417                 password = None
2418                 downloader_params = self._downloader.params
2419
2420                 # Attempt to use provided username and password or .netrc data
2421                 if downloader_params.get('username', None) is not None:
2422                         useremail = downloader_params['username']
2423                         password = downloader_params['password']
2424                 elif downloader_params.get('usenetrc', False):
2425                         try:
2426                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
2427                                 if info is not None:
2428                                         useremail = info[0]
2429                                         password = info[2]
2430                                 else:
2431                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
2432                         except (IOError, netrc.NetrcParseError), err:
2433                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
2434                                 return
2435
2436                 if useremail is None:
2437                         return
2438
2439                 # Log in
2440                 login_form = {
2441                         'email': useremail,
2442                         'pass': password,
2443                         'login': 'Log+In'
2444                         }
2445                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
2446                 try:
2447                         self.report_login()
2448                         login_results = urllib2.urlopen(request).read()
2449                         if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
2450                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
2451                                 return
2452                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2453                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
2454                         return
2455
2456         def _real_extract(self, url):
2457                 mobj = re.match(self._VALID_URL, url)
2458                 if mobj is None:
2459                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2460                         return
2461                 video_id = mobj.group('ID')
2462
2463                 # Get video webpage
2464                 self.report_video_webpage_download(video_id)
2465                 request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
2466                 try:
2467                         page = urllib2.urlopen(request)
2468                         video_webpage = page.read()
2469                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2470                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
2471                         return
2472
2473                 # Start extracting information
2474                 self.report_information_extraction(video_id)
2475
2476                 # Extract information
2477                 video_info = self._parse_page(video_webpage)
2478
2479                 # uploader
2480                 if 'owner' not in video_info:
2481                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
2482                         return
2483                 video_uploader = video_info['owner']
2484
2485                 # title
2486                 if 'title' not in video_info:
2487                         self._downloader.trouble(u'ERROR: unable to extract video title')
2488                         return
2489                 video_title = video_info['title']
2490                 video_title = video_title.decode('utf-8')
2491                 video_title = sanitize_title(video_title)
2492
2493                 # simplified title
2494                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
2495                 simple_title = simple_title.strip(ur'_')
2496
2497                 # thumbnail image
2498                 if 'thumbnail' not in video_info:
2499                         self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
2500                         video_thumbnail = ''
2501                 else:
2502                         video_thumbnail = video_info['thumbnail']
2503
2504                 # upload date
2505                 upload_date = u'NA'
2506                 if 'upload_date' in video_info:
2507                         upload_time = video_info['upload_date']
2508                         timetuple = email.utils.parsedate_tz(upload_time)
2509                         if timetuple is not None:
2510                                 try:
2511                                         upload_date = time.strftime('%Y%m%d', timetuple[0:9])
2512                                 except:
2513                                         pass
2514
2515                 # description
2516                 video_description = 'No description available.'
2517                 if (self._downloader.params.get('forcedescription', False) and
2518                     'description' in video_info):
2519                         video_description = video_info['description']
2520
2521                 url_map = video_info['video_urls']
2522                 if len(url_map.keys()) > 0:
2523                         # Decide which formats to download
2524                         req_format = self._downloader.params.get('format', None)
2525                         format_limit = self._downloader.params.get('format_limit', None)
2526
2527                         if format_limit is not None and format_limit in self._available_formats:
2528                                 format_list = self._available_formats[self._available_formats.index(format_limit):]
2529                         else:
2530                                 format_list = self._available_formats
2531                         existing_formats = [x for x in format_list if x in url_map]
2532                         if len(existing_formats) == 0:
2533                                 self._downloader.trouble(u'ERROR: no known formats available for video')
2534                                 return
2535                         if req_format is None:
2536                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
2537                         elif req_format == '-1':
2538                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
2539                         else:
2540                                 # Specific format
2541                                 if req_format not in url_map:
2542                                         self._downloader.trouble(u'ERROR: requested format not available')
2543                                         return
2544                                 video_url_list = [(req_format, url_map[req_format])] # Specific format
2545
2546                 for format_param, video_real_url in video_url_list:
2547
2548                         # At this point we have a new video
2549                         self._downloader.increment_downloads()
2550
2551                         # Extension
2552                         video_extension = self._video_extensions.get(format_param, 'mp4')
2553
2554                         # Find the video URL in fmt_url_map or conn paramters
2555                         try:
2556                                 # Process video information
2557                                 self._downloader.process_info({
2558                                         'id':           video_id.decode('utf-8'),
2559                                         'url':          video_real_url.decode('utf-8'),
2560                                         'uploader':     video_uploader.decode('utf-8'),
2561                                         'upload_date':  upload_date,
2562                                         'title':        video_title,
2563                                         'stitle':       simple_title,
2564                                         'ext':          video_extension.decode('utf-8'),
2565                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
2566                                         'thumbnail':    video_thumbnail.decode('utf-8'),
2567                                         'description':  video_description.decode('utf-8'),
2568                                         'player_url':   None,
2569                                 })
2570                         except UnavailableVideoError, err:
2571                                 self._downloader.trouble(u'\nERROR: unable to download video')
2572
2573 class PostProcessor(object):
2574         """Post Processor class.
2575
2576         PostProcessor objects can be added to downloaders with their
2577         add_post_processor() method. When the downloader has finished a
2578         successful download, it will take its internal chain of PostProcessors
2579         and start calling the run() method on each one of them, first with
2580         an initial argument and then with the returned value of the previous
2581         PostProcessor.
2582
2583         The chain will be stopped if one of them ever returns None or the end
2584         of the chain is reached.
2585
2586         PostProcessor objects follow a "mutual registration" process similar
2587         to InfoExtractor objects.
2588         """
2589
2590         _downloader = None
2591
2592         def __init__(self, downloader=None):
2593                 self._downloader = downloader
2594
2595         def set_downloader(self, downloader):
2596                 """Sets the downloader for this PP."""
2597                 self._downloader = downloader
2598
2599         def run(self, information):
2600                 """Run the PostProcessor.
2601
2602                 The "information" argument is a dictionary like the ones
2603                 composed by InfoExtractors. The only difference is that this
2604                 one has an extra field called "filepath" that points to the
2605                 downloaded file.
2606
2607                 When this method returns None, the postprocessing chain is
2608                 stopped. However, this method may return an information
2609                 dictionary that will be passed to the next postprocessing
2610                 object in the chain. It can be the one it received after
2611                 changing some fields.
2612
2613                 In addition, this method may raise a PostProcessingError
2614                 exception that will be taken into account by the downloader
2615                 it was called from.
2616                 """
2617                 return information # by default, do nothing
2618
2619 class FFmpegExtractAudioPP(PostProcessor):
2620
2621         def __init__(self, downloader=None, preferredcodec=None):
2622                 PostProcessor.__init__(self, downloader)
2623                 if preferredcodec is None:
2624                         preferredcodec = 'best'
2625                 self._preferredcodec = preferredcodec
2626
2627         @staticmethod
2628         def get_audio_codec(path):
2629                 try:
2630                         cmd = ['ffprobe', '-show_streams', '--', path]
2631                         handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
2632                         output = handle.communicate()[0]
2633                         if handle.wait() != 0:
2634                                 return None
2635                 except (IOError, OSError):
2636                         return None
2637                 audio_codec = None
2638                 for line in output.split('\n'):
2639                         if line.startswith('codec_name='):
2640                                 audio_codec = line.split('=')[1].strip()
2641                         elif line.strip() == 'codec_type=audio' and audio_codec is not None:
2642                                 return audio_codec
2643                 return None
2644
2645         @staticmethod
2646         def run_ffmpeg(path, out_path, codec, more_opts):
2647                 try:
2648                         cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
2649                         ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
2650                         return (ret == 0)
2651                 except (IOError, OSError):
2652                         return False
2653
2654         def run(self, information):
2655                 path = information['filepath']
2656
2657                 filecodec = self.get_audio_codec(path)
2658                 if filecodec is None:
2659                         self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
2660                         return None
2661
2662                 more_opts = []
2663                 if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
2664                         if filecodec == 'aac' or filecodec == 'mp3':
2665                                 # Lossless if possible
2666                                 acodec = 'copy'
2667                                 extension = filecodec
2668                                 if filecodec == 'aac':
2669                                         more_opts = ['-f', 'adts']
2670                         else:
2671                                 # MP3 otherwise.
2672                                 acodec = 'libmp3lame'
2673                                 extension = 'mp3'
2674                                 more_opts = ['-ab', '128k']
2675                 else:
2676                         # We convert the audio (lossy)
2677                         acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
2678                         extension = self._preferredcodec
2679                         more_opts = ['-ab', '128k']
2680                         if self._preferredcodec == 'aac':
2681                                 more_opts += ['-f', 'adts']
2682
2683                 (prefix, ext) = os.path.splitext(path)
2684                 new_path = prefix + '.' + extension
2685                 self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
2686                 status = self.run_ffmpeg(path, new_path, acodec, more_opts)
2687
2688                 if not status:
2689                         self._downloader.to_stderr(u'WARNING: error running ffmpeg')
2690                         return None
2691
2692                 try:
2693                         os.remove(path)
2694                 except (IOError, OSError):
2695                         self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
2696                         return None
2697
2698                 information['filepath'] = new_path
2699                 return information
2700
2701 ### MAIN PROGRAM ###
2702 if __name__ == '__main__':
2703         try:
2704                 # Modules needed only when running the main program
2705                 import getpass
2706                 import optparse
2707
2708                 # Function to update the program file with the latest version from the repository.
2709                 def update_self(downloader, filename):
2710                         # Note: downloader only used for options
2711                         if not os.access(filename, os.W_OK):
2712                                 sys.exit('ERROR: no write permissions on %s' % filename)
2713
2714                         downloader.to_screen('Updating to latest stable version...')
2715                         try:
2716                                 latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
2717                                 latest_version = urllib.urlopen(latest_url).read().strip()
2718                                 prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
2719                                 newcontent = urllib.urlopen(prog_url).read()
2720                         except (IOError, OSError), err:
2721                                 sys.exit('ERROR: unable to download latest version')
2722                         try:
2723                                 stream = open(filename, 'w')
2724                                 stream.write(newcontent)
2725                                 stream.close()
2726                         except (IOError, OSError), err:
2727                                 sys.exit('ERROR: unable to overwrite current version')
2728                         downloader.to_screen('Updated to version %s' % latest_version)
2729
2730                 # Parse command line
2731                 parser = optparse.OptionParser(
2732                         usage='Usage: %prog [options] url...',
2733                         version='2011.08.04',
2734                         conflict_handler='resolve',
2735                 )
2736
2737                 parser.add_option('-h', '--help',
2738                                 action='help', help='print this help text and exit')
2739                 parser.add_option('-v', '--version',
2740                                 action='version', help='print program version and exit')
2741                 parser.add_option('-U', '--update',
2742                                 action='store_true', dest='update_self', help='update this program to latest stable version')
2743                 parser.add_option('-i', '--ignore-errors',
2744                                 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
2745                 parser.add_option('-r', '--rate-limit',
2746                                 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
2747                 parser.add_option('-R', '--retries',
2748                                 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
2749                 parser.add_option('--playlist-start',
2750                                 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
2751                 parser.add_option('--playlist-end',
2752                                 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
2753                 parser.add_option('--dump-user-agent',
2754                                 action='store_true', dest='dump_user_agent',
2755                                 help='display the current browser identification', default=False)
2756
2757                 authentication = optparse.OptionGroup(parser, 'Authentication Options')
2758                 authentication.add_option('-u', '--username',
2759                                 dest='username', metavar='USERNAME', help='account username')
2760                 authentication.add_option('-p', '--password',
2761                                 dest='password', metavar='PASSWORD', help='account password')
2762                 authentication.add_option('-n', '--netrc',
2763                                 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
2764                 parser.add_option_group(authentication)
2765
2766                 video_format = optparse.OptionGroup(parser, 'Video Format Options')
2767                 video_format.add_option('-f', '--format',
2768                                 action='store', dest='format', metavar='FORMAT', help='video format code')
2769                 video_format.add_option('--all-formats',
2770                                 action='store_const', dest='format', help='download all available video formats', const='-1')
2771                 video_format.add_option('--max-quality',
2772                                 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
2773                 parser.add_option_group(video_format)
2774
2775                 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
2776                 verbosity.add_option('-q', '--quiet',
2777                                 action='store_true', dest='quiet', help='activates quiet mode', default=False)
2778                 verbosity.add_option('-s', '--simulate',
2779                                 action='store_true', dest='simulate', help='do not download video', default=False)
2780                 verbosity.add_option('-g', '--get-url',
2781                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
2782                 verbosity.add_option('-e', '--get-title',
2783                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
2784                 verbosity.add_option('--get-thumbnail',
2785                                 action='store_true', dest='getthumbnail',
2786                                 help='simulate, quiet but print thumbnail URL', default=False)
2787                 verbosity.add_option('--get-description',
2788                                 action='store_true', dest='getdescription',
2789                                 help='simulate, quiet but print video description', default=False)
2790                 verbosity.add_option('--get-filename',
2791                                 action='store_true', dest='getfilename',
2792                                 help='simulate, quiet but print output filename', default=False)
2793                 verbosity.add_option('--no-progress',
2794                                 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
2795                 verbosity.add_option('--console-title',
2796                                 action='store_true', dest='consoletitle',
2797                                 help='display progress in console titlebar', default=False)
2798                 parser.add_option_group(verbosity)
2799
2800                 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
2801                 filesystem.add_option('-t', '--title',
2802                                 action='store_true', dest='usetitle', help='use title in file name', default=False)
2803                 filesystem.add_option('-l', '--literal',
2804                                 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
2805                 filesystem.add_option('-A', '--auto-number',
2806                                 action='store_true', dest='autonumber',
2807                                 help='number downloaded files starting from 00000', default=False)
2808                 filesystem.add_option('-o', '--output',
2809                                 dest='outtmpl', metavar='TEMPLATE', help='output filename template')
2810                 filesystem.add_option('-a', '--batch-file',
2811                                 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
2812                 filesystem.add_option('-w', '--no-overwrites',
2813                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
2814                 filesystem.add_option('-c', '--continue',
2815                                 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
2816                 filesystem.add_option('--cookies',
2817                                 dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
2818                 filesystem.add_option('--no-part',
2819                                 action='store_true', dest='nopart', help='do not use .part files', default=False)
2820                 filesystem.add_option('--no-mtime',
2821                                 action='store_false', dest='updatetime',
2822                                 help='do not use the Last-modified header to set the file modification time', default=True)
2823                 parser.add_option_group(filesystem)
2824
2825                 postproc = optparse.OptionGroup(parser, 'Post-processing Options')
2826                 postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
2827                                 help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
2828                 postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
2829                                 help='"best", "aac" or "mp3"; best by default')
2830                 parser.add_option_group(postproc)
2831
2832                 (opts, args) = parser.parse_args()
2833
2834                 # Open appropriate CookieJar
2835                 if opts.cookiefile is None:
2836                         jar = cookielib.CookieJar()
2837                 else:
2838                         try:
2839                                 jar = cookielib.MozillaCookieJar(opts.cookiefile)
2840                                 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
2841                                         jar.load()
2842                         except (IOError, OSError), err:
2843                                 sys.exit(u'ERROR: unable to open cookie file')
2844
2845                 # Dump user agent
2846                 if opts.dump_user_agent:
2847                         print std_headers['User-Agent']
2848                         sys.exit(0)
2849
2850                 # General configuration
2851                 cookie_processor = urllib2.HTTPCookieProcessor(jar)
2852                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()))
2853                 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
2854
2855                 # Batch file verification
2856                 batchurls = []
2857                 if opts.batchfile is not None:
2858                         try:
2859                                 if opts.batchfile == '-':
2860                                         batchfd = sys.stdin
2861                                 else:
2862                                         batchfd = open(opts.batchfile, 'r')
2863                                 batchurls = batchfd.readlines()
2864                                 batchurls = [x.strip() for x in batchurls]
2865                                 batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
2866                         except IOError:
2867                                 sys.exit(u'ERROR: batch file could not be read')
2868                 all_urls = batchurls + args
2869
2870                 # Conflicting, missing and erroneous options
2871                 if opts.usenetrc and (opts.username is not None or opts.password is not None):
2872                         parser.error(u'using .netrc conflicts with giving username/password')
2873                 if opts.password is not None and opts.username is None:
2874                         parser.error(u'account username missing')
2875                 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
2876                         parser.error(u'using output template conflicts with using title, literal title or auto number')
2877                 if opts.usetitle and opts.useliteral:
2878                         parser.error(u'using title conflicts with using literal title')
2879                 if opts.username is not None and opts.password is None:
2880                         opts.password = getpass.getpass(u'Type account password and press return:')
2881                 if opts.ratelimit is not None:
2882                         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
2883                         if numeric_limit is None:
2884                                 parser.error(u'invalid rate limit specified')
2885                         opts.ratelimit = numeric_limit
2886                 if opts.retries is not None:
2887                         try:
2888                                 opts.retries = long(opts.retries)
2889                         except (TypeError, ValueError), err:
2890                                 parser.error(u'invalid retry count specified')
2891                 try:
2892                         opts.playliststart = long(opts.playliststart)
2893                         if opts.playliststart <= 0:
2894                                 raise ValueError
2895                 except (TypeError, ValueError), err:
2896                         parser.error(u'invalid playlist start number specified')
2897                 try:
2898                         opts.playlistend = long(opts.playlistend)
2899                         if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
2900                                 raise ValueError
2901                 except (TypeError, ValueError), err:
2902                         parser.error(u'invalid playlist end number specified')
2903                 if opts.extractaudio:
2904                         if opts.audioformat not in ['best', 'aac', 'mp3']:
2905                                 parser.error(u'invalid audio format specified')
2906
2907                 # Information extractors
2908                 youtube_ie = YoutubeIE()
2909                 metacafe_ie = MetacafeIE(youtube_ie)
2910                 dailymotion_ie = DailymotionIE()
2911                 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
2912                 youtube_user_ie = YoutubeUserIE(youtube_ie)
2913                 youtube_search_ie = YoutubeSearchIE(youtube_ie)
2914                 google_ie = GoogleIE()
2915                 google_search_ie = GoogleSearchIE(google_ie)
2916                 photobucket_ie = PhotobucketIE()
2917                 yahoo_ie = YahooIE()
2918                 yahoo_search_ie = YahooSearchIE(yahoo_ie)
2919                 deposit_files_ie = DepositFilesIE()
2920                 facebook_ie = FacebookIE()
2921                 generic_ie = GenericIE()
2922
2923                 # File downloader
2924                 fd = FileDownloader({
2925                         'usenetrc': opts.usenetrc,
2926                         'username': opts.username,
2927                         'password': opts.password,
2928                         'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
2929                         'forceurl': opts.geturl,
2930                         'forcetitle': opts.gettitle,
2931                         'forcethumbnail': opts.getthumbnail,
2932                         'forcedescription': opts.getdescription,
2933                         'forcefilename': opts.getfilename,
2934                         'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
2935                         'format': opts.format,
2936                         'format_limit': opts.format_limit,
2937                         'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
2938                                 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
2939                                 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
2940                                 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
2941                                 or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
2942                                 or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
2943                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
2944                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
2945                                 or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
2946                                 or u'%(id)s.%(ext)s'),
2947                         'ignoreerrors': opts.ignoreerrors,
2948                         'ratelimit': opts.ratelimit,
2949                         'nooverwrites': opts.nooverwrites,
2950                         'retries': opts.retries,
2951                         'continuedl': opts.continue_dl,
2952                         'noprogress': opts.noprogress,
2953                         'playliststart': opts.playliststart,
2954                         'playlistend': opts.playlistend,
2955                         'logtostderr': opts.outtmpl == '-',
2956                         'consoletitle': opts.consoletitle,
2957                         'nopart': opts.nopart,
2958                         'updatetime': opts.updatetime,
2959                         })
2960                 fd.add_info_extractor(youtube_search_ie)
2961                 fd.add_info_extractor(youtube_pl_ie)
2962                 fd.add_info_extractor(youtube_user_ie)
2963                 fd.add_info_extractor(metacafe_ie)
2964                 fd.add_info_extractor(dailymotion_ie)
2965                 fd.add_info_extractor(youtube_ie)
2966                 fd.add_info_extractor(google_ie)
2967                 fd.add_info_extractor(google_search_ie)
2968                 fd.add_info_extractor(photobucket_ie)
2969                 fd.add_info_extractor(yahoo_ie)
2970                 fd.add_info_extractor(yahoo_search_ie)
2971                 fd.add_info_extractor(deposit_files_ie)
2972                 fd.add_info_extractor(facebook_ie)
2973
2974                 # This must come last since it's the
2975                 # fallback if none of the others work
2976                 fd.add_info_extractor(generic_ie)
2977
2978                 # PostProcessors
2979                 if opts.extractaudio:
2980                         fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat))
2981
2982                 # Update version
2983                 if opts.update_self:
2984                         update_self(fd, sys.argv[0])
2985
2986                 # Maybe do nothing
2987                 if len(all_urls) < 1:
2988                         if not opts.update_self:
2989                                 parser.error(u'you must provide at least one URL')
2990                         else:
2991                                 sys.exit()
2992                 retcode = fd.download(all_urls)
2993
2994                 # Dump cookie jar if requested
2995                 if opts.cookiefile is not None:
2996                         try:
2997                                 jar.save()
2998                         except (IOError, OSError), err:
2999                                 sys.exit(u'ERROR: unable to save cookie jar')
3000
3001                 sys.exit(retcode)
3002
3003         except DownloadError:
3004                 sys.exit(1)
3005         except SameFileError:
3006                 sys.exit(u'ERROR: fixed output name but more than one file to download')
3007         except KeyboardInterrupt:
3008                 sys.exit(u'\nERROR: Interrupted by user')
3009
3010 # vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: