youtube-dl

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 # Author: Ricardo Garcia Gonzalez
   4 # Author: Danny Colligan
   5 # Author: Benjamin Johnson
   6 # Author: Vasyl' Vavrychuk
   7 # Author: Witold Baryluk
   8 # Author: Paweł Paprota
   9 # Author: Gergely Imreh
  10 # License: Public domain code
  11 import cookielib
  12 import ctypes
  13 import datetime
  14 import email.utils
  15 import gzip
  16 import htmlentitydefs
  17 import httplib
  18 import json # TODO: json for 2.5
  19 import locale
  20 import math
  21 import netrc
  22 import os
  23 import os.path
  24 import re
  25 import socket
  26 import string
  27 import StringIO
  28 import subprocess
  29 import sys
  30 import time
  31 import urllib
  32 import urllib2
  33 import zlib
  34
  35 # parse_qs was moved from the cgi module to the urlparse module recently.
  36 try:
  37         from urlparse import parse_qs
  38 except ImportError:
  39         from cgi import parse_qs
  40
  41 std_headers = {
  42         'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11',
  43         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
  44         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  45         'Accept-Encoding': 'gzip, deflate',
  46         'Accept-Language': 'en-us,en;q=0.5',
  47 }
  48
  49 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
  50
  51 def preferredencoding():
  52         """Get preferred encoding.
  53
  54         Returns the best encoding scheme for the system, based on
  55         locale.getpreferredencoding() and some further tweaks.
  56         """
  57         def yield_preferredencoding():
  58                 try:
  59                         pref = locale.getpreferredencoding()
  60                         u'TEST'.encode(pref)
  61                 except:
  62                         pref = 'UTF-8'
  63                 while True:
  64                         yield pref
  65         return yield_preferredencoding().next()
  66
  67 def htmlentity_transform(matchobj):
  68         """Transforms an HTML entity to a Unicode character.
  69
  70         This function receives a match object and is intended to be used with
  71         the re.sub() function.
  72         """
  73         entity = matchobj.group(1)
  74
  75         # Known non-numeric HTML entity
  76         if entity in htmlentitydefs.name2codepoint:
  77                 return unichr(htmlentitydefs.name2codepoint[entity])
  78
  79         # Unicode character
  80         mobj = re.match(ur'(?u)#(x?\d+)', entity)
  81         if mobj is not None:
  82                 numstr = mobj.group(1)
  83                 if numstr.startswith(u'x'):
  84                         base = 16
  85                         numstr = u'0%s' % numstr
  86                 else:
  87                         base = 10
  88                 return unichr(long(numstr, base))
  89
  90         # Unknown entity in name, return its literal representation
  91         return (u'&%s;' % entity)
  92
  93 def sanitize_title(utitle):
  94         """Sanitizes a video title so it could be used as part of a filename."""
  95         utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
  96         return utitle.replace(unicode(os.sep), u'%')
  97
  98 def sanitize_open(filename, open_mode):
  99         """Try to open the given filename, and slightly tweak it if this fails.
 100
 101         Attempts to open the given filename. If this fails, it tries to change
 102         the filename slightly, step by step, until it's either able to open it
 103         or it fails and raises a final exception, like the standard open()
 104         function.
 105
 106         It returns the tuple (stream, definitive_file_name).
 107         """
 108         try:
 109                 if filename == u'-':
 110                         if sys.platform == 'win32':
 111                                 import msvcrt
 112                                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 113                         return (sys.stdout, filename)
 114                 stream = open(filename, open_mode)
 115                 return (stream, filename)
 116         except (IOError, OSError), err:
 117                 # In case of error, try to remove win32 forbidden chars
 118                 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
 119
 120                 # An exception here should be caught in the caller
 121                 stream = open(filename, open_mode)
 122                 return (stream, filename)
 123
 124 def timeconvert(timestr):
 125     """Convert RFC 2822 defined time string into system timestamp"""
 126     timestamp = None
 127     timetuple = email.utils.parsedate_tz(timestr)
 128     if timetuple is not None:
 129         timestamp = email.utils.mktime_tz(timetuple)
 130     return timestamp
 131
 132 class DownloadError(Exception):
 133         """Download Error exception.
 134
 135         This exception may be thrown by FileDownloader objects if they are not
 136         configured to continue on errors. They will contain the appropriate
 137         error message.
 138         """
 139         pass
 140
 141 class SameFileError(Exception):
 142         """Same File exception.
 143
 144         This exception will be thrown by FileDownloader objects if they detect
 145         multiple files would have to be downloaded to the same file on disk.
 146         """
 147         pass
 148
 149 class PostProcessingError(Exception):
 150         """Post Processing exception.
 151
 152         This exception may be raised by PostProcessor's .run() method to
 153         indicate an error in the postprocessing task.
 154         """
 155         pass
 156
 157 class UnavailableVideoError(Exception):
 158         """Unavailable Format exception.
 159
 160         This exception will be thrown when a video is requested
 161         in a format that is not available for that video.
 162         """
 163         pass
 164
 165 class ContentTooShortError(Exception):
 166         """Content Too Short exception.
 167
 168         This exception may be raised by FileDownloader objects when a file they
 169         download is too small for what the server announced first, indicating
 170         the connection was probably interrupted.
 171         """
 172         # Both in bytes
 173         downloaded = None
 174         expected = None
 175
 176         def __init__(self, downloaded, expected):
 177                 self.downloaded = downloaded
 178                 self.expected = expected
 179
 180 class YoutubeDLHandler(urllib2.HTTPHandler):
 181         """Handler for HTTP requests and responses.
 182
 183         This class, when installed with an OpenerDirector, automatically adds
 184         the standard headers to every HTTP request and handles gzipped and
 185         deflated responses from web servers. If compression is to be avoided in
 186         a particular request, the original request in the program code only has
 187         to include the HTTP header "Youtubedl-No-Compression", which will be
 188         removed before making the real request.
 189
 190         Part of this code was copied from:
 191
 192           http://techknack.net/python-urllib2-handlers/
 193
 194         Andrew Rowls, the author of that code, agreed to release it to the
 195         public domain.
 196         """
 197
 198         @staticmethod
 199         def deflate(data):
 200                 try:
 201                         return zlib.decompress(data, -zlib.MAX_WBITS)
 202                 except zlib.error:
 203                         return zlib.decompress(data)
 204
 205         @staticmethod
 206         def addinfourl_wrapper(stream, headers, url, code):
 207                 if hasattr(urllib2.addinfourl, 'getcode'):
 208                         return urllib2.addinfourl(stream, headers, url, code)
 209                 ret = urllib2.addinfourl(stream, headers, url)
 210                 ret.code = code
 211                 return ret
 212
 213         def http_request(self, req):
 214                 for h in std_headers:
 215                         if h in req.headers:
 216                                 del req.headers[h]
 217                         req.add_header(h, std_headers[h])
 218                 if 'Youtubedl-no-compression' in req.headers:
 219                         if 'Accept-encoding' in req.headers:
 220                                 del req.headers['Accept-encoding']
 221                         del req.headers['Youtubedl-no-compression']
 222                 return req
 223
 224         def http_response(self, req, resp):
 225                 old_resp = resp
 226                 # gzip
 227                 if resp.headers.get('Content-encoding', '') == 'gzip':
 228                         gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
 229                         resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 230                         resp.msg = old_resp.msg
 231                 # deflate
 232                 if resp.headers.get('Content-encoding', '') == 'deflate':
 233                         gz = StringIO.StringIO(self.deflate(resp.read()))
 234                         resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 235                         resp.msg = old_resp.msg
 236                 return resp
 237
 238 class FileDownloader(object):
 239         """File Downloader class.
 240
 241         File downloader objects are the ones responsible of downloading the
 242         actual video file and writing it to disk if the user has requested
 243         it, among some other tasks. In most cases there should be one per
 244         program. As, given a video URL, the downloader doesn't know how to
 245         extract all the needed information, task that InfoExtractors do, it
 246         has to pass the URL to one of them.
 247
 248         For this, file downloader objects have a method that allows
 249         InfoExtractors to be registered in a given order. When it is passed
 250         a URL, the file downloader handles it to the first InfoExtractor it
 251         finds that reports being able to handle it. The InfoExtractor extracts
 252         all the information about the video or videos the URL refers to, and
 253         asks the FileDownloader to process the video information, possibly
 254         downloading the video.
 255
 256         File downloaders accept a lot of parameters. In order not to saturate
 257         the object constructor with arguments, it receives a dictionary of
 258         options instead. These options are available through the params
 259         attribute for the InfoExtractors to use. The FileDownloader also
 260         registers itself as the downloader in charge for the InfoExtractors
 261         that are added to it, so this is a "mutual registration".
 262
 263         Available options:
 264
 265         username:         Username for authentication purposes.
 266         password:         Password for authentication purposes.
 267         usenetrc:         Use netrc for authentication instead.
 268         quiet:            Do not print messages to stdout.
 269         forceurl:         Force printing final URL.
 270         forcetitle:       Force printing title.
 271         forcethumbnail:   Force printing thumbnail URL.
 272         forcedescription: Force printing description.
 273         forcefilename:    Force printing final filename.
 274         simulate:         Do not download the video files.
 275         format:           Video format code.
 276         format_limit:     Highest quality format to try.
 277         outtmpl:          Template for output names.
 278         ignoreerrors:     Do not stop on download errors.
 279         ratelimit:        Download speed limit, in bytes/sec.
 280         nooverwrites:     Prevent overwriting files.
 281         retries:          Number of times to retry for HTTP error 5xx
 282         continuedl:       Try to continue downloads if possible.
 283         noprogress:       Do not print the progress bar.
 284         playliststart:    Playlist item to start at.
 285         playlistend:      Playlist item to end at.
 286         logtostderr:      Log messages to stderr instead of stdout.
 287         consoletitle:     Display progress in console window's titlebar.
 288         nopart:           Do not use temporary .part files.
 289         updatetime:       Use the Last-modified header to set output file timestamps.
 290         """
 291
 292         params = None
 293         _ies = []
 294         _pps = []
 295         _download_retcode = None
 296         _num_downloads = None
 297         _screen_file = None
 298
 299         def __init__(self, params):
 300                 """Create a FileDownloader object with the given options."""
 301                 self._ies = []
 302                 self._pps = []
 303                 self._download_retcode = 0
 304                 self._num_downloads = 0
 305                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 306                 self.params = params
 307
 308         @staticmethod
 309         def pmkdir(filename):
 310                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
 311                 components = filename.split(os.sep)
 312                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
 313                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
 314                 for dir in aggregate:
 315                         if not os.path.exists(dir):
 316                                 os.mkdir(dir)
 317
 318         @staticmethod
 319         def format_bytes(bytes):
 320                 if bytes is None:
 321                         return 'N/A'
 322                 if type(bytes) is str:
 323                         bytes = float(bytes)
 324                 if bytes == 0.0:
 325                         exponent = 0
 326                 else:
 327                         exponent = long(math.log(bytes, 1024.0))
 328                 suffix = 'bkMGTPEZY'[exponent]
 329                 converted = float(bytes) / float(1024**exponent)
 330                 return '%.2f%s' % (converted, suffix)
 331
 332         @staticmethod
 333         def calc_percent(byte_counter, data_len):
 334                 if data_len is None:
 335                         return '---.-%'
 336                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 337
 338         @staticmethod
 339         def calc_eta(start, now, total, current):
 340                 if total is None:
 341                         return '--:--'
 342                 dif = now - start
 343                 if current == 0 or dif < 0.001: # One millisecond
 344                         return '--:--'
 345                 rate = float(current) / dif
 346                 eta = long((float(total) - float(current)) / rate)
 347                 (eta_mins, eta_secs) = divmod(eta, 60)
 348                 if eta_mins > 99:
 349                         return '--:--'
 350                 return '%02d:%02d' % (eta_mins, eta_secs)
 351
 352         @staticmethod
 353         def calc_speed(start, now, bytes):
 354                 dif = now - start
 355                 if bytes == 0 or dif < 0.001: # One millisecond
 356                         return '%10s' % '---b/s'
 357                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 358
 359         @staticmethod
 360         def best_block_size(elapsed_time, bytes):
 361                 new_min = max(bytes / 2.0, 1.0)
 362                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 363                 if elapsed_time < 0.001:
 364                         return long(new_max)
 365                 rate = bytes / elapsed_time
 366                 if rate > new_max:
 367                         return long(new_max)
 368                 if rate < new_min:
 369                         return long(new_min)
 370                 return long(rate)
 371
 372         @staticmethod
 373         def parse_bytes(bytestr):
 374                 """Parse a string indicating a byte quantity into a long integer."""
 375                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 376                 if matchobj is None:
 377                         return None
 378                 number = float(matchobj.group(1))
 379                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 380                 return long(round(number * multiplier))
 381
 382         def add_info_extractor(self, ie):
 383                 """Add an InfoExtractor object to the end of the list."""
 384                 self._ies.append(ie)
 385                 ie.set_downloader(self)
 386
 387         def add_post_processor(self, pp):
 388                 """Add a PostProcessor object to the end of the chain."""
 389                 self._pps.append(pp)
 390                 pp.set_downloader(self)
 391
 392         def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
 393                 """Print message to stdout if not in quiet mode."""
 394                 try:
 395                         if not self.params.get('quiet', False):
 396                                 terminator = [u'\n', u''][skip_eol]
 397                                 print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
 398                         self._screen_file.flush()
 399                 except (UnicodeEncodeError), err:
 400                         if not ignore_encoding_errors:
 401                                 raise
 402
 403         def to_stderr(self, message):
 404                 """Print message to stderr."""
 405                 print >>sys.stderr, message.encode(preferredencoding())
 406
 407         def to_cons_title(self, message):
 408                 """Set console/terminal window title to message."""
 409                 if not self.params.get('consoletitle', False):
 410                         return
 411                 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 412                         # c_wchar_p() might not be necessary if `message` is
 413                         # already of type unicode()
 414                         ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 415                 elif 'TERM' in os.environ:
 416                         sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
 417
 418         def fixed_template(self):
 419                 """Checks if the output template is fixed."""
 420                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
 421
 422         def trouble(self, message=None):
 423                 """Determine action to take when a download problem appears.
 424
 425                 Depending on if the downloader has been configured to ignore
 426                 download errors or not, this method may throw an exception or
 427                 not when errors are found, after printing the message.
 428                 """
 429                 if message is not None:
 430                         self.to_stderr(message)
 431                 if not self.params.get('ignoreerrors', False):
 432                         raise DownloadError(message)
 433                 self._download_retcode = 1
 434
 435         def slow_down(self, start_time, byte_counter):
 436                 """Sleep if the download speed is over the rate limit."""
 437                 rate_limit = self.params.get('ratelimit', None)
 438                 if rate_limit is None or byte_counter == 0:
 439                         return
 440                 now = time.time()
 441                 elapsed = now - start_time
 442                 if elapsed <= 0.0:
 443                         return
 444                 speed = float(byte_counter) / elapsed
 445                 if speed > rate_limit:
 446                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 447
 448         def temp_name(self, filename):
 449                 """Returns a temporary filename for the given filename."""
 450                 if self.params.get('nopart', False) or filename == u'-' or \
 451                                 (os.path.exists(filename) and not os.path.isfile(filename)):
 452                         return filename
 453                 return filename + u'.part'
 454
 455         def undo_temp_name(self, filename):
 456                 if filename.endswith(u'.part'):
 457                         return filename[:-len(u'.part')]
 458                 return filename
 459
 460         def try_rename(self, old_filename, new_filename):
 461                 try:
 462                         if old_filename == new_filename:
 463                                 return
 464                         os.rename(old_filename, new_filename)
 465                 except (IOError, OSError), err:
 466                         self.trouble(u'ERROR: unable to rename file')
 467
 468         def try_utime(self, filename, last_modified_hdr):
 469                 """Try to set the last-modified time of the given file."""
 470                 if last_modified_hdr is None:
 471                         return
 472                 if not os.path.isfile(filename):
 473                         return
 474                 timestr = last_modified_hdr
 475                 if timestr is None:
 476                         return
 477                 filetime = timeconvert(timestr)
 478                 if filetime is None:
 479                         return
 480                 try:
 481                         os.utime(filename,(time.time(), filetime))
 482                 except:
 483                         pass
 484
 485         def report_destination(self, filename):
 486                 """Report destination filename."""
 487                 self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
 488
 489         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 490                 """Report download progress."""
 491                 if self.params.get('noprogress', False):
 492                         return
 493                 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
 494                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 495                 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
 496                                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 497
 498         def report_resuming_byte(self, resume_len):
 499                 """Report attempt to resume at given byte."""
 500                 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 501
 502         def report_retry(self, count, retries):
 503                 """Report retry in case of HTTP error 5xx"""
 504                 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 505
 506         def report_file_already_downloaded(self, file_name):
 507                 """Report file has already been fully downloaded."""
 508                 try:
 509                         self.to_screen(u'[download] %s has already been downloaded' % file_name)
 510                 except (UnicodeEncodeError), err:
 511                         self.to_screen(u'[download] The file has already been downloaded')
 512
 513         def report_unable_to_resume(self):
 514                 """Report it was impossible to resume download."""
 515                 self.to_screen(u'[download] Unable to resume')
 516
 517         def report_finish(self):
 518                 """Report download finished."""
 519                 if self.params.get('noprogress', False):
 520                         self.to_screen(u'[download] Download completed')
 521                 else:
 522                         self.to_screen(u'')
 523
 524         def increment_downloads(self):
 525                 """Increment the ordinal that assigns a number to each file."""
 526                 self._num_downloads += 1
 527
 528         def prepare_filename(self, info_dict):
 529                 """Generate the output filename."""
 530                 try:
 531                         template_dict = dict(info_dict)
 532                         template_dict['epoch'] = unicode(long(time.time()))
 533                         template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
 534                         filename = self.params['outtmpl'] % template_dict
 535                         return filename
 536                 except (ValueError, KeyError), err:
 537                         self.trouble(u'ERROR: invalid system charset or erroneous output template')
 538                         return None
 539
 540         def process_info(self, info_dict):
 541                 """Process a single dictionary returned by an InfoExtractor."""
 542                 filename = self.prepare_filename(info_dict)
 543                 # Do nothing else if in simulate mode
 544                 if self.params.get('simulate', False):
 545                         # Forced printings
 546                         if self.params.get('forcetitle', False):
 547                                 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
 548                         if self.params.get('forceurl', False):
 549                                 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
 550                         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 551                                 print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
 552                         if self.params.get('forcedescription', False) and 'description' in info_dict:
 553                                 print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
 554                         if self.params.get('forcefilename', False) and filename is not None:
 555                                 print filename.encode(preferredencoding(), 'xmlcharrefreplace')
 556
 557                         return
 558
 559                 if filename is None:
 560                         return
 561                 if self.params.get('nooverwrites', False) and os.path.exists(filename):
 562                         self.to_stderr(u'WARNING: file exists and will be skipped')
 563                         return
 564
 565                 try:
 566                         self.pmkdir(filename)
 567                 except (OSError, IOError), err:
 568                         self.trouble(u'ERROR: unable to create directories: %s' % str(err))
 569                         return
 570
 571                 try:
 572                         success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
 573                 except (OSError, IOError), err:
 574                         raise UnavailableVideoError
 575                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 576                         self.trouble(u'ERROR: unable to download video data: %s' % str(err))
 577                         return
 578                 except (ContentTooShortError, ), err:
 579                         self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 580                         return
 581
 582                 if success:
 583                         try:
 584                                 self.post_process(filename, info_dict)
 585                         except (PostProcessingError), err:
 586                                 self.trouble(u'ERROR: postprocessing: %s' % str(err))
 587                                 return
 588
 589         def download(self, url_list):
 590                 """Download a given list of URLs."""
 591                 if len(url_list) > 1 and self.fixed_template():
 592                         raise SameFileError(self.params['outtmpl'])
 593
 594                 for url in url_list:
 595                         suitable_found = False
 596                         for ie in self._ies:
 597                                 # Go to next InfoExtractor if not suitable
 598                                 if not ie.suitable(url):
 599                                         continue
 600
 601                                 # Suitable InfoExtractor found
 602                                 suitable_found = True
 603
 604                                 # Extract information from URL and process it
 605                                 ie.extract(url)
 606
 607                                 # Suitable InfoExtractor had been found; go to next URL
 608                                 break
 609
 610                         if not suitable_found:
 611                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
 612
 613                 return self._download_retcode
 614
 615         def post_process(self, filename, ie_info):
 616                 """Run the postprocessing chain on the given file."""
 617                 info = dict(ie_info)
 618                 info['filepath'] = filename
 619                 for pp in self._pps:
 620                         info = pp.run(info)
 621                         if info is None:
 622                                 break
 623
 624         def _download_with_rtmpdump(self, filename, url, player_url):
 625                 self.report_destination(filename)
 626                 tmpfilename = self.temp_name(filename)
 627
 628                 # Check for rtmpdump first
 629                 try:
 630                         subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 631                 except (OSError, IOError):
 632                         self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
 633                         return False
 634
 635                 # Download using rtmpdump. rtmpdump returns exit code 2 when
 636                 # the connection was interrumpted and resuming appears to be
 637                 # possible. This is part of rtmpdump's normal usage, AFAIK.
 638                 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
 639                 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
 640                 while retval == 2 or retval == 1:
 641                         prevsize = os.path.getsize(tmpfilename)
 642                         self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 643                         time.sleep(5.0) # This seems to be needed
 644                         retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 645                         cursize = os.path.getsize(tmpfilename)
 646                         if prevsize == cursize and retval == 1:
 647                                 break
 648                 if retval == 0:
 649                         self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
 650                         self.try_rename(tmpfilename, filename)
 651                         return True
 652                 else:
 653                         self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
 654                         return False
 655
 656         def _do_download(self, filename, url, player_url):
 657                 # Check file already present
 658                 if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
 659                         self.report_file_already_downloaded(filename)
 660                         return True
 661
 662                 # Attempt to download using rtmpdump
 663                 if url.startswith('rtmp'):
 664                         return self._download_with_rtmpdump(filename, url, player_url)
 665
 666                 tmpfilename = self.temp_name(filename)
 667                 stream = None
 668                 open_mode = 'wb'
 669
 670                 # Do not include the Accept-Encoding header
 671                 headers = {'Youtubedl-no-compression': 'True'}
 672                 basic_request = urllib2.Request(url, None, headers)
 673                 request = urllib2.Request(url, None, headers)
 674
 675                 # Establish possible resume length
 676                 if os.path.isfile(tmpfilename):
 677                         resume_len = os.path.getsize(tmpfilename)
 678                 else:
 679                         resume_len = 0
 680
 681                 # Request parameters in case of being able to resume
 682                 if self.params.get('continuedl', False) and resume_len != 0:
 683                         self.report_resuming_byte(resume_len)
 684                         request.add_header('Range','bytes=%d-' % resume_len)
 685                         open_mode = 'ab'
 686
 687                 count = 0
 688                 retries = self.params.get('retries', 0)
 689                 while count <= retries:
 690                         # Establish connection
 691                         try:
 692                                 data = urllib2.urlopen(request)
 693                                 break
 694                         except (urllib2.HTTPError, ), err:
 695                                 if (err.code < 500 or err.code >= 600) and err.code != 416:
 696                                         # Unexpected HTTP error
 697                                         raise
 698                                 elif err.code == 416:
 699                                         # Unable to resume (requested range not satisfiable)
 700                                         try:
 701                                                 # Open the connection again without the range header
 702                                                 data = urllib2.urlopen(basic_request)
 703                                                 content_length = data.info()['Content-Length']
 704                                         except (urllib2.HTTPError, ), err:
 705                                                 if err.code < 500 or err.code >= 600:
 706                                                         raise
 707                                         else:
 708                                                 # Examine the reported length
 709                                                 if (content_length is not None and
 710                                                     (resume_len - 100 < long(content_length) < resume_len + 100)):
 711                                                         # The file had already been fully downloaded.
 712                                                         # Explanation to the above condition: in issue #175 it was revealed that
 713                                                         # YouTube sometimes adds or removes a few bytes from the end of the file,
 714                                                         # changing the file size slightly and causing problems for some users. So
 715                                                         # I decided to implement a suggested change and consider the file
 716                                                         # completely downloaded if the file size differs less than 100 bytes from
 717                                                         # the one in the hard drive.
 718                                                         self.report_file_already_downloaded(filename)
 719                                                         self.try_rename(tmpfilename, filename)
 720                                                         return True
 721                                                 else:
 722                                                         # The length does not match, we start the download over
 723                                                         self.report_unable_to_resume()
 724                                                         open_mode = 'wb'
 725                                                         break
 726                         # Retry
 727                         count += 1
 728                         if count <= retries:
 729                                 self.report_retry(count, retries)
 730
 731                 if count > retries:
 732                         self.trouble(u'ERROR: giving up after %s retries' % retries)
 733                         return False
 734
 735                 data_len = data.info().get('Content-length', None)
 736                 if data_len is not None:
 737                         data_len = long(data_len) + resume_len
 738                 data_len_str = self.format_bytes(data_len)
 739                 byte_counter = 0 + resume_len
 740                 block_size = 1024
 741                 start = time.time()
 742                 while True:
 743                         # Download and write
 744                         before = time.time()
 745                         data_block = data.read(block_size)
 746                         after = time.time()
 747                         if len(data_block) == 0:
 748                                 break
 749                         byte_counter += len(data_block)
 750
 751                         # Open file just in time
 752                         if stream is None:
 753                                 try:
 754                                         (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
 755                                         filename = self.undo_temp_name(tmpfilename)
 756                                         self.report_destination(filename)
 757                                 except (OSError, IOError), err:
 758                                         self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
 759                                         return False
 760                         try:
 761                                 stream.write(data_block)
 762                         except (IOError, OSError), err:
 763                                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
 764                                 return False
 765                         block_size = self.best_block_size(after - before, len(data_block))
 766
 767                         # Progress message
 768                         percent_str = self.calc_percent(byte_counter, data_len)
 769                         eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
 770                         speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
 771                         self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 772
 773                         # Apply rate limit
 774                         self.slow_down(start, byte_counter - resume_len)
 775
 776                 stream.close()
 777                 self.report_finish()
 778                 if data_len is not None and byte_counter != data_len:
 779                         raise ContentTooShortError(byte_counter, long(data_len))
 780                 self.try_rename(tmpfilename, filename)
 781
 782                 # Update file modification time
 783                 if self.params.get('updatetime', True):
 784                         self.try_utime(filename, data.info().get('last-modified', None))
 785
 786                 return True
 787
 788 class InfoExtractor(object):
 789         """Information Extractor class.
 790
 791         Information extractors are the classes that, given a URL, extract
 792         information from the video (or videos) the URL refers to. This
 793         information includes the real video URL, the video title and simplified
 794         title, author and others. The information is stored in a dictionary
 795         which is then passed to the FileDownloader. The FileDownloader
 796         processes this information possibly downloading the video to the file
 797         system, among other possible outcomes. The dictionaries must include
 798         the following fields:
 799
 800         id:             Video identifier.
 801         url:            Final video URL.
 802         uploader:       Nickname of the video uploader.
 803         title:          Literal title.
 804         stitle:         Simplified title.
 805         ext:            Video filename extension.
 806         format:         Video format.
 807         player_url:     SWF Player URL (may be None).
 808
 809         The following fields are optional. Their primary purpose is to allow
 810         youtube-dl to serve as the backend for a video search function, such
 811         as the one in youtube2mp3.  They are only used when their respective
 812         forced printing functions are called:
 813
 814         thumbnail:      Full URL to a video thumbnail image.
 815         description:    One-line video description.
 816
 817         Subclasses of this one should re-define the _real_initialize() and
 818         _real_extract() methods, as well as the suitable() static method.
 819         Probably, they should also be instantiated and added to the main
 820         downloader.
 821         """
 822
 823         _ready = False
 824         _downloader = None
 825
 826         def __init__(self, downloader=None):
 827                 """Constructor. Receives an optional downloader."""
 828                 self._ready = False
 829                 self.set_downloader(downloader)
 830
 831         @staticmethod
 832         def suitable(url):
 833                 """Receives a URL and returns True if suitable for this IE."""
 834                 return False
 835
 836         def initialize(self):
 837                 """Initializes an instance (authentication, etc)."""
 838                 if not self._ready:
 839                         self._real_initialize()
 840                         self._ready = True
 841
 842         def extract(self, url):
 843                 """Extracts URL information and returns it in list of dicts."""
 844                 self.initialize()
 845                 return self._real_extract(url)
 846
 847         def set_downloader(self, downloader):
 848                 """Sets the downloader for this IE."""
 849                 self._downloader = downloader
 850
 851         def _real_initialize(self):
 852                 """Real initialization process. Redefine in subclasses."""
 853                 pass
 854
 855         def _real_extract(self, url):
 856                 """Real extraction process. Redefine in subclasses."""
 857                 pass
 858
 859 class YoutubeIE(InfoExtractor):
 860         """Information extractor for youtube.com."""
 861
 862         _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
 863         _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
 864         _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
 865         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
 866         _NETRC_MACHINE = 'youtube'
 867         # Listed in order of quality
 868         _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
 869         _video_extensions = {
 870                 '13': '3gp',
 871                 '17': 'mp4',
 872                 '18': 'mp4',
 873                 '22': 'mp4',
 874                 '37': 'mp4',
 875                 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
 876                 '43': 'webm',
 877                 '45': 'webm',
 878         }
 879
 880         @staticmethod
 881         def suitable(url):
 882                 return (re.match(YoutubeIE._VALID_URL, url) is not None)
 883
 884         def report_lang(self):
 885                 """Report attempt to set language."""
 886                 self._downloader.to_screen(u'[youtube] Setting language')
 887
 888         def report_login(self):
 889                 """Report attempt to log in."""
 890                 self._downloader.to_screen(u'[youtube] Logging in')
 891
 892         def report_age_confirmation(self):
 893                 """Report attempt to confirm age."""
 894                 self._downloader.to_screen(u'[youtube] Confirming age')
 895
 896         def report_video_webpage_download(self, video_id):
 897                 """Report attempt to download video webpage."""
 898                 self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
 899
 900         def report_video_info_webpage_download(self, video_id):
 901                 """Report attempt to download video info webpage."""
 902                 self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
 903
 904         def report_information_extraction(self, video_id):
 905                 """Report attempt to extract video information."""
 906                 self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
 907
 908         def report_unavailable_format(self, video_id, format):
 909                 """Report extracted video URL."""
 910                 self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
 911
 912         def report_rtmp_download(self):
 913                 """Indicate the download will use the RTMP protocol."""
 914                 self._downloader.to_screen(u'[youtube] RTMP download detected')
 915
 916         def _real_initialize(self):
 917                 if self._downloader is None:
 918                         return
 919
 920                 username = None
 921                 password = None
 922                 downloader_params = self._downloader.params
 923
 924                 # Attempt to use provided username and password or .netrc data
 925                 if downloader_params.get('username', None) is not None:
 926                         username = downloader_params['username']
 927                         password = downloader_params['password']
 928                 elif downloader_params.get('usenetrc', False):
 929                         try:
 930                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
 931                                 if info is not None:
 932                                         username = info[0]
 933                                         password = info[2]
 934                                 else:
 935                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
 936                         except (IOError, netrc.NetrcParseError), err:
 937                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
 938                                 return
 939
 940                 # Set language
 941                 request = urllib2.Request(self._LANG_URL)
 942                 try:
 943                         self.report_lang()
 944                         urllib2.urlopen(request).read()
 945                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 946                         self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
 947                         return
 948
 949                 # No authentication to be performed
 950                 if username is None:
 951                         return
 952
 953                 # Log in
 954                 login_form = {
 955                                 'current_form': 'loginForm',
 956                                 'next':         '/',
 957                                 'action_login': 'Log In',
 958                                 'username':     username,
 959                                 'password':     password,
 960                                 }
 961                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
 962                 try:
 963                         self.report_login()
 964                         login_results = urllib2.urlopen(request).read()
 965                         if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
 966                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
 967                                 return
 968                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 969                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
 970                         return
 971
 972                 # Confirm age
 973                 age_form = {
 974                                 'next_url':             '/',
 975                                 'action_confirm':       'Confirm',
 976                                 }
 977                 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
 978                 try:
 979                         self.report_age_confirmation()
 980                         age_results = urllib2.urlopen(request).read()
 981                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 982                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
 983                         return
 984
 985         def _real_extract(self, url):
 986                 # Extract video id from URL
 987                 mobj = re.match(self._VALID_URL, url)
 988                 if mobj is None:
 989                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
 990                         return
 991                 video_id = mobj.group(2)
 992
 993                 # Get video webpage
 994                 self.report_video_webpage_download(video_id)
 995                 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id)
 996                 try:
 997                         video_webpage = urllib2.urlopen(request).read()
 998                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 999                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
1000                         return
1001
1002                 # Attempt to extract SWF player URL
1003                 mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1004                 if mobj is not None:
1005                         player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1006                 else:
1007                         player_url = None
1008
1009                 # Get video info
1010                 self.report_video_info_webpage_download(video_id)
1011                 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1012                         video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1013                                            % (video_id, el_type))
1014                         request = urllib2.Request(video_info_url)
1015                         try:
1016                                 video_info_webpage = urllib2.urlopen(request).read()
1017                                 video_info = parse_qs(video_info_webpage)
1018                                 if 'token' in video_info:
1019                                         break
1020                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1021                                 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
1022                                 return
1023                 if 'token' not in video_info:
1024                         if 'reason' in video_info:
1025                                 self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
1026                         else:
1027                                 self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
1028                         return
1029
1030                 # Start extracting information
1031                 self.report_information_extraction(video_id)
1032
1033                 # uploader
1034                 if 'author' not in video_info:
1035                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1036                         return
1037                 video_uploader = urllib.unquote_plus(video_info['author'][0])
1038
1039                 # title
1040                 if 'title' not in video_info:
1041                         self._downloader.trouble(u'ERROR: unable to extract video title')
1042                         return
1043                 video_title = urllib.unquote_plus(video_info['title'][0])
1044                 video_title = video_title.decode('utf-8')
1045                 video_title = sanitize_title(video_title)
1046
1047                 # simplified title
1048                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1049                 simple_title = simple_title.strip(ur'_')
1050
1051                 # thumbnail image
1052                 if 'thumbnail_url' not in video_info:
1053                         self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
1054                         video_thumbnail = ''
1055                 else:   # don't panic if we can't find it
1056                         video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
1057
1058                 # upload date
1059                 upload_date = u'NA'
1060                 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1061                 if mobj is not None:
1062                         upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1063                         format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
1064                         for expression in format_expressions:
1065                                 try:
1066                                         upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
1067                                 except:
1068                                         pass
1069
1070                 # description
1071                 video_description = 'No description available.'
1072                 if self._downloader.params.get('forcedescription', False):
1073                         mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
1074                         if mobj is not None:
1075                                 video_description = mobj.group(1)
1076
1077                 # token
1078                 video_token = urllib.unquote_plus(video_info['token'][0])
1079
1080                 # Decide which formats to download
1081                 req_format = self._downloader.params.get('format', None)
1082
1083                 if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1 and ',' in video_info['fmt_url_map'][0]:
1084                         url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
1085                         format_limit = self._downloader.params.get('format_limit', None)
1086                         if format_limit is not None and format_limit in self._available_formats:
1087                                 format_list = self._available_formats[self._available_formats.index(format_limit):]
1088                         else:
1089                                 format_list = self._available_formats
1090                         existing_formats = [x for x in format_list if x in url_map]
1091                         if len(existing_formats) == 0:
1092                                 self._downloader.trouble(u'ERROR: no known formats available for video')
1093                                 return
1094                         if req_format is None:
1095                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1096                         elif req_format == '-1':
1097                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1098                         else:
1099                                 # Specific format
1100                                 if req_format not in url_map:
1101                                         self._downloader.trouble(u'ERROR: requested format not available')
1102                                         return
1103                                 video_url_list = [(req_format, url_map[req_format])] # Specific format
1104
1105                 elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1106                         self.report_rtmp_download()
1107                         video_url_list = [(None, video_info['conn'][0])]
1108
1109                 else:
1110                         self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
1111                         return
1112
1113                 for format_param, video_real_url in video_url_list:
1114                         # At this point we have a new video
1115                         self._downloader.increment_downloads()
1116
1117                         # Extension
1118                         video_extension = self._video_extensions.get(format_param, 'flv')
1119
1120                         # Find the video URL in fmt_url_map or conn paramters
1121                         try:
1122                                 # Process video information
1123                                 self._downloader.process_info({
1124                                         'id':           video_id.decode('utf-8'),
1125                                         'url':          video_real_url.decode('utf-8'),
1126                                         'uploader':     video_uploader.decode('utf-8'),
1127                                         'upload_date':  upload_date,
1128                                         'title':        video_title,
1129                                         'stitle':       simple_title,
1130                                         'ext':          video_extension.decode('utf-8'),
1131                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
1132                                         'thumbnail':    video_thumbnail.decode('utf-8'),
1133                                         'description':  video_description.decode('utf-8'),
1134                                         'player_url':   player_url,
1135                                 })
1136                         except UnavailableVideoError, err:
1137                                 self._downloader.trouble(u'\nERROR: unable to download video')
1138
1139
1140 class MetacafeIE(InfoExtractor):
1141         """Information Extractor for metacafe.com."""
1142
1143         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
1144         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
1145         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
1146         _youtube_ie = None
1147
1148         def __init__(self, youtube_ie, downloader=None):
1149                 InfoExtractor.__init__(self, downloader)
1150                 self._youtube_ie = youtube_ie
1151
1152         @staticmethod
1153         def suitable(url):
1154                 return (re.match(MetacafeIE._VALID_URL, url) is not None)
1155
1156         def report_disclaimer(self):
1157                 """Report disclaimer retrieval."""
1158                 self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
1159
1160         def report_age_confirmation(self):
1161                 """Report attempt to confirm age."""
1162                 self._downloader.to_screen(u'[metacafe] Confirming age')
1163
1164         def report_download_webpage(self, video_id):
1165                 """Report webpage download."""
1166                 self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
1167
1168         def report_extraction(self, video_id):
1169                 """Report information extraction."""
1170                 self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
1171
1172         def _real_initialize(self):
1173                 # Retrieve disclaimer
1174                 request = urllib2.Request(self._DISCLAIMER)
1175                 try:
1176                         self.report_disclaimer()
1177                         disclaimer = urllib2.urlopen(request).read()
1178                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1179                         self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
1180                         return
1181
1182                 # Confirm age
1183                 disclaimer_form = {
1184                         'filters': '0',
1185                         'submit': "Continue - I'm over 18",
1186                         }
1187                 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
1188                 try:
1189                         self.report_age_confirmation()
1190                         disclaimer = urllib2.urlopen(request).read()
1191                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1192                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
1193                         return
1194
1195         def _real_extract(self, url):
1196                 # Extract id and simplified title from URL
1197                 mobj = re.match(self._VALID_URL, url)
1198                 if mobj is None:
1199                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1200                         return
1201
1202                 video_id = mobj.group(1)
1203
1204                 # Check if video comes from YouTube
1205                 mobj2 = re.match(r'^yt-(.*)$', video_id)
1206                 if mobj2 is not None:
1207                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1208                         return
1209
1210                 # At this point we have a new video
1211                 self._downloader.increment_downloads()
1212
1213                 simple_title = mobj.group(2).decode('utf-8')
1214
1215                 # Retrieve video webpage to extract further information
1216                 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1217                 try:
1218                         self.report_download_webpage(video_id)
1219                         webpage = urllib2.urlopen(request).read()
1220                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1221                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1222                         return
1223
1224                 # Extract URL, uploader and title from webpage
1225                 self.report_extraction(video_id)
1226                 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
1227                 if mobj is not None:
1228                         mediaURL = urllib.unquote(mobj.group(1))
1229                         video_extension = mediaURL[-3:]
1230
1231                         # Extract gdaKey if available
1232                         mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1233                         if mobj is None:
1234                                 video_url = mediaURL
1235                         else:
1236                                 gdaKey = mobj.group(1)
1237                                 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
1238                 else:
1239                         mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
1240                         if mobj is None:
1241                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1242                                 return
1243                         vardict = parse_qs(mobj.group(1))
1244                         if 'mediaData' not in vardict:
1245                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1246                                 return
1247                         mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
1248                         if mobj is None:
1249                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1250                                 return
1251                         mediaURL = mobj.group(1).replace('\\/', '/')
1252                         video_extension = mediaURL[-3:]
1253                         video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
1254
1255                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
1256                 if mobj is None:
1257                         self._downloader.trouble(u'ERROR: unable to extract title')
1258                         return
1259                 video_title = mobj.group(1).decode('utf-8')
1260                 video_title = sanitize_title(video_title)
1261
1262                 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
1263                 if mobj is None:
1264                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1265                         return
1266                 video_uploader = mobj.group(1)
1267
1268                 try:
1269                         # Process video information
1270                         self._downloader.process_info({
1271                                 'id':           video_id.decode('utf-8'),
1272                                 'url':          video_url.decode('utf-8'),
1273                                 'uploader':     video_uploader.decode('utf-8'),
1274                                 'upload_date':  u'NA',
1275                                 'title':        video_title,
1276                                 'stitle':       simple_title,
1277                                 'ext':          video_extension.decode('utf-8'),
1278                                 'format':       u'NA',
1279                                 'player_url':   None,
1280                         })
1281                 except UnavailableVideoError:
1282                         self._downloader.trouble(u'\nERROR: unable to download video')
1283
1284
1285 class DailymotionIE(InfoExtractor):
1286         """Information Extractor for Dailymotion"""
1287
1288         _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1289
1290         def __init__(self, downloader=None):
1291                 InfoExtractor.__init__(self, downloader)
1292
1293         @staticmethod
1294         def suitable(url):
1295                 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1296
1297         def report_download_webpage(self, video_id):
1298                 """Report webpage download."""
1299                 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
1300
1301         def report_extraction(self, video_id):
1302                 """Report information extraction."""
1303                 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
1304
1305         def _real_initialize(self):
1306                 return
1307
1308         def _real_extract(self, url):
1309                 # Extract id and simplified title from URL
1310                 mobj = re.match(self._VALID_URL, url)
1311                 if mobj is None:
1312                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1313                         return
1314
1315                 # At this point we have a new video
1316                 self._downloader.increment_downloads()
1317                 video_id = mobj.group(1)
1318
1319                 simple_title = mobj.group(2).decode('utf-8')
1320                 video_extension = 'flv'
1321
1322                 # Retrieve video webpage to extract further information
1323                 request = urllib2.Request(url)
1324                 try:
1325                         self.report_download_webpage(video_id)
1326                         webpage = urllib2.urlopen(request).read()
1327                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1328                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1329                         return
1330
1331                 # Extract URL, uploader and title from webpage
1332                 self.report_extraction(video_id)
1333                 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1334                 if mobj is None:
1335                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1336                         return
1337                 mediaURL = urllib.unquote(mobj.group(1))
1338
1339                 # if needed add http://www.dailymotion.com/ if relative URL
1340
1341                 video_url = mediaURL
1342
1343                 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1344                 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1345                 if mobj is None:
1346                         self._downloader.trouble(u'ERROR: unable to extract title')
1347                         return
1348                 video_title = mobj.group(1).decode('utf-8')
1349                 video_title = sanitize_title(video_title)
1350
1351                 mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
1352                 if mobj is None:
1353                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1354                         return
1355                 video_uploader = mobj.group(1)
1356
1357                 try:
1358                         # Process video information
1359                         self._downloader.process_info({
1360                                 'id':           video_id.decode('utf-8'),
1361                                 'url':          video_url.decode('utf-8'),
1362                                 'uploader':     video_uploader.decode('utf-8'),
1363                                 'upload_date':  u'NA',
1364                                 'title':        video_title,
1365                                 'stitle':       simple_title,
1366                                 'ext':          video_extension.decode('utf-8'),
1367                                 'format':       u'NA',
1368                                 'player_url':   None,
1369                         })
1370                 except UnavailableVideoError:
1371                         self._downloader.trouble(u'\nERROR: unable to download video')
1372
1373 class GoogleIE(InfoExtractor):
1374         """Information extractor for video.google.com."""
1375
1376         _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1377
1378         def __init__(self, downloader=None):
1379                 InfoExtractor.__init__(self, downloader)
1380
1381         @staticmethod
1382         def suitable(url):
1383                 return (re.match(GoogleIE._VALID_URL, url) is not None)
1384
1385         def report_download_webpage(self, video_id):
1386                 """Report webpage download."""
1387                 self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
1388
1389         def report_extraction(self, video_id):
1390                 """Report information extraction."""
1391                 self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
1392
1393         def _real_initialize(self):
1394                 return
1395
1396         def _real_extract(self, url):
1397                 # Extract id from URL
1398                 mobj = re.match(self._VALID_URL, url)
1399                 if mobj is None:
1400                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1401                         return
1402
1403                 # At this point we have a new video
1404                 self._downloader.increment_downloads()
1405                 video_id = mobj.group(1)
1406
1407                 video_extension = 'mp4'
1408
1409                 # Retrieve video webpage to extract further information
1410                 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1411                 try:
1412                         self.report_download_webpage(video_id)
1413                         webpage = urllib2.urlopen(request).read()
1414                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1415                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1416                         return
1417
1418                 # Extract URL, uploader, and title from webpage
1419                 self.report_extraction(video_id)
1420                 mobj = re.search(r"download_url:'([^']+)'", webpage)
1421                 if mobj is None:
1422                         video_extension = 'flv'
1423                         mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1424                 if mobj is None:
1425                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1426                         return
1427                 mediaURL = urllib.unquote(mobj.group(1))
1428                 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1429                 mediaURL = mediaURL.replace('\\x26', '\x26')
1430
1431                 video_url = mediaURL
1432
1433                 mobj = re.search(r'<title>(.*)</title>', webpage)
1434                 if mobj is None:
1435                         self._downloader.trouble(u'ERROR: unable to extract title')
1436                         return
1437                 video_title = mobj.group(1).decode('utf-8')
1438                 video_title = sanitize_title(video_title)
1439                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1440
1441                 # Extract video description
1442                 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1443                 if mobj is None:
1444                         self._downloader.trouble(u'ERROR: unable to extract video description')
1445                         return
1446                 video_description = mobj.group(1).decode('utf-8')
1447                 if not video_description:
1448                         video_description = 'No description available.'
1449
1450                 # Extract video thumbnail
1451                 if self._downloader.params.get('forcethumbnail', False):
1452                         request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1453                         try:
1454                                 webpage = urllib2.urlopen(request).read()
1455                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1456                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1457                                 return
1458                         mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1459                         if mobj is None:
1460                                 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1461                                 return
1462                         video_thumbnail = mobj.group(1)
1463                 else:   # we need something to pass to process_info
1464                         video_thumbnail = ''
1465
1466
1467                 try:
1468                         # Process video information
1469                         self._downloader.process_info({
1470                                 'id':           video_id.decode('utf-8'),
1471                                 'url':          video_url.decode('utf-8'),
1472                                 'uploader':     u'NA',
1473                                 'upload_date':  u'NA',
1474                                 'title':        video_title,
1475                                 'stitle':       simple_title,
1476                                 'ext':          video_extension.decode('utf-8'),
1477                                 'format':       u'NA',
1478                                 'player_url':   None,
1479                         })
1480                 except UnavailableVideoError:
1481                         self._downloader.trouble(u'\nERROR: unable to download video')
1482
1483
1484 class PhotobucketIE(InfoExtractor):
1485         """Information extractor for photobucket.com."""
1486
1487         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1488
1489         def __init__(self, downloader=None):
1490                 InfoExtractor.__init__(self, downloader)
1491
1492         @staticmethod
1493         def suitable(url):
1494                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1495
1496         def report_download_webpage(self, video_id):
1497                 """Report webpage download."""
1498                 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1499
1500         def report_extraction(self, video_id):
1501                 """Report information extraction."""
1502                 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1503
1504         def _real_initialize(self):
1505                 return
1506
1507         def _real_extract(self, url):
1508                 # Extract id from URL
1509                 mobj = re.match(self._VALID_URL, url)
1510                 if mobj is None:
1511                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1512                         return
1513
1514                 # At this point we have a new video
1515                 self._downloader.increment_downloads()
1516                 video_id = mobj.group(1)
1517
1518                 video_extension = 'flv'
1519
1520                 # Retrieve video webpage to extract further information
1521                 request = urllib2.Request(url)
1522                 try:
1523                         self.report_download_webpage(video_id)
1524                         webpage = urllib2.urlopen(request).read()
1525                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1526                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1527                         return
1528
1529                 # Extract URL, uploader, and title from webpage
1530                 self.report_extraction(video_id)
1531                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1532                 if mobj is None:
1533                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1534                         return
1535                 mediaURL = urllib.unquote(mobj.group(1))
1536
1537                 video_url = mediaURL
1538
1539                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1540                 if mobj is None:
1541                         self._downloader.trouble(u'ERROR: unable to extract title')
1542                         return
1543                 video_title = mobj.group(1).decode('utf-8')
1544                 video_title = sanitize_title(video_title)
1545                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1546
1547                 video_uploader = mobj.group(2).decode('utf-8')
1548
1549                 try:
1550                         # Process video information
1551                         self._downloader.process_info({
1552                                 'id':           video_id.decode('utf-8'),
1553                                 'url':          video_url.decode('utf-8'),
1554                                 'uploader':     video_uploader,
1555                                 'upload_date':  u'NA',
1556                                 'title':        video_title,
1557                                 'stitle':       simple_title,
1558                                 'ext':          video_extension.decode('utf-8'),
1559                                 'format':       u'NA',
1560                                 'player_url':   None,
1561                         })
1562                 except UnavailableVideoError:
1563                         self._downloader.trouble(u'\nERROR: unable to download video')
1564
1565
1566 class YahooIE(InfoExtractor):
1567         """Information extractor for video.yahoo.com."""
1568
1569         # _VALID_URL matches all Yahoo! Video URLs
1570         # _VPAGE_URL matches only the extractable '/watch/' URLs
1571         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1572         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1573
1574         def __init__(self, downloader=None):
1575                 InfoExtractor.__init__(self, downloader)
1576
1577         @staticmethod
1578         def suitable(url):
1579                 return (re.match(YahooIE._VALID_URL, url) is not None)
1580
1581         def report_download_webpage(self, video_id):
1582                 """Report webpage download."""
1583                 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1584
1585         def report_extraction(self, video_id):
1586                 """Report information extraction."""
1587                 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1588
1589         def _real_initialize(self):
1590                 return
1591
1592         def _real_extract(self, url, new_video=True):
1593                 # Extract ID from URL
1594                 mobj = re.match(self._VALID_URL, url)
1595                 if mobj is None:
1596                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1597                         return
1598
1599                 # At this point we have a new video
1600                 self._downloader.increment_downloads()
1601                 video_id = mobj.group(2)
1602                 video_extension = 'flv'
1603
1604                 # Rewrite valid but non-extractable URLs as
1605                 # extractable English language /watch/ URLs
1606                 if re.match(self._VPAGE_URL, url) is None:
1607                         request = urllib2.Request(url)
1608                         try:
1609                                 webpage = urllib2.urlopen(request).read()
1610                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1611                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1612                                 return
1613
1614                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1615                         if mobj is None:
1616                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1617                                 return
1618                         yahoo_id = mobj.group(1)
1619
1620                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1621                         if mobj is None:
1622                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1623                                 return
1624                         yahoo_vid = mobj.group(1)
1625
1626                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1627                         return self._real_extract(url, new_video=False)
1628
1629                 # Retrieve video webpage to extract further information
1630                 request = urllib2.Request(url)
1631                 try:
1632                         self.report_download_webpage(video_id)
1633                         webpage = urllib2.urlopen(request).read()
1634                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1635                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1636                         return
1637
1638                 # Extract uploader and title from webpage
1639                 self.report_extraction(video_id)
1640                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1641                 if mobj is None:
1642                         self._downloader.trouble(u'ERROR: unable to extract video title')
1643                         return
1644                 video_title = mobj.group(1).decode('utf-8')
1645                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1646
1647                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1648                 if mobj is None:
1649                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1650                         return
1651                 video_uploader = mobj.group(1).decode('utf-8')
1652
1653                 # Extract video thumbnail
1654                 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1655                 if mobj is None:
1656                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1657                         return
1658                 video_thumbnail = mobj.group(1).decode('utf-8')
1659
1660                 # Extract video description
1661                 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1662                 if mobj is None:
1663                         self._downloader.trouble(u'ERROR: unable to extract video description')
1664                         return
1665                 video_description = mobj.group(1).decode('utf-8')
1666                 if not video_description: video_description = 'No description available.'
1667
1668                 # Extract video height and width
1669                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1670                 if mobj is None:
1671                         self._downloader.trouble(u'ERROR: unable to extract video height')
1672                         return
1673                 yv_video_height = mobj.group(1)
1674
1675                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1676                 if mobj is None:
1677                         self._downloader.trouble(u'ERROR: unable to extract video width')
1678                         return
1679                 yv_video_width = mobj.group(1)
1680
1681                 # Retrieve video playlist to extract media URL
1682                 # I'm not completely sure what all these options are, but we
1683                 # seem to need most of them, otherwise the server sends a 401.
1684                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1685                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1686                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1687                                           '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1688                                           '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1689                 try:
1690                         self.report_download_webpage(video_id)
1691                         webpage = urllib2.urlopen(request).read()
1692                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1693                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1694                         return
1695
1696                 # Extract media URL from playlist XML
1697                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1698                 if mobj is None:
1699                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1700                         return
1701                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1702                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1703
1704                 try:
1705                         # Process video information
1706                         self._downloader.process_info({
1707                                 'id':           video_id.decode('utf-8'),
1708                                 'url':          video_url,
1709                                 'uploader':     video_uploader,
1710                                 'upload_date':  u'NA',
1711                                 'title':        video_title,
1712                                 'stitle':       simple_title,
1713                                 'ext':          video_extension.decode('utf-8'),
1714                                 'thumbnail':    video_thumbnail.decode('utf-8'),
1715                                 'description':  video_description,
1716                                 'thumbnail':    video_thumbnail,
1717                                 'description':  video_description,
1718                                 'player_url':   None,
1719                         })
1720                 except UnavailableVideoError:
1721                         self._downloader.trouble(u'\nERROR: unable to download video')
1722
1723
1724 class GenericIE(InfoExtractor):
1725         """Generic last-resort information extractor."""
1726
1727         def __init__(self, downloader=None):
1728                 InfoExtractor.__init__(self, downloader)
1729
1730         @staticmethod
1731         def suitable(url):
1732                 return True
1733
1734         def report_download_webpage(self, video_id):
1735                 """Report webpage download."""
1736                 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
1737                 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
1738
1739         def report_extraction(self, video_id):
1740                 """Report information extraction."""
1741                 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
1742
1743         def _real_initialize(self):
1744                 return
1745
1746         def _real_extract(self, url):
1747                 # At this point we have a new video
1748                 self._downloader.increment_downloads()
1749
1750                 video_id = url.split('/')[-1]
1751                 request = urllib2.Request(url)
1752                 try:
1753                         self.report_download_webpage(video_id)
1754                         webpage = urllib2.urlopen(request).read()
1755                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1756                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1757                         return
1758                 except ValueError, err:
1759                         # since this is the last-resort InfoExtractor, if
1760                         # this error is thrown, it'll be thrown here
1761                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1762                         return
1763
1764                 self.report_extraction(video_id)
1765                 # Start with something easy: JW Player in SWFObject
1766                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1767                 if mobj is None:
1768                         # Broaden the search a little bit
1769                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1770                 if mobj is None:
1771                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1772                         return
1773
1774                 # It's possible that one of the regexes
1775                 # matched, but returned an empty group:
1776                 if mobj.group(1) is None:
1777                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1778                         return
1779
1780                 video_url = urllib.unquote(mobj.group(1))
1781                 video_id  = os.path.basename(video_url)
1782
1783                 # here's a fun little line of code for you:
1784                 video_extension = os.path.splitext(video_id)[1][1:]
1785                 video_id        = os.path.splitext(video_id)[0]
1786
1787                 # it's tempting to parse this further, but you would
1788                 # have to take into account all the variations like
1789                 #   Video Title - Site Name
1790                 #   Site Name | Video Title
1791                 #   Video Title - Tagline | Site Name
1792                 # and so on and so forth; it's just not practical
1793                 mobj = re.search(r'<title>(.*)</title>', webpage)
1794                 if mobj is None:
1795                         self._downloader.trouble(u'ERROR: unable to extract title')
1796                         return
1797                 video_title = mobj.group(1).decode('utf-8')
1798                 video_title = sanitize_title(video_title)
1799                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1800
1801                 # video uploader is domain name
1802                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1803                 if mobj is None:
1804                         self._downloader.trouble(u'ERROR: unable to extract title')
1805                         return
1806                 video_uploader = mobj.group(1).decode('utf-8')
1807
1808                 try:
1809                         # Process video information
1810                         self._downloader.process_info({
1811                                 'id':           video_id.decode('utf-8'),
1812                                 'url':          video_url.decode('utf-8'),
1813                                 'uploader':     video_uploader,
1814                                 'upload_date':  u'NA',
1815                                 'title':        video_title,
1816                                 'stitle':       simple_title,
1817                                 'ext':          video_extension.decode('utf-8'),
1818                                 'format':       u'NA',
1819                                 'player_url':   None,
1820                         })
1821                 except UnavailableVideoError, err:
1822                         self._downloader.trouble(u'\nERROR: unable to download video')
1823
1824
1825 class YoutubeSearchIE(InfoExtractor):
1826         """Information Extractor for YouTube search queries."""
1827         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1828         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1829         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1830         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1831         _youtube_ie = None
1832         _max_youtube_results = 1000
1833
1834         def __init__(self, youtube_ie, downloader=None):
1835                 InfoExtractor.__init__(self, downloader)
1836                 self._youtube_ie = youtube_ie
1837
1838         @staticmethod
1839         def suitable(url):
1840                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1841
1842         def report_download_page(self, query, pagenum):
1843                 """Report attempt to download playlist page with given number."""
1844                 query = query.decode(preferredencoding())
1845                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1846
1847         def _real_initialize(self):
1848                 self._youtube_ie.initialize()
1849
1850         def _real_extract(self, query):
1851                 mobj = re.match(self._VALID_QUERY, query)
1852                 if mobj is None:
1853                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1854                         return
1855
1856                 prefix, query = query.split(':')
1857                 prefix = prefix[8:]
1858                 query  = query.encode('utf-8')
1859                 if prefix == '':
1860                         self._download_n_results(query, 1)
1861                         return
1862                 elif prefix == 'all':
1863                         self._download_n_results(query, self._max_youtube_results)
1864                         return
1865                 else:
1866                         try:
1867                                 n = long(prefix)
1868                                 if n <= 0:
1869                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1870                                         return
1871                                 elif n > self._max_youtube_results:
1872                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
1873                                         n = self._max_youtube_results
1874                                 self._download_n_results(query, n)
1875                                 return
1876                         except ValueError: # parsing prefix as integer fails
1877                                 self._download_n_results(query, 1)
1878                                 return
1879
1880         def _download_n_results(self, query, n):
1881                 """Downloads a specified number of results for a query"""
1882
1883                 video_ids = []
1884                 already_seen = set()
1885                 pagenum = 1
1886
1887                 while True:
1888                         self.report_download_page(query, pagenum)
1889                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1890                         request = urllib2.Request(result_url)
1891                         try:
1892                                 page = urllib2.urlopen(request).read()
1893                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1894                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1895                                 return
1896
1897                         # Extract video identifiers
1898                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1899                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1900                                 if video_id not in already_seen:
1901                                         video_ids.append(video_id)
1902                                         already_seen.add(video_id)
1903                                         if len(video_ids) == n:
1904                                                 # Specified n videos reached
1905                                                 for id in video_ids:
1906                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1907                                                 return
1908
1909                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1910                                 for id in video_ids:
1911                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1912                                 return
1913
1914                         pagenum = pagenum + 1
1915
1916 class GoogleSearchIE(InfoExtractor):
1917         """Information Extractor for Google Video search queries."""
1918         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1919         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1920         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1921         _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1922         _google_ie = None
1923         _max_google_results = 1000
1924
1925         def __init__(self, google_ie, downloader=None):
1926                 InfoExtractor.__init__(self, downloader)
1927                 self._google_ie = google_ie
1928
1929         @staticmethod
1930         def suitable(url):
1931                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1932
1933         def report_download_page(self, query, pagenum):
1934                 """Report attempt to download playlist page with given number."""
1935                 query = query.decode(preferredencoding())
1936                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1937
1938         def _real_initialize(self):
1939                 self._google_ie.initialize()
1940
1941         def _real_extract(self, query):
1942                 mobj = re.match(self._VALID_QUERY, query)
1943                 if mobj is None:
1944                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1945                         return
1946
1947                 prefix, query = query.split(':')
1948                 prefix = prefix[8:]
1949                 query  = query.encode('utf-8')
1950                 if prefix == '':
1951                         self._download_n_results(query, 1)
1952                         return
1953                 elif prefix == 'all':
1954                         self._download_n_results(query, self._max_google_results)
1955                         return
1956                 else:
1957                         try:
1958                                 n = long(prefix)
1959                                 if n <= 0:
1960                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1961                                         return
1962                                 elif n > self._max_google_results:
1963                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n))
1964                                         n = self._max_google_results
1965                                 self._download_n_results(query, n)
1966                                 return
1967                         except ValueError: # parsing prefix as integer fails
1968                                 self._download_n_results(query, 1)
1969                                 return
1970
1971         def _download_n_results(self, query, n):
1972                 """Downloads a specified number of results for a query"""
1973
1974                 video_ids = []
1975                 already_seen = set()
1976                 pagenum = 1
1977
1978                 while True:
1979                         self.report_download_page(query, pagenum)
1980                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1981                         request = urllib2.Request(result_url)
1982                         try:
1983                                 page = urllib2.urlopen(request).read()
1984                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1985                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1986                                 return
1987
1988                         # Extract video identifiers
1989                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1990                                 video_id = mobj.group(1)
1991                                 if video_id not in already_seen:
1992                                         video_ids.append(video_id)
1993                                         already_seen.add(video_id)
1994                                         if len(video_ids) == n:
1995                                                 # Specified n videos reached
1996                                                 for id in video_ids:
1997                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1998                                                 return
1999
2000                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2001                                 for id in video_ids:
2002                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2003                                 return
2004
2005                         pagenum = pagenum + 1
2006
2007 class YahooSearchIE(InfoExtractor):
2008         """Information Extractor for Yahoo! Video search queries."""
2009         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
2010         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
2011         _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
2012         _MORE_PAGES_INDICATOR = r'\s*Next'
2013         _yahoo_ie = None
2014         _max_yahoo_results = 1000
2015
2016         def __init__(self, yahoo_ie, downloader=None):
2017                 InfoExtractor.__init__(self, downloader)
2018                 self._yahoo_ie = yahoo_ie
2019
2020         @staticmethod
2021         def suitable(url):
2022                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
2023
2024         def report_download_page(self, query, pagenum):
2025                 """Report attempt to download playlist page with given number."""
2026                 query = query.decode(preferredencoding())
2027                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
2028
2029         def _real_initialize(self):
2030                 self._yahoo_ie.initialize()
2031
2032         def _real_extract(self, query):
2033                 mobj = re.match(self._VALID_QUERY, query)
2034                 if mobj is None:
2035                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2036                         return
2037
2038                 prefix, query = query.split(':')
2039                 prefix = prefix[8:]
2040                 query  = query.encode('utf-8')
2041                 if prefix == '':
2042                         self._download_n_results(query, 1)
2043                         return
2044                 elif prefix == 'all':
2045                         self._download_n_results(query, self._max_yahoo_results)
2046                         return
2047                 else:
2048                         try:
2049                                 n = long(prefix)
2050                                 if n <= 0:
2051                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2052                                         return
2053                                 elif n > self._max_yahoo_results:
2054                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n))
2055                                         n = self._max_yahoo_results
2056                                 self._download_n_results(query, n)
2057                                 return
2058                         except ValueError: # parsing prefix as integer fails
2059                                 self._download_n_results(query, 1)
2060                                 return
2061
2062         def _download_n_results(self, query, n):
2063                 """Downloads a specified number of results for a query"""
2064
2065                 video_ids = []
2066                 already_seen = set()
2067                 pagenum = 1
2068
2069                 while True:
2070                         self.report_download_page(query, pagenum)
2071                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2072                         request = urllib2.Request(result_url)
2073                         try:
2074                                 page = urllib2.urlopen(request).read()
2075                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2076                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2077                                 return
2078
2079                         # Extract video identifiers
2080                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2081                                 video_id = mobj.group(1)
2082                                 if video_id not in already_seen:
2083                                         video_ids.append(video_id)
2084                                         already_seen.add(video_id)
2085                                         if len(video_ids) == n:
2086                                                 # Specified n videos reached
2087                                                 for id in video_ids:
2088                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2089                                                 return
2090
2091                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2092                                 for id in video_ids:
2093                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2094                                 return
2095
2096                         pagenum = pagenum + 1
2097
2098 class YoutubePlaylistIE(InfoExtractor):
2099         """Information Extractor for YouTube playlists."""
2100
2101         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
2102         _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
2103         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2104         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2105         _youtube_ie = None
2106
2107         def __init__(self, youtube_ie, downloader=None):
2108                 InfoExtractor.__init__(self, downloader)
2109                 self._youtube_ie = youtube_ie
2110
2111         @staticmethod
2112         def suitable(url):
2113                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
2114
2115         def report_download_page(self, playlist_id, pagenum):
2116                 """Report attempt to download playlist page with given number."""
2117                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
2118
2119         def _real_initialize(self):
2120                 self._youtube_ie.initialize()
2121
2122         def _real_extract(self, url):
2123                 # Extract playlist id
2124                 mobj = re.match(self._VALID_URL, url)
2125                 if mobj is None:
2126                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2127                         return
2128
2129                 # Single video case
2130                 if mobj.group(3) is not None:
2131                         self._youtube_ie.extract(mobj.group(3))
2132                         return
2133
2134                 # Download playlist pages
2135                 # prefix is 'p' as default for playlists but there are other types that need extra care
2136                 playlist_prefix = mobj.group(1)
2137                 if playlist_prefix == 'a':
2138                         playlist_access = 'artist'
2139                 else:
2140                         playlist_prefix = 'p'
2141                         playlist_access = 'view_play_list'
2142                 playlist_id = mobj.group(2)
2143                 video_ids = []
2144                 pagenum = 1
2145
2146                 while True:
2147                         self.report_download_page(playlist_id, pagenum)
2148                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
2149                         try:
2150                                 page = urllib2.urlopen(request).read()
2151                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2152                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2153                                 return
2154
2155                         # Extract video identifiers
2156                         ids_in_page = []
2157                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2158                                 if mobj.group(1) not in ids_in_page:
2159                                         ids_in_page.append(mobj.group(1))
2160                         video_ids.extend(ids_in_page)
2161
2162                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2163                                 break
2164                         pagenum = pagenum + 1
2165
2166                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2167                 playlistend = self._downloader.params.get('playlistend', -1)
2168                 video_ids = video_ids[playliststart:playlistend]
2169
2170                 for id in video_ids:
2171                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2172                 return
2173
2174 class YoutubeUserIE(InfoExtractor):
2175         """Information Extractor for YouTube users."""
2176
2177         _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
2178         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2179         _GDATA_PAGE_SIZE = 50
2180         _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
2181         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2182         _youtube_ie = None
2183
2184         def __init__(self, youtube_ie, downloader=None):
2185                 InfoExtractor.__init__(self, downloader)
2186                 self._youtube_ie = youtube_ie
2187
2188         @staticmethod
2189         def suitable(url):
2190                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2191
2192         def report_download_page(self, username, start_index):
2193                 """Report attempt to download user page."""
2194                 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
2195                                            (username, start_index, start_index + self._GDATA_PAGE_SIZE))
2196
2197         def _real_initialize(self):
2198                 self._youtube_ie.initialize()
2199
2200         def _real_extract(self, url):
2201                 # Extract username
2202                 mobj = re.match(self._VALID_URL, url)
2203                 if mobj is None:
2204                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2205                         return
2206
2207                 username = mobj.group(1)
2208
2209                 # Download video ids using YouTube Data API. Result size per
2210                 # query is limited (currently to 50 videos) so we need to query
2211                 # page by page until there are no video ids - it means we got
2212                 # all of them.
2213
2214                 video_ids = []
2215                 pagenum = 0
2216
2217                 while True:
2218                         start_index = pagenum * self._GDATA_PAGE_SIZE + 1
2219                         self.report_download_page(username, start_index)
2220
2221                         request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
2222
2223                         try:
2224                                 page = urllib2.urlopen(request).read()
2225                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2226                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2227                                 return
2228
2229                         # Extract video identifiers
2230                         ids_in_page = []
2231
2232                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2233                                 if mobj.group(1) not in ids_in_page:
2234                                         ids_in_page.append(mobj.group(1))
2235
2236                         video_ids.extend(ids_in_page)
2237
2238                         # A little optimization - if current page is not
2239                         # "full", ie. does not contain PAGE_SIZE video ids then
2240                         # we can assume that this page is the last one - there
2241                         # are no more ids on further pages - no need to query
2242                         # again.
2243
2244                         if len(ids_in_page) < self._GDATA_PAGE_SIZE:
2245                                 break
2246
2247                         pagenum += 1
2248
2249                 all_ids_count = len(video_ids)
2250                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2251                 playlistend = self._downloader.params.get('playlistend', -1)
2252
2253                 if playlistend == -1:
2254                         video_ids = video_ids[playliststart:]
2255                 else:
2256                         video_ids = video_ids[playliststart:playlistend]
2257
2258                 self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
2259                                            (username, all_ids_count, len(video_ids)))
2260
2261                 for video_id in video_ids:
2262                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
2263
2264
2265 class DepositFilesIE(InfoExtractor):
2266         """Information extractor for depositfiles.com"""
2267
2268         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
2269
2270         def __init__(self, downloader=None):
2271                 InfoExtractor.__init__(self, downloader)
2272
2273         @staticmethod
2274         def suitable(url):
2275                 return (re.match(DepositFilesIE._VALID_URL, url) is not None)
2276
2277         def report_download_webpage(self, file_id):
2278                 """Report webpage download."""
2279                 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2280
2281         def report_extraction(self, file_id):
2282                 """Report information extraction."""
2283                 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2284
2285         def _real_initialize(self):
2286                 return
2287
2288         def _real_extract(self, url):
2289                 # At this point we have a new file
2290                 self._downloader.increment_downloads()
2291
2292                 file_id = url.split('/')[-1]
2293                 # Rebuild url in english locale
2294                 url = 'http://depositfiles.com/en/files/' + file_id
2295
2296                 # Retrieve file webpage with 'Free download' button pressed
2297                 free_download_indication = { 'gateway_result' : '1' }
2298                 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
2299                 try:
2300                         self.report_download_webpage(file_id)
2301                         webpage = urllib2.urlopen(request).read()
2302                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2303                         self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2304                         return
2305
2306                 # Search for the real file URL
2307                 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2308                 if (mobj is None) or (mobj.group(1) is None):
2309                         # Try to figure out reason of the error.
2310                         mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2311                         if (mobj is not None) and (mobj.group(1) is not None):
2312                                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2313                                 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2314                         else:
2315                                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2316                         return
2317
2318                 file_url = mobj.group(1)
2319                 file_extension = os.path.splitext(file_url)[1][1:]
2320
2321                 # Search for file title
2322                 mobj = re.search(r'<b title="(.*?)">', webpage)
2323                 if mobj is None:
2324                         self._downloader.trouble(u'ERROR: unable to extract title')
2325                         return
2326                 file_title = mobj.group(1).decode('utf-8')
2327
2328                 try:
2329                         # Process file information
2330                         self._downloader.process_info({
2331                                 'id':           file_id.decode('utf-8'),
2332                                 'url':          file_url.decode('utf-8'),
2333                                 'uploader':     u'NA',
2334                                 'upload_date':  u'NA',
2335                                 'title':        file_title,
2336                                 'stitle':       file_title,
2337                                 'ext':          file_extension.decode('utf-8'),
2338                                 'format':       u'NA',
2339                                 'player_url':   None,
2340                         })
2341                 except UnavailableVideoError, err:
2342                         self._downloader.trouble(u'ERROR: unable to download file')
2343
2344 class FacebookIE(InfoExtractor):
2345         """Information Extractor for Facebook"""
2346
2347         _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
2348         _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
2349         _NETRC_MACHINE = 'facebook'
2350         _available_formats = ['highqual', 'lowqual']
2351         _video_extensions = {
2352                 'highqual': 'mp4',
2353                 'lowqual': 'mp4',
2354         }
2355
2356         def __init__(self, downloader=None):
2357                 InfoExtractor.__init__(self, downloader)
2358
2359         @staticmethod
2360         def suitable(url):
2361                 return (re.match(FacebookIE._VALID_URL, url) is not None)
2362
2363         def _reporter(self, message):
2364                 """Add header and report message."""
2365                 self._downloader.to_screen(u'[facebook] %s' % message)
2366
2367         def report_login(self):
2368                 """Report attempt to log in."""
2369                 self._reporter(u'Logging in')
2370
2371         def report_video_webpage_download(self, video_id):
2372                 """Report attempt to download video webpage."""
2373                 self._reporter(u'%s: Downloading video webpage' % video_id)
2374
2375         def report_information_extraction(self, video_id):
2376                 """Report attempt to extract video information."""
2377                 self._reporter(u'%s: Extracting video information' % video_id)
2378
2379         def _parse_page(self, video_webpage):
2380                 """Extract video information from page"""
2381                 # General data
2382                 data = {'title': r'class="video_title datawrap">(.*?)</',
2383                         'description': r'<div class="datawrap">(.*?)</div>',
2384                         'owner': r'\("video_owner_name", "(.*?)"\)',
2385                         'upload_date': r'data-date="(.*?)"',
2386                         'thumbnail':  r'\("thumb_url", "(?P<THUMB>.*?)"\)',
2387                         }
2388                 video_info = {}
2389                 for piece in data.keys():
2390                         mobj = re.search(data[piece], video_webpage)
2391                         if mobj is not None:
2392                                 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2393
2394                 # Video urls
2395                 video_urls = {}
2396                 for fmt in self._available_formats:
2397                         mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
2398                         if mobj is not None:
2399                                 # URL is in a Javascript segment inside an escaped Unicode format within
2400                                 # the generally utf-8 page
2401                                 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2402                 video_info['video_urls'] = video_urls
2403
2404                 return video_info
2405
2406         def _real_initialize(self):
2407                 if self._downloader is None:
2408                         return
2409
2410                 useremail = None
2411                 password = None
2412                 downloader_params = self._downloader.params
2413
2414                 # Attempt to use provided username and password or .netrc data
2415                 if downloader_params.get('username', None) is not None:
2416                         useremail = downloader_params['username']
2417                         password = downloader_params['password']
2418                 elif downloader_params.get('usenetrc', False):
2419                         try:
2420                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
2421                                 if info is not None:
2422                                         useremail = info[0]
2423                                         password = info[2]
2424                                 else:
2425                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
2426                         except (IOError, netrc.NetrcParseError), err:
2427                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
2428                                 return
2429
2430                 if useremail is None:
2431                         return
2432
2433                 # Log in
2434                 login_form = {
2435                         'email': useremail,
2436                         'pass': password,
2437                         'login': 'Log+In'
2438                         }
2439                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
2440                 try:
2441                         self.report_login()
2442                         login_results = urllib2.urlopen(request).read()
2443                         if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
2444                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
2445                                 return
2446                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2447                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
2448                         return
2449
2450         def _real_extract(self, url):
2451                 mobj = re.match(self._VALID_URL, url)
2452                 if mobj is None:
2453                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2454                         return
2455                 video_id = mobj.group('ID')
2456
2457                 # Get video webpage
2458                 self.report_video_webpage_download(video_id)
2459                 request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
2460                 try:
2461                         page = urllib2.urlopen(request)
2462                         video_webpage = page.read()
2463                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2464                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
2465                         return
2466
2467                 # Start extracting information
2468                 self.report_information_extraction(video_id)
2469
2470                 # Extract information
2471                 video_info = self._parse_page(video_webpage)
2472
2473                 # uploader
2474                 if 'owner' not in video_info:
2475                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
2476                         return
2477                 video_uploader = video_info['owner']
2478
2479                 # title
2480                 if 'title' not in video_info:
2481                         self._downloader.trouble(u'ERROR: unable to extract video title')
2482                         return
2483                 video_title = video_info['title']
2484                 video_title = video_title.decode('utf-8')
2485                 video_title = sanitize_title(video_title)
2486
2487                 # simplified title
2488                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
2489                 simple_title = simple_title.strip(ur'_')
2490
2491                 # thumbnail image
2492                 if 'thumbnail' not in video_info:
2493                         self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
2494                         video_thumbnail = ''
2495                 else:
2496                         video_thumbnail = video_info['thumbnail']
2497
2498                 # upload date
2499                 upload_date = u'NA'
2500                 if 'upload_date' in video_info:
2501                         upload_time = video_info['upload_date']
2502                         timetuple = email.utils.parsedate_tz(upload_time)
2503                         if timetuple is not None:
2504                                 try:
2505                                         upload_date = time.strftime('%Y%m%d', timetuple[0:9])
2506                                 except:
2507                                         pass
2508
2509                 # description
2510                 video_description = 'No description available.'
2511                 if (self._downloader.params.get('forcedescription', False) and
2512                     'description' in video_info):
2513                         video_description = video_info['description']
2514
2515                 url_map = video_info['video_urls']
2516                 if len(url_map.keys()) > 0:
2517                         # Decide which formats to download
2518                         req_format = self._downloader.params.get('format', None)
2519                         format_limit = self._downloader.params.get('format_limit', None)
2520
2521                         if format_limit is not None and format_limit in self._available_formats:
2522                                 format_list = self._available_formats[self._available_formats.index(format_limit):]
2523                         else:
2524                                 format_list = self._available_formats
2525                         existing_formats = [x for x in format_list if x in url_map]
2526                         if len(existing_formats) == 0:
2527                                 self._downloader.trouble(u'ERROR: no known formats available for video')
2528                                 return
2529                         if req_format is None:
2530                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
2531                         elif req_format == '-1':
2532                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
2533                         else:
2534                                 # Specific format
2535                                 if req_format not in url_map:
2536                                         self._downloader.trouble(u'ERROR: requested format not available')
2537                                         return
2538                                 video_url_list = [(req_format, url_map[req_format])] # Specific format
2539
2540                 for format_param, video_real_url in video_url_list:
2541
2542                         # At this point we have a new video
2543                         self._downloader.increment_downloads()
2544
2545                         # Extension
2546                         video_extension = self._video_extensions.get(format_param, 'mp4')
2547
2548                         # Find the video URL in fmt_url_map or conn paramters
2549                         try:
2550                                 # Process video information
2551                                 self._downloader.process_info({
2552                                         'id':           video_id.decode('utf-8'),
2553                                         'url':          video_real_url.decode('utf-8'),
2554                                         'uploader':     video_uploader.decode('utf-8'),
2555                                         'upload_date':  upload_date,
2556                                         'title':        video_title,
2557                                         'stitle':       simple_title,
2558                                         'ext':          video_extension.decode('utf-8'),
2559                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
2560                                         'thumbnail':    video_thumbnail.decode('utf-8'),
2561                                         'description':  video_description.decode('utf-8'),
2562                                         'player_url':   None,
2563                                 })
2564                         except UnavailableVideoError, err:
2565                                 self._downloader.trouble(u'\nERROR: unable to download video')
2566
2567 class BlipTVIE(InfoExtractor):
2568         """Information extractor for blip.tv"""
2569
2570         _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip.tv/(.+)$'
2571         _URL_EXT = r'^.*\.([a-z0-9]+)$'
2572
2573         @staticmethod
2574         def suitable(url):
2575                 return (re.match(BlipTVIE._VALID_URL, url) is not None)
2576
2577         def report_download_webpage(self, file_id):
2578                 """Report webpage download."""
2579                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.service_name, file_id))
2580
2581         def report_extraction(self, file_id):
2582                 """Report information extraction."""
2583                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.service_name, file_id))
2584
2585         @property
2586         def service_name(self):
2587                 return u'blip.tv'
2588
2589         def _simplify_title(self, title):
2590                 res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
2591                 res = res.strip(ur'_')
2592                 return res
2593
2594         def _real_extract(self, url):
2595                 mobj = re.match(self._VALID_URL, url)
2596                 if mobj is None:
2597                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2598                         return
2599
2600                 json_url = url + ('&' if '?' in url else '?') + 'skin=json&version=2&no_wrap=1'
2601                 request = urllib2.Request(json_url)
2602                 try:
2603                         json_code = urllib2.urlopen(request).read()
2604                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2605                         self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
2606                         return
2607                 try:
2608                         json_data = json.loads(json_code)
2609                         data = json_data['Post']
2610
2611                         upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
2612                         video_url = data['media']['url']
2613                         umobj = re.match(self._URL_EXT, video_url)
2614                         if umobj is None:
2615                                 raise ValueError('Can not determine filename extension')
2616                         ext = umobj.group(1)
2617
2618                         info = {
2619                                 'id': data['item_id'],
2620                                 'url': video_url,
2621                                 'uploader': data['display_name'],
2622                                 'upload_date': upload_date,
2623                                 'title': data['title'],
2624                                 'stitle': self._simplify_title(data['title']),
2625                                 'ext': ext,
2626                                 'format': data['media']['mimeType'],
2627                                 'thumbnail': data['thumbnailUrl'],
2628                                 'description': data['description'],
2629                                 'player_url': data['embedUrl']
2630                         }
2631                 except (ValueError,KeyError), err:
2632                         self._downloader.trouble(u'ERROR: unable to parse video information: %s' % str(err))
2633                         return
2634
2635                 try:
2636                         self._downloader.process_info(info)
2637                 except UnavailableVideoError, err:
2638                         self._downloader.trouble(u'\nERROR: unable to download video')
2639
2640
2641 class PostProcessor(object):
2642         """Post Processor class.
2643
2644         PostProcessor objects can be added to downloaders with their
2645         add_post_processor() method. When the downloader has finished a
2646         successful download, it will take its internal chain of PostProcessors
2647         and start calling the run() method on each one of them, first with
2648         an initial argument and then with the returned value of the previous
2649         PostProcessor.
2650
2651         The chain will be stopped if one of them ever returns None or the end
2652         of the chain is reached.
2653
2654         PostProcessor objects follow a "mutual registration" process similar
2655         to InfoExtractor objects.
2656         """
2657
2658         _downloader = None
2659
2660         def __init__(self, downloader=None):
2661                 self._downloader = downloader
2662
2663         def set_downloader(self, downloader):
2664                 """Sets the downloader for this PP."""
2665                 self._downloader = downloader
2666
2667         def run(self, information):
2668                 """Run the PostProcessor.
2669
2670                 The "information" argument is a dictionary like the ones
2671                 composed by InfoExtractors. The only difference is that this
2672                 one has an extra field called "filepath" that points to the
2673                 downloaded file.
2674
2675                 When this method returns None, the postprocessing chain is
2676                 stopped. However, this method may return an information
2677                 dictionary that will be passed to the next postprocessing
2678                 object in the chain. It can be the one it received after
2679                 changing some fields.
2680
2681                 In addition, this method may raise a PostProcessingError
2682                 exception that will be taken into account by the downloader
2683                 it was called from.
2684                 """
2685                 return information # by default, do nothing
2686
2687 class FFmpegExtractAudioPP(PostProcessor):
2688
2689         def __init__(self, downloader=None, preferredcodec=None):
2690                 PostProcessor.__init__(self, downloader)
2691                 if preferredcodec is None:
2692                         preferredcodec = 'best'
2693                 self._preferredcodec = preferredcodec
2694
2695         @staticmethod
2696         def get_audio_codec(path):
2697                 try:
2698                         cmd = ['ffprobe', '-show_streams', '--', path]
2699                         handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
2700                         output = handle.communicate()[0]
2701                         if handle.wait() != 0:
2702                                 return None
2703                 except (IOError, OSError):
2704                         return None
2705                 audio_codec = None
2706                 for line in output.split('\n'):
2707                         if line.startswith('codec_name='):
2708                                 audio_codec = line.split('=')[1].strip()
2709                         elif line.strip() == 'codec_type=audio' and audio_codec is not None:
2710                                 return audio_codec
2711                 return None
2712
2713         @staticmethod
2714         def run_ffmpeg(path, out_path, codec, more_opts):
2715                 try:
2716                         cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
2717                         ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
2718                         return (ret == 0)
2719                 except (IOError, OSError):
2720                         return False
2721
2722         def run(self, information):
2723                 path = information['filepath']
2724
2725                 filecodec = self.get_audio_codec(path)
2726                 if filecodec is None:
2727                         self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
2728                         return None
2729
2730                 more_opts = []
2731                 if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
2732                         if filecodec == 'aac' or filecodec == 'mp3':
2733                                 # Lossless if possible
2734                                 acodec = 'copy'
2735                                 extension = filecodec
2736                                 if filecodec == 'aac':
2737                                         more_opts = ['-f', 'adts']
2738                         else:
2739                                 # MP3 otherwise.
2740                                 acodec = 'libmp3lame'
2741                                 extension = 'mp3'
2742                                 more_opts = ['-ab', '128k']
2743                 else:
2744                         # We convert the audio (lossy)
2745                         acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
2746                         extension = self._preferredcodec
2747                         more_opts = ['-ab', '128k']
2748                         if self._preferredcodec == 'aac':
2749                                 more_opts += ['-f', 'adts']
2750
2751                 (prefix, ext) = os.path.splitext(path)
2752                 new_path = prefix + '.' + extension
2753                 self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
2754                 status = self.run_ffmpeg(path, new_path, acodec, more_opts)
2755
2756                 if not status:
2757                         self._downloader.to_stderr(u'WARNING: error running ffmpeg')
2758                         return None
2759
2760                 try:
2761                         os.remove(path)
2762                 except (IOError, OSError):
2763                         self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
2764                         return None
2765
2766                 information['filepath'] = new_path
2767                 return information
2768
2769 ### MAIN PROGRAM ###
2770 if __name__ == '__main__':
2771         try:
2772                 # Modules needed only when running the main program
2773                 import getpass
2774                 import optparse
2775
2776                 # Function to update the program file with the latest version from the repository.
2777                 def update_self(downloader, filename):
2778                         # Note: downloader only used for options
2779                         if not os.access(filename, os.W_OK):
2780                                 sys.exit('ERROR: no write permissions on %s' % filename)
2781
2782                         downloader.to_screen('Updating to latest stable version...')
2783                         try:
2784                                 latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
2785                                 latest_version = urllib.urlopen(latest_url).read().strip()
2786                                 prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
2787                                 newcontent = urllib.urlopen(prog_url).read()
2788                         except (IOError, OSError), err:
2789                                 sys.exit('ERROR: unable to download latest version')
2790                         try:
2791                                 stream = open(filename, 'w')
2792                                 stream.write(newcontent)
2793                                 stream.close()
2794                         except (IOError, OSError), err:
2795                                 sys.exit('ERROR: unable to overwrite current version')
2796                         downloader.to_screen('Updated to version %s' % latest_version)
2797
2798                 # Parse command line
2799                 parser = optparse.OptionParser(
2800                         usage='Usage: %prog [options] url...',
2801                         version='2011.03.29',
2802                         conflict_handler='resolve',
2803                 )
2804
2805                 parser.add_option('-h', '--help',
2806                                 action='help', help='print this help text and exit')
2807                 parser.add_option('-v', '--version',
2808                                 action='version', help='print program version and exit')
2809                 parser.add_option('-U', '--update',
2810                                 action='store_true', dest='update_self', help='update this program to latest stable version')
2811                 parser.add_option('-i', '--ignore-errors',
2812                                 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
2813                 parser.add_option('-r', '--rate-limit',
2814                                 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
2815                 parser.add_option('-R', '--retries',
2816                                 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
2817                 parser.add_option('--playlist-start',
2818                                 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
2819                 parser.add_option('--playlist-end',
2820                                 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
2821                 parser.add_option('--dump-user-agent',
2822                                 action='store_true', dest='dump_user_agent',
2823                                 help='display the current browser identification', default=False)
2824
2825                 authentication = optparse.OptionGroup(parser, 'Authentication Options')
2826                 authentication.add_option('-u', '--username',
2827                                 dest='username', metavar='USERNAME', help='account username')
2828                 authentication.add_option('-p', '--password',
2829                                 dest='password', metavar='PASSWORD', help='account password')
2830                 authentication.add_option('-n', '--netrc',
2831                                 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
2832                 parser.add_option_group(authentication)
2833
2834                 video_format = optparse.OptionGroup(parser, 'Video Format Options')
2835                 video_format.add_option('-f', '--format',
2836                                 action='store', dest='format', metavar='FORMAT', help='video format code')
2837                 video_format.add_option('--all-formats',
2838                                 action='store_const', dest='format', help='download all available video formats', const='-1')
2839                 video_format.add_option('--max-quality',
2840                                 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
2841                 parser.add_option_group(video_format)
2842
2843                 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
2844                 verbosity.add_option('-q', '--quiet',
2845                                 action='store_true', dest='quiet', help='activates quiet mode', default=False)
2846                 verbosity.add_option('-s', '--simulate',
2847                                 action='store_true', dest='simulate', help='do not download video', default=False)
2848                 verbosity.add_option('-g', '--get-url',
2849                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
2850                 verbosity.add_option('-e', '--get-title',
2851                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
2852                 verbosity.add_option('--get-thumbnail',
2853                                 action='store_true', dest='getthumbnail',
2854                                 help='simulate, quiet but print thumbnail URL', default=False)
2855                 verbosity.add_option('--get-description',
2856                                 action='store_true', dest='getdescription',
2857                                 help='simulate, quiet but print video description', default=False)
2858                 verbosity.add_option('--get-filename',
2859                                 action='store_true', dest='getfilename',
2860                                 help='simulate, quiet but print output filename', default=False)
2861                 verbosity.add_option('--no-progress',
2862                                 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
2863                 verbosity.add_option('--console-title',
2864                                 action='store_true', dest='consoletitle',
2865                                 help='display progress in console titlebar', default=False)
2866                 parser.add_option_group(verbosity)
2867
2868                 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
2869                 filesystem.add_option('-t', '--title',
2870                                 action='store_true', dest='usetitle', help='use title in file name', default=False)
2871                 filesystem.add_option('-l', '--literal',
2872                                 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
2873                 filesystem.add_option('-A', '--auto-number',
2874                                 action='store_true', dest='autonumber',
2875                                 help='number downloaded files starting from 00000', default=False)
2876                 filesystem.add_option('-o', '--output',
2877                                 dest='outtmpl', metavar='TEMPLATE', help='output filename template')
2878                 filesystem.add_option('-a', '--batch-file',
2879                                 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
2880                 filesystem.add_option('-w', '--no-overwrites',
2881                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
2882                 filesystem.add_option('-c', '--continue',
2883                                 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
2884                 filesystem.add_option('--cookies',
2885                                 dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
2886                 filesystem.add_option('--no-part',
2887                                 action='store_true', dest='nopart', help='do not use .part files', default=False)
2888                 filesystem.add_option('--no-mtime',
2889                                 action='store_false', dest='updatetime',
2890                                 help='do not use the Last-modified header to set the file modification time', default=True)
2891                 parser.add_option_group(filesystem)
2892
2893                 postproc = optparse.OptionGroup(parser, 'Post-processing Options')
2894                 postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
2895                                 help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
2896                 postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
2897                                 help='"best", "aac" or "mp3"; best by default')
2898                 parser.add_option_group(postproc)
2899
2900                 (opts, args) = parser.parse_args()
2901
2902                 # Open appropriate CookieJar
2903                 if opts.cookiefile is None:
2904                         jar = cookielib.CookieJar()
2905                 else:
2906                         try:
2907                                 jar = cookielib.MozillaCookieJar(opts.cookiefile)
2908                                 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
2909                                         jar.load()
2910                         except (IOError, OSError), err:
2911                                 sys.exit(u'ERROR: unable to open cookie file')
2912
2913                 # Dump user agent
2914                 if opts.dump_user_agent:
2915                         print std_headers['User-Agent']
2916                         sys.exit(0)
2917
2918                 # General configuration
2919                 cookie_processor = urllib2.HTTPCookieProcessor(jar)
2920                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()))
2921                 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
2922
2923                 # Batch file verification
2924                 batchurls = []
2925                 if opts.batchfile is not None:
2926                         try:
2927                                 if opts.batchfile == '-':
2928                                         batchfd = sys.stdin
2929                                 else:
2930                                         batchfd = open(opts.batchfile, 'r')
2931                                 batchurls = batchfd.readlines()
2932                                 batchurls = [x.strip() for x in batchurls]
2933                                 batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
2934                         except IOError:
2935                                 sys.exit(u'ERROR: batch file could not be read')
2936                 all_urls = batchurls + args
2937
2938                 # Conflicting, missing and erroneous options
2939                 if opts.usenetrc and (opts.username is not None or opts.password is not None):
2940                         parser.error(u'using .netrc conflicts with giving username/password')
2941                 if opts.password is not None and opts.username is None:
2942                         parser.error(u'account username missing')
2943                 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
2944                         parser.error(u'using output template conflicts with using title, literal title or auto number')
2945                 if opts.usetitle and opts.useliteral:
2946                         parser.error(u'using title conflicts with using literal title')
2947                 if opts.username is not None and opts.password is None:
2948                         opts.password = getpass.getpass(u'Type account password and press return:')
2949                 if opts.ratelimit is not None:
2950                         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
2951                         if numeric_limit is None:
2952                                 parser.error(u'invalid rate limit specified')
2953                         opts.ratelimit = numeric_limit
2954                 if opts.retries is not None:
2955                         try:
2956                                 opts.retries = long(opts.retries)
2957                         except (TypeError, ValueError), err:
2958                                 parser.error(u'invalid retry count specified')
2959                 try:
2960                         opts.playliststart = long(opts.playliststart)
2961                         if opts.playliststart <= 0:
2962                                 raise ValueError
2963                 except (TypeError, ValueError), err:
2964                         parser.error(u'invalid playlist start number specified')
2965                 try:
2966                         opts.playlistend = long(opts.playlistend)
2967                         if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
2968                                 raise ValueError
2969                 except (TypeError, ValueError), err:
2970                         parser.error(u'invalid playlist end number specified')
2971                 if opts.extractaudio:
2972                         if opts.audioformat not in ['best', 'aac', 'mp3']:
2973                                 parser.error(u'invalid audio format specified')
2974
2975                 # Information extractors
2976                 youtube_ie = YoutubeIE()
2977                 metacafe_ie = MetacafeIE(youtube_ie)
2978                 dailymotion_ie = DailymotionIE()
2979                 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
2980                 youtube_user_ie = YoutubeUserIE(youtube_ie)
2981                 youtube_search_ie = YoutubeSearchIE(youtube_ie)
2982                 google_ie = GoogleIE()
2983                 google_search_ie = GoogleSearchIE(google_ie)
2984                 photobucket_ie = PhotobucketIE()
2985                 yahoo_ie = YahooIE()
2986                 yahoo_search_ie = YahooSearchIE(yahoo_ie)
2987                 deposit_files_ie = DepositFilesIE()
2988                 facebook_ie = FacebookIE()
2989                 bliptv_ie = BlipTVIE()
2990                 generic_ie = GenericIE()
2991
2992                 # File downloader
2993                 fd = FileDownloader({
2994                         'usenetrc': opts.usenetrc,
2995                         'username': opts.username,
2996                         'password': opts.password,
2997                         'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
2998                         'forceurl': opts.geturl,
2999                         'forcetitle': opts.gettitle,
3000                         'forcethumbnail': opts.getthumbnail,
3001                         'forcedescription': opts.getdescription,
3002                         'forcefilename': opts.getfilename,
3003                         'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
3004                         'format': opts.format,
3005                         'format_limit': opts.format_limit,
3006                         'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
3007                                 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
3008                                 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
3009                                 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
3010                                 or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
3011                                 or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
3012                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
3013                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
3014                                 or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
3015                                 or u'%(id)s.%(ext)s'),
3016                         'ignoreerrors': opts.ignoreerrors,
3017                         'ratelimit': opts.ratelimit,
3018                         'nooverwrites': opts.nooverwrites,
3019                         'retries': opts.retries,
3020                         'continuedl': opts.continue_dl,
3021                         'noprogress': opts.noprogress,
3022                         'playliststart': opts.playliststart,
3023                         'playlistend': opts.playlistend,
3024                         'logtostderr': opts.outtmpl == '-',
3025                         'consoletitle': opts.consoletitle,
3026                         'nopart': opts.nopart,
3027                         'updatetime': opts.updatetime,
3028                         })
3029                 fd.add_info_extractor(youtube_search_ie)
3030                 fd.add_info_extractor(youtube_pl_ie)
3031                 fd.add_info_extractor(youtube_user_ie)
3032                 fd.add_info_extractor(metacafe_ie)
3033                 fd.add_info_extractor(dailymotion_ie)
3034                 fd.add_info_extractor(youtube_ie)
3035                 fd.add_info_extractor(google_ie)
3036                 fd.add_info_extractor(google_search_ie)
3037                 fd.add_info_extractor(photobucket_ie)
3038                 fd.add_info_extractor(yahoo_ie)
3039                 fd.add_info_extractor(yahoo_search_ie)
3040                 fd.add_info_extractor(deposit_files_ie)
3041                 fd.add_info_extractor(facebook_ie)
3042                 fd.add_info_extractor(bliptv_ie)
3043
3044                 # This must come last since it's the
3045                 # fallback if none of the others work
3046                 fd.add_info_extractor(generic_ie)
3047
3048                 # PostProcessors
3049                 if opts.extractaudio:
3050                         fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat))
3051
3052                 # Update version
3053                 if opts.update_self:
3054                         update_self(fd, sys.argv[0])
3055
3056                 # Maybe do nothing
3057                 if len(all_urls) < 1:
3058                         if not opts.update_self:
3059                                 parser.error(u'you must provide at least one URL')
3060                         else:
3061                                 sys.exit()
3062                 retcode = fd.download(all_urls)
3063
3064                 # Dump cookie jar if requested
3065                 if opts.cookiefile is not None:
3066                         try:
3067                                 jar.save()
3068                         except (IOError, OSError), err:
3069                                 sys.exit(u'ERROR: unable to save cookie jar')
3070
3071                 sys.exit(retcode)
3072
3073         except DownloadError:
3074                 sys.exit(1)
3075         except SameFileError:
3076                 sys.exit(u'ERROR: fixed output name but more than one file to download')
3077         except KeyboardInterrupt:
3078                 sys.exit(u'\nERROR: Interrupted by user')