]> gitweb @ CieloNegro.org - youtube-dl.git/blob - youtube-dl
d0584a3126c6818e86ed96821e8bbabcfc2a9c7c
[youtube-dl.git] / youtube-dl
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # License: Public domain code
7 import cookielib
8 import datetime
9 import htmlentitydefs
10 import httplib
11 import locale
12 import math
13 import netrc
14 import os
15 import os.path
16 import re
17 import socket
18 import string
19 import subprocess
20 import sys
21 import time
22 import urllib
23 import urllib2
24
25 # parse_qs was moved from the cgi module to the urlparse module recently.
26 try:
27         from urlparse import parse_qs
28 except ImportError:
29         from cgi import parse_qs
30
# Default HTTP headers sent with every request; mimics a contemporary
# Firefox so video sites serve the same pages they would to a browser.
std_headers = {
	'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101028 Firefox/3.6.12',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
	'Accept-Language': 'en-us,en;q=0.5',
}

# Characters considered safe for "simplified" titles (ASCII letters and
# digits, as unicode).  .decode('ascii') is Python 2-only: it turns the
# byte strings from the string module into unicode objects.
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
39
def preferredencoding():
	"""Get preferred encoding.

	Returns the best encoding scheme for the system, based on
	locale.getpreferredencoding() and some further tweaks.  Falls back to
	UTF-8 when the locale-reported encoding cannot actually encode text.
	"""
	# The original implementation wrapped this in a single-shot generator
	# for no benefit; a plain try/except is equivalent and clearer.  Using
	# "except Exception" (instead of a bare except) avoids swallowing
	# KeyboardInterrupt/SystemExit.
	try:
		pref = locale.getpreferredencoding()
		# Sanity check: make sure the returned codec name is usable.
		u'TEST'.encode(pref)
	except Exception:
		pref = 'UTF-8'
	return pref
55
def htmlentity_transform(matchobj):
	"""Transforms an HTML entity to a Unicode character.

	This function receives a match object and is intended to be used with
	the re.sub() function.  The match's group 1 is the entity body, i.e.
	the text between '&' and ';'.
	"""
	entity = matchobj.group(1)

	# Known non-numeric HTML entity (e.g. "amp", "eacute")
	if entity in htmlentitydefs.name2codepoint:
		return unichr(htmlentitydefs.name2codepoint[entity])

	# Numeric character reference.  The previous pattern 'x?\d+' could not
	# match hexadecimal entities containing letters (e.g. &#xA9;), so those
	# were returned literally; accept hex digits after 'x' explicitly.
	mobj = re.match(u'(?u)#(x[0-9a-fA-F]+|[0-9]+)', entity)
	if mobj is not None:
		numstr = mobj.group(1)
		if numstr.startswith(u'x'):
			base = 16
			# long() accepts the '0x' prefix when base is 16
			numstr = u'0%s' % numstr
		else:
			base = 10
		return unichr(long(numstr, base))

	# Unknown entity in name, return its literal representation
	return (u'&%s;' % entity)
81
82 def sanitize_title(utitle):
83         """Sanitizes a video title so it could be used as part of a filename."""
84         utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
85         return utitle.replace(unicode(os.sep), u'%')
86
87 def sanitize_open(filename, open_mode):
88         """Try to open the given filename, and slightly tweak it if this fails.
89
90         Attempts to open the given filename. If this fails, it tries to change
91         the filename slightly, step by step, until it's either able to open it
92         or it fails and raises a final exception, like the standard open()
93         function.
94
95         It returns the tuple (stream, definitive_file_name).
96         """
97         try:
98                 if filename == u'-':
99                         if sys.platform == 'win32':
100                                 import msvcrt
101                                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
102                         return (sys.stdout, filename)
103                 stream = open(filename, open_mode)
104                 return (stream, filename)
105         except (IOError, OSError), err:
106                 # In case of error, try to remove win32 forbidden chars
107                 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
108
109                 # An exception here should be caught in the caller
110                 stream = open(filename, open_mode)
111                 return (stream, filename)
112
113
class DownloadError(Exception):
	"""Download Error exception.

	This exception may be thrown by FileDownloader objects if they are not
	configured to continue on errors. They will contain the appropriate
	error message.
	"""
	pass
122
class SameFileError(Exception):
	"""Same File exception.

	This exception will be thrown by FileDownloader objects if they detect
	multiple files would have to be downloaded to the same file on disk
	(i.e. several URLs with a fixed output template).
	"""
	pass
130
class PostProcessingError(Exception):
	"""Post Processing exception.

	This exception may be raised by PostProcessor's .run() method to
	indicate an error in the postprocessing task.
	"""
	pass
138
class UnavailableVideoError(Exception):
	"""Unavailable Format exception.

	This exception will be thrown when a video is requested
	in a format that is not available for that video.
	"""
	pass
146
class ContentTooShortError(Exception):
	"""Content Too Short exception.

	Raised by FileDownloader objects when the data they downloaded is
	smaller than the size the server announced first, which indicates the
	connection was probably interrupted.
	"""
	# Both counts are in bytes
	downloaded = None
	expected = None

	def __init__(self, downloaded, expected):
		self.expected = expected
		self.downloaded = downloaded
161
class FileDownloader(object):
	"""File Downloader class.

	File downloader objects are the ones responsible of downloading the
	actual video file and writing it to disk if the user has requested
	it, among some other tasks. In most cases there should be one per
	program. As, given a video URL, the downloader doesn't know how to
	extract all the needed information, task that InfoExtractors do, it
	has to pass the URL to one of them.

	For this, file downloader objects have a method that allows
	InfoExtractors to be registered in a given order. When it is passed
	a URL, the file downloader handles it to the first InfoExtractor it
	finds that reports being able to handle it. The InfoExtractor extracts
	all the information about the video or videos the URL refers to, and
	asks the FileDownloader to process the video information, possibly
	downloading the video.

	File downloaders accept a lot of parameters. In order not to saturate
	the object constructor with arguments, it receives a dictionary of
	options instead. These options are available through the params
	attribute for the InfoExtractors to use. The FileDownloader also
	registers itself as the downloader in charge for the InfoExtractors
	that are added to it, so this is a "mutual registration".

	Available options:

	username:         Username for authentication purposes.
	password:         Password for authentication purposes.
	usenetrc:         Use netrc for authentication instead.
	quiet:            Do not print messages to stdout.
	forceurl:         Force printing final URL.
	forcetitle:       Force printing title.
	forcethumbnail:   Force printing thumbnail URL.
	forcedescription: Force printing description.
	simulate:         Do not download the video files.
	format:           Video format code.
	format_limit:     Highest quality format to try.
	outtmpl:          Template for output names.
	ignoreerrors:     Do not stop on download errors.
	ratelimit:        Download speed limit, in bytes/sec.
	nooverwrites:     Prevent overwriting files.
	retries:          Number of times to retry for HTTP error 5xx
	continuedl:       Try to continue downloads if possible.
	noprogress:       Do not print the progress bar.
	playliststart:    Playlist item to start at.
	playlistend:      Playlist item to end at.
	logtostderr:      Log messages to stderr instead of stdout.
	"""

	# Class-level defaults; the real per-instance values are set in
	# __init__ (the lists here are never mutated through the class).
	params = None
	_ies = []
	_pps = []
	_download_retcode = None
	_num_downloads = None
	_screen_file = None

	def __init__(self, params):
		"""Create a FileDownloader object with the given options."""
		self._ies = []
		self._pps = []
		self._download_retcode = 0
		self._num_downloads = 0
		# Select stdout or stderr for screen messages; the boolean
		# 'logtostderr' indexes the two-element list.
		self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
		self.params = params
	
	@staticmethod
	def pmkdir(filename):
		"""Create directory components in filename. Similar to Unix "mkdir -p"."""
		# NOTE(review): this reimplements os.makedirs(os.path.dirname(...)),
		# building each progressively longer prefix of the path (the last
		# component, the file name itself, is excluded by the xrange bound).
		components = filename.split(os.sep)
		aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
		aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
		for dir in aggregate:
			if not os.path.exists(dir):
				os.mkdir(dir)
	
	@staticmethod
	def temp_name(filename):
		"""Returns a temporary filename for the given filename."""
		return filename + '.part'
	
	@staticmethod
	def format_bytes(bytes):
		"""Format a byte count as a human-readable string, e.g. '1.23M'."""
		if bytes is None:
			return 'N/A'
		if type(bytes) is str:
			bytes = float(bytes)
		if bytes == 0.0:
			exponent = 0
		else:
			# Integer log base 1024 selects the unit suffix below
			exponent = long(math.log(bytes, 1024.0))
		suffix = 'bkMGTPEZY'[exponent]
		converted = float(bytes) / float(1024**exponent)
		return '%.2f%s' % (converted, suffix)

	@staticmethod
	def calc_percent(byte_counter, data_len):
		"""Return download progress as a fixed-width percentage string."""
		if data_len is None:
			return '---.-%'
		return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))

	@staticmethod
	def calc_eta(start, now, total, current):
		"""Estimate remaining download time as 'MM:SS' (or '--:--' if unknown)."""
		if total is None:
			return '--:--'
		dif = now - start
		if current == 0 or dif < 0.001: # One millisecond
			return '--:--'
		rate = float(current) / dif
		eta = long((float(total) - float(current)) / rate)
		(eta_mins, eta_secs) = divmod(eta, 60)
		# The field only has room for two minute digits
		if eta_mins > 99:
			return '--:--'
		return '%02d:%02d' % (eta_mins, eta_secs)

	@staticmethod
	def calc_speed(start, now, bytes):
		"""Return the average download speed as a right-aligned string."""
		dif = now - start
		if bytes == 0 or dif < 0.001: # One millisecond
			return '%10s' % '---b/s'
		return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))

	@staticmethod
	def best_block_size(elapsed_time, bytes):
		"""Choose the next read size, adapting to the measured transfer rate.

		The new size is clamped between half and double the previous
		block's byte count, and never exceeds 4 MB.
		"""
		new_min = max(bytes / 2.0, 1.0)
		new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
		if elapsed_time < 0.001:
			return long(new_max)
		rate = bytes / elapsed_time
		if rate > new_max:
			return long(new_max)
		if rate < new_min:
			return long(new_min)
		return long(rate)

	@staticmethod
	def parse_bytes(bytestr):
		"""Parse a string indicating a byte quantity into a long integer.

		Accepts an optional single-letter multiplier suffix (k, M, G, ...);
		returns None when the string does not match.
		"""
		matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
		if matchobj is None:
			return None
		number = float(matchobj.group(1))
		# Index into 'bkmgtpezy' gives the power of 1024 ('' maps to 'b'? no:
		# an empty suffix yields index of '' -> 0, i.e. multiplier 1)
		multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
		return long(round(number * multiplier))

	def add_info_extractor(self, ie):
		"""Add an InfoExtractor object to the end of the list."""
		self._ies.append(ie)
		# Mutual registration: the IE gets a back-reference to us
		ie.set_downloader(self)
	
	def add_post_processor(self, pp):
		"""Add a PostProcessor object to the end of the chain."""
		self._pps.append(pp)
		pp.set_downloader(self)
	
	def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
		"""Print message to stdout if not in quiet mode."""
		try:
			if not self.params.get('quiet', False):
				# skip_eol selects the empty terminator; the trailing comma
				# suppresses print's own newline.
				terminator = [u'\n', u''][skip_eol]
				print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
			self._screen_file.flush()
		except (UnicodeEncodeError), err:
			if not ignore_encoding_errors:
				raise
	
	def to_stderr(self, message):
		"""Print message to stderr."""
		print >>sys.stderr, message.encode(preferredencoding())
	
	def fixed_template(self):
		"""Checks if the output template is fixed (contains no %(...)s fields)."""
		return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)

	def trouble(self, message=None):
		"""Determine action to take when a download problem appears.

		Depending on if the downloader has been configured to ignore
		download errors or not, this method may throw an exception or
		not when errors are found, after printing the message.
		"""
		if message is not None:
			self.to_stderr(message)
		if not self.params.get('ignoreerrors', False):
			raise DownloadError(message)
		self._download_retcode = 1

	def slow_down(self, start_time, byte_counter):
		"""Sleep if the download speed is over the rate limit."""
		rate_limit = self.params.get('ratelimit', None)
		if rate_limit is None or byte_counter == 0:
			return
		now = time.time()
		elapsed = now - start_time
		if elapsed <= 0.0:
			return
		speed = float(byte_counter) / elapsed
		if speed > rate_limit:
			# Sleep long enough that the average speed since start_time
			# drops back to the configured limit.
			time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
	
	def try_rename(self, old_filename, new_filename):
		"""Rename old_filename to new_filename, reporting trouble on failure."""
		try:
			os.rename(old_filename, new_filename)
		except (IOError, OSError), err:
			self.trouble(u'ERROR: unable to rename file')

	def report_destination(self, filename):
		"""Report destination filename."""
		self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
	
	def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
		"""Report download progress."""
		if self.params.get('noprogress', False):
			return
		# Leading \r rewrites the progress line in place
		self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
				(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)

	def report_resuming_byte(self, resume_len):
		"""Report attempt to resume at given byte."""
		self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
	
	def report_retry(self, count, retries):
		"""Report retry in case of HTTP error 5xx"""
		self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
	
	def report_file_already_downloaded(self, file_name):
		"""Report file has already been fully downloaded."""
		try:
			self.to_screen(u'[download] %s has already been downloaded' % file_name)
		except (UnicodeEncodeError), err:
			# Fall back to a message without the (unencodable) file name
			self.to_screen(u'[download] The file has already been downloaded')
	
	def report_unable_to_resume(self):
		"""Report it was impossible to resume download."""
		self.to_screen(u'[download] Unable to resume')
	
	def report_finish(self):
		"""Report download finished."""
		if self.params.get('noprogress', False):
			self.to_screen(u'[download] Download completed')
		else:
			# The progress line ended without a newline; emit one now
			self.to_screen(u'')
	
	def increment_downloads(self):
		"""Increment the ordinal that assigns a number to each file."""
		self._num_downloads += 1

	def process_info(self, info_dict):
		"""Process a single dictionary returned by an InfoExtractor.

		In simulate mode only the forced printings happen; otherwise the
		output filename is built from the template, directories are
		created, the video is downloaded and postprocessors are run.
		"""
		# Do nothing else if in simulate mode
		if self.params.get('simulate', False):
			# Forced printings
			if self.params.get('forcetitle', False):
				print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forceurl', False):
				print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
				print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forcedescription', False) and 'description' in info_dict:
				print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')

			return
			
		try:
			# Extend the IE-provided fields with template-only values
			template_dict = dict(info_dict)
			template_dict['epoch'] = unicode(long(time.time()))
			template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
			filename = self.params['outtmpl'] % template_dict
		except (ValueError, KeyError), err:
			self.trouble(u'ERROR: invalid system charset or erroneous output template')
			return
		if self.params.get('nooverwrites', False) and os.path.exists(filename):
			self.to_stderr(u'WARNING: file exists and will be skipped')
			return

		try:
			self.pmkdir(filename)
		except (OSError, IOError), err:
			self.trouble(u'ERROR: unable to create directories: %s' % str(err))
			return

		try:
			success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
		except (OSError, IOError), err:
			raise UnavailableVideoError
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self.trouble(u'ERROR: unable to download video data: %s' % str(err))
			return
		except (ContentTooShortError, ), err:
			self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
			return

		if success:
			try:
				self.post_process(filename, info_dict)
			except (PostProcessingError), err:
				self.trouble(u'ERROR: postprocessing: %s' % str(err))
				return

	def download(self, url_list):
		"""Download a given list of URLs.

		Returns the process return code (0 on success, 1 if any download
		had trouble while 'ignoreerrors' was set).
		"""
		# A fixed template cannot distinguish several downloads
		if len(url_list) > 1 and self.fixed_template():
			raise SameFileError(self.params['outtmpl'])

		for url in url_list:
			suitable_found = False
			for ie in self._ies:
				# Go to next InfoExtractor if not suitable
				if not ie.suitable(url):
					continue

				# Suitable InfoExtractor found
				suitable_found = True

				# Extract information from URL and process it
				ie.extract(url)

				# Suitable InfoExtractor had been found; go to next URL
				break

			if not suitable_found:
				self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)

		return self._download_retcode

	def post_process(self, filename, ie_info):
		"""Run the postprocessing chain on the given file.

		Each PostProcessor receives the info dict (with 'filepath' added)
		and may return None to stop the chain.
		"""
		info = dict(ie_info)
		info['filepath'] = filename
		for pp in self._pps:
			info = pp.run(info)
			if info is None:
				break
	
	def _download_with_rtmpdump(self, filename, url, player_url):
		"""Download an rtmp:// URL by shelling out to the rtmpdump tool."""
		self.report_destination(filename)
		tmpfilename = self.temp_name(filename)

		# Check for rtmpdump first
		try:
			subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
		except (OSError, IOError):
			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
			return False

		# Download using rtmpdump. rtmpdump returns exit code 2 when
		# the connection was interrumpted and resuming appears to be
		# possible. This is part of rtmpdump's normal usage, AFAIK.
		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
		retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
		while retval == 2 or retval == 1:
			prevsize = os.path.getsize(tmpfilename)
			self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
			time.sleep(5.0) # This seems to be needed
			retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
			cursize = os.path.getsize(tmpfilename)
			# Stop retrying when resuming makes no progress
			if prevsize == cursize and retval == 1:
				break
		if retval == 0:
			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
			self.try_rename(tmpfilename, filename)
			return True
		else:
			self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
			return False

	def _do_download(self, filename, url, player_url):
		"""Download url to filename, handling resume, retries and rate limit.

		Data is first written to a '.part' file which is renamed into
		place only after a successful download.  Returns True on success.
		"""
		# Check file already present
		if self.params.get('continuedl', False) and os.path.isfile(filename):
			self.report_file_already_downloaded(filename)
			return True

		# Attempt to download using rtmpdump
		if url.startswith('rtmp'):
			return self._download_with_rtmpdump(filename, url, player_url)

		tmpfilename = self.temp_name(filename)
		stream = None
		open_mode = 'wb'
		# basic_request is kept without the Range header for the 416 fallback
		basic_request = urllib2.Request(url, None, std_headers)
		request = urllib2.Request(url, None, std_headers)

		# Establish possible resume length
		if os.path.isfile(tmpfilename):
			resume_len = os.path.getsize(tmpfilename)
		else:
			resume_len = 0

		# Request parameters in case of being able to resume
		if self.params.get('continuedl', False) and resume_len != 0:
			self.report_resuming_byte(resume_len)
			request.add_header('Range','bytes=%d-' % resume_len)
			open_mode = 'ab'

		count = 0
		retries = self.params.get('retries', 0)
		while count <= retries:
			# Establish connection
			try:
				data = urllib2.urlopen(request)
				break
			except (urllib2.HTTPError, ), err:
				if (err.code < 500 or err.code >= 600) and err.code != 416:
					# Unexpected HTTP error
					raise
				elif err.code == 416:
					# Unable to resume (requested range not satisfiable)
					try:
						# Open the connection again without the range header
						data = urllib2.urlopen(basic_request)
						content_length = data.info()['Content-Length']
					except (urllib2.HTTPError, ), err:
						if err.code < 500 or err.code >= 600:
							raise
					else:
						# Examine the reported length
						if (content_length is not None and
						    (resume_len - 100 < long(content_length) < resume_len + 100)):
							# The file had already been fully downloaded.
							# Explanation to the above condition: in issue #175 it was revealed that
							# YouTube sometimes adds or removes a few bytes from the end of the file,
							# changing the file size slightly and causing problems for some users. So
							# I decided to implement a suggested change and consider the file
							# completely downloaded if the file size differs less than 100 bytes from
							# the one in the hard drive.
							self.report_file_already_downloaded(filename)
							self.try_rename(tmpfilename, filename)
							return True
						else:
							# The length does not match, we start the download over
							self.report_unable_to_resume()
							open_mode = 'wb'
							break
			# Retry
			count += 1
			if count <= retries:
				self.report_retry(count, retries)

		if count > retries:
			self.trouble(u'ERROR: giving up after %s retries' % retries)
			return False

		data_len = data.info().get('Content-length', None)
		data_len_str = self.format_bytes(data_len)
		byte_counter = 0
		block_size = 1024
		start = time.time()
		while True:
			# Download and write
			before = time.time()
			data_block = data.read(block_size)
			after = time.time()
			data_block_len = len(data_block)
			if data_block_len == 0:
				break
			byte_counter += data_block_len

			# Open file just in time
			if stream is None:
				try:
					(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
					self.report_destination(filename)
				except (OSError, IOError), err:
					self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
					return False
			try:
				stream.write(data_block)
			except (IOError, OSError), err:
				self.trouble(u'\nERROR: unable to write data: %s' % str(err))
				return False
			# Adapt the next read size to the observed throughput
			block_size = self.best_block_size(after - before, data_block_len)

			# Progress message
			percent_str = self.calc_percent(byte_counter, data_len)
			eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
			speed_str = self.calc_speed(start, time.time(), byte_counter)
			self.report_progress(percent_str, data_len_str, speed_str, eta_str)

			# Apply rate limit
			self.slow_down(start, byte_counter)

		self.report_finish()
		# NOTE(review): data_len is the raw Content-length header string, so
		# this compares string representations of the two byte counts.
		if data_len is not None and str(byte_counter) != data_len:
			raise ContentTooShortError(byte_counter, long(data_len))
		self.try_rename(tmpfilename, filename)
		return True
648
class InfoExtractor(object):
	"""Information Extractor class.

	Information extractors are the classes that, given a URL, extract
	information from the video (or videos) the URL refers to. This
	information includes the real video URL, the video title and simplified
	title, author and others. The information is stored in a dictionary
	which is then passed to the FileDownloader. The FileDownloader
	processes this information possibly downloading the video to the file
	system, among other possible outcomes. The dictionaries must include
	the following fields:

	id:             Video identifier.
	url:            Final video URL.
	uploader:       Nickname of the video uploader.
	title:          Literal title.
	stitle:         Simplified title.
	ext:            Video filename extension.
	format:         Video format.
	player_url:     SWF Player URL (may be None).

	The following fields are optional. Their primary purpose is to allow
	youtube-dl to serve as the backend for a video search function, such
	as the one in youtube2mp3.  They are only used when their respective
	forced printing functions are called:

	thumbnail:      Full URL to a video thumbnail image.
	description:    One-line video description.

	Subclasses of this one should re-define the _real_initialize() and
	_real_extract() methods, as well as the suitable() static method.
	Probably, they should also be instantiated and added to the main
	downloader.
	"""

	# Flipped to True by initialize() once _real_initialize() has run.
	_ready = False
	# FileDownloader this IE reports progress/errors to; may remain None.
	_downloader = None

	def __init__(self, downloader=None):
		"""Constructor. Receives an optional downloader."""
		self._ready = False
		self.set_downloader(downloader)

	@staticmethod
	def suitable(url):
		"""Receives a URL and returns True if suitable for this IE."""
		# The base class claims no URL; each subclass overrides this.
		return False

	def initialize(self):
		"""Initializes an instance (authentication, etc)."""
		# Run the real initialization at most once per instance.
		if self._ready:
			return
		self._real_initialize()
		self._ready = True

	def extract(self, url):
		"""Extracts URL information and returns it in list of dicts."""
		self.initialize()
		return self._real_extract(url)

	def set_downloader(self, downloader):
		"""Sets the downloader for this IE."""
		self._downloader = downloader

	def _real_initialize(self):
		"""Real initialization process. Redefine in subclasses."""
		pass

	def _real_extract(self, url):
		"""Real extraction process. Redefine in subclasses."""
		pass
719
class YoutubeIE(InfoExtractor):
	"""Information extractor for youtube.com."""

	# Group 1 matches the optional site prefix; group 2 is the video id.
	# The conditional (?(1).+)? only allows trailing junk when a prefix matched.
	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$'
	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
	_NETRC_MACHINE = 'youtube'
	# Listed in order of quality; list position drives best-quality and
	# --format-limit selection in _real_extract().
	_available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
	# Maps fmt code -> filename extension; codes missing here fall back to
	# 'flv' at the use site.
	_video_extensions = {
		'13': '3gp',
		'17': 'mp4',
		'18': 'mp4',
		'22': 'mp4',
		'37': 'mp4',
		'38': 'video', # You actually don't know if this will be MOV, AVI or whatever
		'43': 'webm',
		'45': 'webm',
	}

	@staticmethod
	def suitable(url):
		"""Receives a URL and returns True if suitable for this IE."""
		return (re.match(YoutubeIE._VALID_URL, url) is not None)

	def report_lang(self):
		"""Report attempt to set language."""
		self._downloader.to_screen(u'[youtube] Setting language')

	def report_login(self):
		"""Report attempt to log in."""
		self._downloader.to_screen(u'[youtube] Logging in')
	
	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self._downloader.to_screen(u'[youtube] Confirming age')
	
	def report_video_webpage_download(self, video_id):
		"""Report attempt to download video webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
	
	def report_video_info_webpage_download(self, video_id):
		"""Report attempt to download video info webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
	
	def report_information_extraction(self, video_id):
		"""Report attempt to extract video information."""
		self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
	
	def report_unavailable_format(self, video_id, format):
		"""Report that the requested format is not available."""
		self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
	
	def report_rtmp_download(self):
		"""Indicate the download will use the RTMP protocol."""
		self._downloader.to_screen(u'[youtube] RTMP download detected')
	
	def _real_initialize(self):
		"""Set the site language, optionally log in, and confirm age.

		All failures before the age confirmation are reported as warnings
		only, so public videos can still be downloaded without an account.
		"""
		if self._downloader is None:
			return

		username = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			username = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					username = info[0]
					password = info[2]
				else:
					# Treat a missing machine entry the same as a parse error.
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		# Set language
		request = urllib2.Request(self._LANG_URL, None, std_headers)
		try:
			self.report_lang()
			urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
			return

		# No authentication to be performed
		if username is None:
			return

		# Log in
		login_form = {
				'current_form': 'loginForm',
				'next':         '/',
				'action_login': 'Log In',
				'username':     username,
				'password':     password,
				}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			# If the login form is still present in the response, the
			# credentials were rejected.
			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return
	
		# Confirm age
		age_form = {
				'next_url':             '/',
				'action_confirm':       'Confirm',
				}
		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
		try:
			self.report_age_confirmation()
			age_results = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return

	def _real_extract(self, url):
		"""Extract video information and hand each selected format to the
		downloader via process_info().  Errors are reported through
		self._downloader.trouble() and the method returns early.
		"""
		# Extract video id from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return
		# Group 1 is the (optional) URL prefix; group 2 is the video id.
		video_id = mobj.group(2)

		# Get video webpage
		self.report_video_webpage_download(video_id)
		request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id, None, std_headers)
		try:
			video_webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
			return

		# Attempt to extract SWF player URL
		mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
		if mobj is not None:
			# Unescape the JS-escaped URL (\\/ -> /).
			player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
		else:
			player_url = None

		# Get video info
		# Some videos only answer for particular 'el' values, so try several
		# until one response contains a 'token' parameter.
		self.report_video_info_webpage_download(video_id)
		for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
			video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
					   % (video_id, el_type))
			request = urllib2.Request(video_info_url, None, std_headers)
			try:
				video_info_webpage = urllib2.urlopen(request).read()
				video_info = parse_qs(video_info_webpage)
				if 'token' in video_info:
					break
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
				return
		if 'token' not in video_info:
			if 'reason' in video_info:
				self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
			else:
				self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
			return

		# Start extracting information
		self.report_information_extraction(video_id)

		# uploader
		if 'author' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = urllib.unquote_plus(video_info['author'][0])

		# title
		if 'title' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract video title')
			return
		video_title = urllib.unquote_plus(video_info['title'][0])
		video_title = video_title.decode('utf-8')
		video_title = sanitize_title(video_title)

		# simplified title: keep only ASCII letters/digits, collapse the rest
		# into underscores.
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
		simple_title = simple_title.strip(ur'_')

		# thumbnail image
		if 'thumbnail_url' not in video_info:
			self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
			video_thumbnail = ''
		else:	# don't panic if we can't find it
			video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

		# upload date
		upload_date = u'NA'
		mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
		if mobj is not None:
			upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
			format_expressions = ['%d %B %Y', '%B %d %Y']
			for expression in format_expressions:
				try:
					upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
				except:
					# NOTE(review): the loop does not break after a successful
					# parse, so the next expression re-parses the already
					# normalized '%Y%m%d' string; the bare except silently
					# swallows that (and any other) failure, leaving the first
					# successful result in upload_date.
					pass

		# description (only extracted when it will actually be printed)
		video_description = 'No description available.'
		if self._downloader.params.get('forcedescription', False):
			mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
			if mobj is not None:
				video_description = mobj.group(1)

		# token
		video_token = urllib.unquote_plus(video_info['token'][0])

		# Decide which formats to download
		requested_format = self._downloader.params.get('format', None)
		# Template with fmt left as a placeholder (%%s -> %s) for each format.
		get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token)

		if 'fmt_url_map' in video_info:
			# fmt_url_map is 'fmt|url,fmt|url,...'
			url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
			format_limit = self._downloader.params.get('format_limit', None)
			if format_limit is not None and format_limit in self._available_formats:
				# Only consider formats at or below the requested quality cap.
				format_list = self._available_formats[self._available_formats.index(format_limit):]
			else:
				format_list = self._available_formats
			existing_formats = [x for x in format_list if x in url_map]
			if len(existing_formats) == 0:
				self._downloader.trouble(u'ERROR: no known formats available for video')
				return
			if requested_format is None:
				video_url_list = [(existing_formats[0], get_video_template % existing_formats[0])] # Best quality
			elif requested_format == '-1':
				video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats
			else:
				video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format

		elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
			self.report_rtmp_download()
			video_url_list = [(None, video_info['conn'][0])]

		else:
			self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
			return

		for format_param, video_real_url in video_url_list:
			# At this point we have a new video
			self._downloader.increment_downloads()

			# Extension
			video_extension = self._video_extensions.get(format_param, 'flv')

			# Find the video URL in fmt_url_map or conn paramters
			try:
				# Process video information
				self._downloader.process_info({
					'id':		video_id.decode('utf-8'),
					'url':		video_real_url.decode('utf-8'),
					'uploader':	video_uploader.decode('utf-8'),
					'upload_date':	upload_date,
					'title':	video_title,
					'stitle':	simple_title,
					'ext':		video_extension.decode('utf-8'),
					'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),
					'thumbnail':	video_thumbnail.decode('utf-8'),
					'description':	video_description.decode('utf-8'),
					'player_url':	player_url,
				})
			except UnavailableVideoError, err:
				# Keep going: other formats in video_url_list may still work.
				self._downloader.trouble(u'ERROR: unable to download video (format may not be available)')
997
class MetacafeIE(InfoExtractor):
	"""Information Extractor for metacafe.com."""

	# Group 1: video id, group 2: simplified title slug.
	_VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
	_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
	_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
	# YoutubeIE instance used for 'yt-...' ids that are hosted on YouTube.
	_youtube_ie = None

	def __init__(self, youtube_ie, downloader=None):
		"""Constructor. Receives the YoutubeIE to delegate to and an
		optional downloader."""
		InfoExtractor.__init__(self, downloader)
		self._youtube_ie = youtube_ie

	@staticmethod
	def suitable(url):
		"""Receives a URL and returns True if suitable for this IE."""
		return (re.match(MetacafeIE._VALID_URL, url) is not None)

	def report_disclaimer(self):
		"""Report disclaimer retrieval."""
		self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')

	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self._downloader.to_screen(u'[metacafe] Confirming age')
	
	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
	
	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)

	def _real_initialize(self):
		"""Fetch the family-filter disclaimer page and POST the age
		confirmation so subsequent requests see unfiltered content.
		Presumably the session is kept via a cookie jar installed
		elsewhere — not visible from this class."""
		# Retrieve disclaimer
		request = urllib2.Request(self._DISCLAIMER, None, std_headers)
		try:
			self.report_disclaimer()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
			return

		# Confirm age
		disclaimer_form = {
			'filters': '0',
			'submit': "Continue - I'm over 18",
			}
		request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
		try:
			self.report_age_confirmation()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return
	
	def _real_extract(self, url):
		"""Extract the real media URL, title and uploader, then hand the
		result to the downloader via process_info()."""
		# Extract id and simplified title from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return

		video_id = mobj.group(1)

		# Check if video comes from YouTube
		mobj2 = re.match(r'^yt-(.*)$', video_id)
		if mobj2 is not None:
			# Delegate YouTube-hosted videos to the YouTube extractor.
			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
			return

		# At this point we have a new video
		self._downloader.increment_downloads()

		simple_title = mobj.group(2).decode('utf-8')

		# Retrieve video webpage to extract further information
		request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader and title from webpage
		self.report_extraction(video_id)
		# First try the direct mediaURL parameter...
		mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
		if mobj is not None:
			mediaURL = urllib.unquote(mobj.group(1))
			video_extension = mediaURL[-3:]
			
			# Extract gdaKey if available
			mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
			if mobj is None:
				video_url = mediaURL
			else:
				gdaKey = mobj.group(1)
				video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
		else:
			# ...otherwise fall back to the flashvars/mediaData JSON blob.
			mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			vardict = parse_qs(mobj.group(1))
			if 'mediaData' not in vardict:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			# Unescape JSON-escaped slashes in the URL.
			mediaURL = mobj.group(1).replace('\\/', '/')
			video_extension = mediaURL[-3:]
			video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))

		mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)

		mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = mobj.group(1)

		try:
			# Process video information
			self._downloader.process_info({
				'id':		video_id.decode('utf-8'),
				'url':		video_url.decode('utf-8'),
				'uploader':	video_uploader.decode('utf-8'),
				'upload_date':	u'NA',
				'title':	video_title,
				'stitle':	simple_title,
				'ext':		video_extension.decode('utf-8'),
				'format':	u'NA',
				'player_url':	None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'ERROR: unable to download video')
1141
1142
class DailymotionIE(InfoExtractor):
	"""Information Extractor for Dailymotion"""

	# Group 1: video id (up to the first underscore), group 2: title slug.
	_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'

	def __init__(self, downloader=None):
		"""Constructor. Receives an optional downloader."""
		InfoExtractor.__init__(self, downloader)

	@staticmethod
	def suitable(url):
		"""Receives a URL and returns True if suitable for this IE."""
		return (re.match(DailymotionIE._VALID_URL, url) is not None)

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
	
	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)

	def _real_initialize(self):
		"""No initialization (no login or age gate) is needed."""
		return

	def _real_extract(self, url):
		"""Extract the media URL, title and uploader from the watch page
		and hand the result to the downloader via process_info()."""
		# Extract id and simplified title from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return

		# At this point we have a new video
		self._downloader.increment_downloads()
		video_id = mobj.group(1)

		simple_title = mobj.group(2).decode('utf-8')
		# Dailymotion serves flv only (as far as this extractor knows).
		video_extension = 'flv'

		# Retrieve video webpage to extract further information
		request = urllib2.Request(url)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract media URL')
			return
		mediaURL = urllib.unquote(mobj.group(1))

		# if needed add http://www.dailymotion.com/ if relative URL

		video_url = mediaURL

		# '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
		mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)

		mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = mobj.group(1)

		try:
			# Process video information
			self._downloader.process_info({
				'id':		video_id.decode('utf-8'),
				'url':		video_url.decode('utf-8'),
				'uploader':	video_uploader.decode('utf-8'),
				'upload_date':	u'NA',
				'title':	video_title,
				'stitle':	simple_title,
				'ext':		video_extension.decode('utf-8'),
				'format':	u'NA',
				'player_url':	None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'ERROR: unable to download video')
1230
1231 class GoogleIE(InfoExtractor):
1232         """Information extractor for video.google.com."""
1233
1234         _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1235
1236         def __init__(self, downloader=None):
1237                 InfoExtractor.__init__(self, downloader)
1238
1239         @staticmethod
1240         def suitable(url):
1241                 return (re.match(GoogleIE._VALID_URL, url) is not None)
1242
1243         def report_download_webpage(self, video_id):
1244                 """Report webpage download."""
1245                 self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
1246
1247         def report_extraction(self, video_id):
1248                 """Report information extraction."""
1249                 self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
1250
1251         def _real_initialize(self):
1252                 return
1253
1254         def _real_extract(self, url):
1255                 # Extract id from URL
1256                 mobj = re.match(self._VALID_URL, url)
1257                 if mobj is None:
1258                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1259                         return
1260
1261                 # At this point we have a new video
1262                 self._downloader.increment_downloads()
1263                 video_id = mobj.group(1)
1264
1265                 video_extension = 'mp4'
1266
1267                 # Retrieve video webpage to extract further information
1268                 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1269                 try:
1270                         self.report_download_webpage(video_id)
1271                         webpage = urllib2.urlopen(request).read()
1272                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1273                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1274                         return
1275
1276                 # Extract URL, uploader, and title from webpage
1277                 self.report_extraction(video_id)
1278                 mobj = re.search(r"download_url:'([^']+)'", webpage)
1279                 if mobj is None:
1280                         video_extension = 'flv'
1281                         mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1282                 if mobj is None:
1283                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1284                         return
1285                 mediaURL = urllib.unquote(mobj.group(1))
1286                 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1287                 mediaURL = mediaURL.replace('\\x26', '\x26')
1288
1289                 video_url = mediaURL
1290
1291                 mobj = re.search(r'<title>(.*)</title>', webpage)
1292                 if mobj is None:
1293                         self._downloader.trouble(u'ERROR: unable to extract title')
1294                         return
1295                 video_title = mobj.group(1).decode('utf-8')
1296                 video_title = sanitize_title(video_title)
1297                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1298
1299                 # Extract video description
1300                 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1301                 if mobj is None:
1302                         self._downloader.trouble(u'ERROR: unable to extract video description')
1303                         return
1304                 video_description = mobj.group(1).decode('utf-8')
1305                 if not video_description:
1306                         video_description = 'No description available.'
1307
1308                 # Extract video thumbnail
1309                 if self._downloader.params.get('forcethumbnail', False):
1310                         request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1311                         try:
1312                                 webpage = urllib2.urlopen(request).read()
1313                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1314                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1315                                 return
1316                         mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1317                         if mobj is None:
1318                                 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1319                                 return
1320                         video_thumbnail = mobj.group(1)
1321                 else:   # we need something to pass to process_info
1322                         video_thumbnail = ''
1323
1324
1325                 try:
1326                         # Process video information
1327                         self._downloader.process_info({
1328                                 'id':           video_id.decode('utf-8'),
1329                                 'url':          video_url.decode('utf-8'),
1330                                 'uploader':     u'NA',
1331                                 'upload_date':  u'NA',
1332                                 'title':        video_title,
1333                                 'stitle':       simple_title,
1334                                 'ext':          video_extension.decode('utf-8'),
1335                                 'format':       u'NA',
1336                                 'player_url':   None,
1337                         })
1338                 except UnavailableVideoError:
1339                         self._downloader.trouble(u'ERROR: unable to download video')
1340
1341
1342 class PhotobucketIE(InfoExtractor):
1343         """Information extractor for photobucket.com."""
1344
1345         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1346
1347         def __init__(self, downloader=None):
1348                 InfoExtractor.__init__(self, downloader)
1349
1350         @staticmethod
1351         def suitable(url):
1352                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1353
1354         def report_download_webpage(self, video_id):
1355                 """Report webpage download."""
1356                 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1357
1358         def report_extraction(self, video_id):
1359                 """Report information extraction."""
1360                 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1361
1362         def _real_initialize(self):
1363                 return
1364
1365         def _real_extract(self, url):
1366                 # Extract id from URL
1367                 mobj = re.match(self._VALID_URL, url)
1368                 if mobj is None:
1369                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1370                         return
1371
1372                 # At this point we have a new video
1373                 self._downloader.increment_downloads()
1374                 video_id = mobj.group(1)
1375
1376                 video_extension = 'flv'
1377
1378                 # Retrieve video webpage to extract further information
1379                 request = urllib2.Request(url)
1380                 try:
1381                         self.report_download_webpage(video_id)
1382                         webpage = urllib2.urlopen(request).read()
1383                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1384                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1385                         return
1386
1387                 # Extract URL, uploader, and title from webpage
1388                 self.report_extraction(video_id)
1389                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1390                 if mobj is None:
1391                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1392                         return
1393                 mediaURL = urllib.unquote(mobj.group(1))
1394
1395                 video_url = mediaURL
1396
1397                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1398                 if mobj is None:
1399                         self._downloader.trouble(u'ERROR: unable to extract title')
1400                         return
1401                 video_title = mobj.group(1).decode('utf-8')
1402                 video_title = sanitize_title(video_title)
1403                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1404
1405                 video_uploader = mobj.group(2).decode('utf-8')
1406
1407                 try:
1408                         # Process video information
1409                         self._downloader.process_info({
1410                                 'id':           video_id.decode('utf-8'),
1411                                 'url':          video_url.decode('utf-8'),
1412                                 'uploader':     video_uploader,
1413                                 'upload_date':  u'NA',
1414                                 'title':        video_title,
1415                                 'stitle':       simple_title,
1416                                 'ext':          video_extension.decode('utf-8'),
1417                                 'format':       u'NA',
1418                                 'player_url':   None,
1419                         })
1420                 except UnavailableVideoError:
1421                         self._downloader.trouble(u'ERROR: unable to download video')
1422
1423
1424 class YahooIE(InfoExtractor):
1425         """Information extractor for video.yahoo.com."""
1426
1427         # _VALID_URL matches all Yahoo! Video URLs
1428         # _VPAGE_URL matches only the extractable '/watch/' URLs
1429         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1430         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1431
1432         def __init__(self, downloader=None):
1433                 InfoExtractor.__init__(self, downloader)
1434
1435         @staticmethod
1436         def suitable(url):
1437                 return (re.match(YahooIE._VALID_URL, url) is not None)
1438
1439         def report_download_webpage(self, video_id):
1440                 """Report webpage download."""
1441                 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1442
1443         def report_extraction(self, video_id):
1444                 """Report information extraction."""
1445                 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1446
1447         def _real_initialize(self):
1448                 return
1449
1450         def _real_extract(self, url, new_video=True):
1451                 # Extract ID from URL
1452                 mobj = re.match(self._VALID_URL, url)
1453                 if mobj is None:
1454                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1455                         return
1456
1457                 # At this point we have a new video
1458                 self._downloader.increment_downloads()
1459                 video_id = mobj.group(2)
1460                 video_extension = 'flv'
1461
1462                 # Rewrite valid but non-extractable URLs as
1463                 # extractable English language /watch/ URLs
1464                 if re.match(self._VPAGE_URL, url) is None:
1465                         request = urllib2.Request(url)
1466                         try:
1467                                 webpage = urllib2.urlopen(request).read()
1468                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1469                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1470                                 return
1471
1472                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1473                         if mobj is None:
1474                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1475                                 return
1476                         yahoo_id = mobj.group(1)
1477
1478                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1479                         if mobj is None:
1480                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1481                                 return
1482                         yahoo_vid = mobj.group(1)
1483
1484                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1485                         return self._real_extract(url, new_video=False)
1486
1487                 # Retrieve video webpage to extract further information
1488                 request = urllib2.Request(url)
1489                 try:
1490                         self.report_download_webpage(video_id)
1491                         webpage = urllib2.urlopen(request).read()
1492                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1493                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1494                         return
1495
1496                 # Extract uploader and title from webpage
1497                 self.report_extraction(video_id)
1498                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1499                 if mobj is None:
1500                         self._downloader.trouble(u'ERROR: unable to extract video title')
1501                         return
1502                 video_title = mobj.group(1).decode('utf-8')
1503                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1504
1505                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1506                 if mobj is None:
1507                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1508                         return
1509                 video_uploader = mobj.group(1).decode('utf-8')
1510
1511                 # Extract video thumbnail
1512                 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1513                 if mobj is None:
1514                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1515                         return
1516                 video_thumbnail = mobj.group(1).decode('utf-8')
1517
1518                 # Extract video description
1519                 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1520                 if mobj is None:
1521                         self._downloader.trouble(u'ERROR: unable to extract video description')
1522                         return
1523                 video_description = mobj.group(1).decode('utf-8')
1524                 if not video_description: video_description = 'No description available.'
1525
1526                 # Extract video height and width
1527                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1528                 if mobj is None:
1529                         self._downloader.trouble(u'ERROR: unable to extract video height')
1530                         return
1531                 yv_video_height = mobj.group(1)
1532
1533                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1534                 if mobj is None:
1535                         self._downloader.trouble(u'ERROR: unable to extract video width')
1536                         return
1537                 yv_video_width = mobj.group(1)
1538
1539                 # Retrieve video playlist to extract media URL
1540                 # I'm not completely sure what all these options are, but we
1541                 # seem to need most of them, otherwise the server sends a 401.
1542                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1543                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1544                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1545                                           '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1546                                           '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1547                 try:
1548                         self.report_download_webpage(video_id)
1549                         webpage = urllib2.urlopen(request).read()
1550                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1551                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1552                         return
1553
1554                 # Extract media URL from playlist XML
1555                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1556                 if mobj is None:
1557                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1558                         return
1559                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1560                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1561
1562                 try:
1563                         # Process video information
1564                         self._downloader.process_info({
1565                                 'id':           video_id.decode('utf-8'),
1566                                 'url':          video_url,
1567                                 'uploader':     video_uploader,
1568                                 'upload_date':  u'NA',
1569                                 'title':        video_title,
1570                                 'stitle':       simple_title,
1571                                 'ext':          video_extension.decode('utf-8'),
1572                                 'thumbnail':    video_thumbnail.decode('utf-8'),
1573                                 'description':  video_description,
1574                                 'thumbnail':    video_thumbnail,
1575                                 'description':  video_description,
1576                                 'player_url':   None,
1577                         })
1578                 except UnavailableVideoError:
1579                         self._downloader.trouble(u'ERROR: unable to download video')
1580
1581
1582 class GenericIE(InfoExtractor):
1583         """Generic last-resort information extractor."""
1584
1585         def __init__(self, downloader=None):
1586                 InfoExtractor.__init__(self, downloader)
1587
1588         @staticmethod
1589         def suitable(url):
1590                 return True
1591
1592         def report_download_webpage(self, video_id):
1593                 """Report webpage download."""
1594                 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
1595                 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
1596
1597         def report_extraction(self, video_id):
1598                 """Report information extraction."""
1599                 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
1600
1601         def _real_initialize(self):
1602                 return
1603
1604         def _real_extract(self, url):
1605                 # At this point we have a new video
1606                 self._downloader.increment_downloads()
1607
1608                 video_id = url.split('/')[-1]
1609                 request = urllib2.Request(url)
1610                 try:
1611                         self.report_download_webpage(video_id)
1612                         webpage = urllib2.urlopen(request).read()
1613                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1614                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1615                         return
1616                 except ValueError, err:
1617                         # since this is the last-resort InfoExtractor, if
1618                         # this error is thrown, it'll be thrown here
1619                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1620                         return
1621
1622                 self.report_extraction(video_id)
1623                 # Start with something easy: JW Player in SWFObject
1624                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1625                 if mobj is None:
1626                         # Broaden the search a little bit
1627                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1628                 if mobj is None:
1629                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1630                         return
1631
1632                 # It's possible that one of the regexes
1633                 # matched, but returned an empty group:
1634                 if mobj.group(1) is None:
1635                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1636                         return
1637
1638                 video_url = urllib.unquote(mobj.group(1))
1639                 video_id  = os.path.basename(video_url)
1640
1641                 # here's a fun little line of code for you:
1642                 video_extension = os.path.splitext(video_id)[1][1:]
1643                 video_id        = os.path.splitext(video_id)[0]
1644
1645                 # it's tempting to parse this further, but you would
1646                 # have to take into account all the variations like
1647                 #   Video Title - Site Name
1648                 #   Site Name | Video Title
1649                 #   Video Title - Tagline | Site Name
1650                 # and so on and so forth; it's just not practical
1651                 mobj = re.search(r'<title>(.*)</title>', webpage)
1652                 if mobj is None:
1653                         self._downloader.trouble(u'ERROR: unable to extract title')
1654                         return
1655                 video_title = mobj.group(1).decode('utf-8')
1656                 video_title = sanitize_title(video_title)
1657                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1658
1659                 # video uploader is domain name
1660                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1661                 if mobj is None:
1662                         self._downloader.trouble(u'ERROR: unable to extract title')
1663                         return
1664                 video_uploader = mobj.group(1).decode('utf-8')
1665
1666                 try:
1667                         # Process video information
1668                         self._downloader.process_info({
1669                                 'id':           video_id.decode('utf-8'),
1670                                 'url':          video_url.decode('utf-8'),
1671                                 'uploader':     video_uploader,
1672                                 'upload_date':  u'NA',
1673                                 'title':        video_title,
1674                                 'stitle':       simple_title,
1675                                 'ext':          video_extension.decode('utf-8'),
1676                                 'format':       u'NA',
1677                                 'player_url':   None,
1678                         })
1679                 except UnavailableVideoError, err:
1680                         self._downloader.trouble(u'ERROR: unable to download video')
1681
1682
1683 class YoutubeSearchIE(InfoExtractor):
1684         """Information Extractor for YouTube search queries."""
1685         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1686         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1687         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1688         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1689         _youtube_ie = None
1690         _max_youtube_results = 1000
1691
1692         def __init__(self, youtube_ie, downloader=None):
1693                 InfoExtractor.__init__(self, downloader)
1694                 self._youtube_ie = youtube_ie
1695         
1696         @staticmethod
1697         def suitable(url):
1698                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1699
1700         def report_download_page(self, query, pagenum):
1701                 """Report attempt to download playlist page with given number."""
1702                 query = query.decode(preferredencoding())
1703                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1704
1705         def _real_initialize(self):
1706                 self._youtube_ie.initialize()
1707         
1708         def _real_extract(self, query):
1709                 mobj = re.match(self._VALID_QUERY, query)
1710                 if mobj is None:
1711                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1712                         return
1713
1714                 prefix, query = query.split(':')
1715                 prefix = prefix[8:]
1716                 query  = query.encode('utf-8')
1717                 if prefix == '':
1718                         self._download_n_results(query, 1)
1719                         return
1720                 elif prefix == 'all':
1721                         self._download_n_results(query, self._max_youtube_results)
1722                         return
1723                 else:
1724                         try:
1725                                 n = long(prefix)
1726                                 if n <= 0:
1727                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1728                                         return
1729                                 elif n > self._max_youtube_results:
1730                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
1731                                         n = self._max_youtube_results
1732                                 self._download_n_results(query, n)
1733                                 return
1734                         except ValueError: # parsing prefix as integer fails
1735                                 self._download_n_results(query, 1)
1736                                 return
1737
1738         def _download_n_results(self, query, n):
1739                 """Downloads a specified number of results for a query"""
1740
1741                 video_ids = []
1742                 already_seen = set()
1743                 pagenum = 1
1744
1745                 while True:
1746                         self.report_download_page(query, pagenum)
1747                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1748                         request = urllib2.Request(result_url, None, std_headers)
1749                         try:
1750                                 page = urllib2.urlopen(request).read()
1751                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1752                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1753                                 return
1754
1755                         # Extract video identifiers
1756                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1757                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1758                                 if video_id not in already_seen:
1759                                         video_ids.append(video_id)
1760                                         already_seen.add(video_id)
1761                                         if len(video_ids) == n:
1762                                                 # Specified n videos reached
1763                                                 for id in video_ids:
1764                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1765                                                 return
1766
1767                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1768                                 for id in video_ids:
1769                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1770                                 return
1771
1772                         pagenum = pagenum + 1
1773
1774 class GoogleSearchIE(InfoExtractor):
1775         """Information Extractor for Google Video search queries."""
1776         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1777         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1778         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1779         _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1780         _google_ie = None
1781         _max_google_results = 1000
1782
1783         def __init__(self, google_ie, downloader=None):
1784                 InfoExtractor.__init__(self, downloader)
1785                 self._google_ie = google_ie
1786         
1787         @staticmethod
1788         def suitable(url):
1789                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1790
1791         def report_download_page(self, query, pagenum):
1792                 """Report attempt to download playlist page with given number."""
1793                 query = query.decode(preferredencoding())
1794                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1795
1796         def _real_initialize(self):
1797                 self._google_ie.initialize()
1798         
1799         def _real_extract(self, query):
1800                 mobj = re.match(self._VALID_QUERY, query)
1801                 if mobj is None:
1802                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1803                         return
1804
1805                 prefix, query = query.split(':')
1806                 prefix = prefix[8:]
1807                 query  = query.encode('utf-8')
1808                 if prefix == '':
1809                         self._download_n_results(query, 1)
1810                         return
1811                 elif prefix == 'all':
1812                         self._download_n_results(query, self._max_google_results)
1813                         return
1814                 else:
1815                         try:
1816                                 n = long(prefix)
1817                                 if n <= 0:
1818                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1819                                         return
1820                                 elif n > self._max_google_results:
1821                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n))
1822                                         n = self._max_google_results
1823                                 self._download_n_results(query, n)
1824                                 return
1825                         except ValueError: # parsing prefix as integer fails
1826                                 self._download_n_results(query, 1)
1827                                 return
1828
1829         def _download_n_results(self, query, n):
1830                 """Downloads a specified number of results for a query"""
1831
1832                 video_ids = []
1833                 already_seen = set()
1834                 pagenum = 1
1835
1836                 while True:
1837                         self.report_download_page(query, pagenum)
1838                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1839                         request = urllib2.Request(result_url, None, std_headers)
1840                         try:
1841                                 page = urllib2.urlopen(request).read()
1842                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1843                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1844                                 return
1845
1846                         # Extract video identifiers
1847                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1848                                 video_id = mobj.group(1)
1849                                 if video_id not in already_seen:
1850                                         video_ids.append(video_id)
1851                                         already_seen.add(video_id)
1852                                         if len(video_ids) == n:
1853                                                 # Specified n videos reached
1854                                                 for id in video_ids:
1855                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1856                                                 return
1857
1858                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1859                                 for id in video_ids:
1860                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1861                                 return
1862
1863                         pagenum = pagenum + 1
1864
1865 class YahooSearchIE(InfoExtractor):
1866         """Information Extractor for Yahoo! Video search queries."""
1867         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
1868         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
1869         _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
1870         _MORE_PAGES_INDICATOR = r'\s*Next'
1871         _yahoo_ie = None
1872         _max_yahoo_results = 1000
1873
1874         def __init__(self, yahoo_ie, downloader=None):
1875                 InfoExtractor.__init__(self, downloader)
1876                 self._yahoo_ie = yahoo_ie
1877         
1878         @staticmethod
1879         def suitable(url):
1880                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
1881
1882         def report_download_page(self, query, pagenum):
1883                 """Report attempt to download playlist page with given number."""
1884                 query = query.decode(preferredencoding())
1885                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
1886
1887         def _real_initialize(self):
1888                 self._yahoo_ie.initialize()
1889         
1890         def _real_extract(self, query):
1891                 mobj = re.match(self._VALID_QUERY, query)
1892                 if mobj is None:
1893                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1894                         return
1895
1896                 prefix, query = query.split(':')
1897                 prefix = prefix[8:]
1898                 query  = query.encode('utf-8')
1899                 if prefix == '':
1900                         self._download_n_results(query, 1)
1901                         return
1902                 elif prefix == 'all':
1903                         self._download_n_results(query, self._max_yahoo_results)
1904                         return
1905                 else:
1906                         try:
1907                                 n = long(prefix)
1908                                 if n <= 0:
1909                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1910                                         return
1911                                 elif n > self._max_yahoo_results:
1912                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n))
1913                                         n = self._max_yahoo_results
1914                                 self._download_n_results(query, n)
1915                                 return
1916                         except ValueError: # parsing prefix as integer fails
1917                                 self._download_n_results(query, 1)
1918                                 return
1919
1920         def _download_n_results(self, query, n):
1921                 """Downloads a specified number of results for a query"""
1922
1923                 video_ids = []
1924                 already_seen = set()
1925                 pagenum = 1
1926
1927                 while True:
1928                         self.report_download_page(query, pagenum)
1929                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1930                         request = urllib2.Request(result_url, None, std_headers)
1931                         try:
1932                                 page = urllib2.urlopen(request).read()
1933                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1934                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1935                                 return
1936
1937                         # Extract video identifiers
1938                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1939                                 video_id = mobj.group(1)
1940                                 if video_id not in already_seen:
1941                                         video_ids.append(video_id)
1942                                         already_seen.add(video_id)
1943                                         if len(video_ids) == n:
1944                                                 # Specified n videos reached
1945                                                 for id in video_ids:
1946                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1947                                                 return
1948
1949                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1950                                 for id in video_ids:
1951                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1952                                 return
1953
1954                         pagenum = pagenum + 1
1955
1956 class YoutubePlaylistIE(InfoExtractor):
1957         """Information Extractor for YouTube playlists."""
1958
1959         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
1960         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1961         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1962         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1963         _youtube_ie = None
1964
1965         def __init__(self, youtube_ie, downloader=None):
1966                 InfoExtractor.__init__(self, downloader)
1967                 self._youtube_ie = youtube_ie
1968         
1969         @staticmethod
1970         def suitable(url):
1971                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1972
1973         def report_download_page(self, playlist_id, pagenum):
1974                 """Report attempt to download playlist page with given number."""
1975                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1976
1977         def _real_initialize(self):
1978                 self._youtube_ie.initialize()
1979         
1980         def _real_extract(self, url):
1981                 # Extract playlist id
1982                 mobj = re.match(self._VALID_URL, url)
1983                 if mobj is None:
1984                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1985                         return
1986
1987                 # Download playlist pages
1988                 playlist_id = mobj.group(1)
1989                 video_ids = []
1990                 pagenum = 1
1991
1992                 while True:
1993                         self.report_download_page(playlist_id, pagenum)
1994                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1995                         try:
1996                                 page = urllib2.urlopen(request).read()
1997                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1998                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1999                                 return
2000
2001                         # Extract video identifiers
2002                         ids_in_page = []
2003                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2004                                 if mobj.group(1) not in ids_in_page:
2005                                         ids_in_page.append(mobj.group(1))
2006                         video_ids.extend(ids_in_page)
2007
2008                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2009                                 break
2010                         pagenum = pagenum + 1
2011
2012                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2013                 playlistend = self._downloader.params.get('playlistend', -1)
2014                 video_ids = video_ids[playliststart:playlistend]
2015
2016                 for id in video_ids:
2017                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2018                 return
2019
2020 class YoutubeUserIE(InfoExtractor):
2021         """Information Extractor for YouTube users."""
2022
2023         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
2024         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2025         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
2026         _youtube_ie = None
2027
2028         def __init__(self, youtube_ie, downloader=None):
2029                 InfoExtractor.__init__(self, downloader)
2030                 self._youtube_ie = youtube_ie
2031         
2032         @staticmethod
2033         def suitable(url):
2034                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2035
2036         def report_download_page(self, username):
2037                 """Report attempt to download user page."""
2038                 self._downloader.to_screen(u'[youtube] user %s: Downloading page ' % (username))
2039
2040         def _real_initialize(self):
2041                 self._youtube_ie.initialize()
2042         
2043         def _real_extract(self, url):
2044                 # Extract username
2045                 mobj = re.match(self._VALID_URL, url)
2046                 if mobj is None:
2047                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2048                         return
2049
2050                 # Download user page
2051                 username = mobj.group(1)
2052                 video_ids = []
2053                 pagenum = 1
2054
2055                 self.report_download_page(username)
2056                 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
2057                 try:
2058                         page = urllib2.urlopen(request).read()
2059                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2060                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2061                         return
2062
2063                 # Extract video identifiers
2064                 ids_in_page = []
2065
2066                 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2067                         if mobj.group(1) not in ids_in_page:
2068                                 ids_in_page.append(mobj.group(1))
2069                 video_ids.extend(ids_in_page)
2070
2071                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2072                 playlistend = self._downloader.params.get('playlistend', -1)
2073                 video_ids = video_ids[playliststart:playlistend]
2074
2075                 for id in video_ids:
2076                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2077                 return
2078
class PostProcessor(object):
	"""Base class for postprocessing hooks.

	Instances are registered on a downloader through its
	add_post_processor() method.  After every successful download the
	downloader walks its chain of PostProcessors, calling run() on each:
	the first receives the download information, and every subsequent one
	receives whatever its predecessor returned.

	The chain stops as soon as a run() call returns None, or when its end
	is reached.  Like InfoExtractor objects, PostProcessors take part in a
	"mutual registration" scheme with their downloader.
	"""

	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Sets the downloader for this PP."""
		self._downloader = downloader

	def run(self, information):
		"""Run the PostProcessor.

		"information" is a dictionary shaped like the ones composed by
		InfoExtractors, plus one extra field, "filepath", naming the file
		that was downloaded.

		Returning None stops the postprocessing chain.  Any other return
		value must be an information dictionary, which is handed to the
		next PostProcessor in the chain (it may be the received dictionary
		with some fields changed).

		This method may also raise a PostProcessingError exception, which
		the calling downloader takes into account.
		"""
		return information # by default, do nothing
2124         
2125 ### MAIN PROGRAM ###
2126 if __name__ == '__main__':
2127         try:
2128                 # Modules needed only when running the main program
2129                 import getpass
2130                 import optparse
2131
2132                 # Function to update the program file with the latest version from bitbucket.org
2133                 def update_self(downloader, filename):
2134                         # Note: downloader only used for options
2135                         if not os.access (filename, os.W_OK):
2136                                 sys.exit('ERROR: no write permissions on %s' % filename)
2137
2138                         downloader.to_screen('Updating to latest stable version...')
2139                         latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
2140                         latest_version = urllib.urlopen(latest_url).read().strip()
2141                         prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
2142                         newcontent = urllib.urlopen(prog_url).read()
2143                         stream = open(filename, 'w')
2144                         stream.write(newcontent)
2145                         stream.close()
2146                         downloader.to_screen('Updated to version %s' % latest_version)
2147
2148                 # Parse command line
2149                 parser = optparse.OptionParser(
2150                         usage='Usage: %prog [options] url...',
2151                         version='2010.11.19',
2152                         conflict_handler='resolve',
2153                 )
2154
2155                 parser.add_option('-h', '--help',
2156                                 action='help', help='print this help text and exit')
2157                 parser.add_option('-v', '--version',
2158                                 action='version', help='print program version and exit')
2159                 parser.add_option('-U', '--update',
2160                                 action='store_true', dest='update_self', help='update this program to latest stable version')
2161                 parser.add_option('-i', '--ignore-errors',
2162                                 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
2163                 parser.add_option('-r', '--rate-limit',
2164                                 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
2165                 parser.add_option('-R', '--retries',
2166                                 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
2167                 parser.add_option('--playlist-start',
2168                                 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
2169                 parser.add_option('--playlist-end',
2170                                 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
2171
2172                 authentication = optparse.OptionGroup(parser, 'Authentication Options')
2173                 authentication.add_option('-u', '--username',
2174                                 dest='username', metavar='USERNAME', help='account username')
2175                 authentication.add_option('-p', '--password',
2176                                 dest='password', metavar='PASSWORD', help='account password')
2177                 authentication.add_option('-n', '--netrc',
2178                                 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
2179                 parser.add_option_group(authentication)
2180
2181                 video_format = optparse.OptionGroup(parser, 'Video Format Options')
2182                 video_format.add_option('-f', '--format',
2183                                 action='store', dest='format', metavar='FORMAT', help='video format code')
2184                 video_format.add_option('-m', '--mobile-version',
2185                                 action='store_const', dest='format', help='alias for -f 17', const='17')
2186                 video_format.add_option('--all-formats',
2187                                 action='store_const', dest='format', help='download all available video formats', const='-1')
2188                 video_format.add_option('--max-quality',
2189                                 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
2190                 video_format.add_option('-b', '--best-quality',
2191                                 action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)')
2192                 parser.add_option_group(video_format)
2193
2194                 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
2195                 verbosity.add_option('-q', '--quiet',
2196                                 action='store_true', dest='quiet', help='activates quiet mode', default=False)
2197                 verbosity.add_option('-s', '--simulate',
2198                                 action='store_true', dest='simulate', help='do not download video', default=False)
2199                 verbosity.add_option('-g', '--get-url',
2200                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
2201                 verbosity.add_option('-e', '--get-title',
2202                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
2203                 verbosity.add_option('--get-thumbnail',
2204                                 action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
2205                 verbosity.add_option('--get-description',
2206                                 action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
2207                 verbosity.add_option('--no-progress',
2208                                 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
2209                 parser.add_option_group(verbosity)
2210
2211                 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
2212                 filesystem.add_option('-t', '--title',
2213                                 action='store_true', dest='usetitle', help='use title in file name', default=False)
2214                 filesystem.add_option('-l', '--literal',
2215                                 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
2216                 filesystem.add_option('-A', '--auto-number',
2217                                 action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False)
2218                 filesystem.add_option('-o', '--output',
2219                                 dest='outtmpl', metavar='TEMPLATE', help='output filename template')
2220                 filesystem.add_option('-a', '--batch-file',
2221                                 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
2222                 filesystem.add_option('-w', '--no-overwrites',
2223                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
2224                 filesystem.add_option('-c', '--continue',
2225                                 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
2226                 filesystem.add_option('--cookies',
2227                                 dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
2228                 parser.add_option_group(filesystem)
2229
2230                 (opts, args) = parser.parse_args()
2231
2232                 # Open appropriate CookieJar
2233                 if opts.cookiefile is None:
2234                         jar = cookielib.CookieJar()
2235                 else:
2236                         try:
2237                                 jar = cookielib.MozillaCookieJar(opts.cookiefile)
2238                                 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
2239                                         jar.load()
2240                         except (IOError, OSError), err:
2241                                 sys.exit(u'ERROR: unable to open cookie file')
2242
2243                 # General configuration
2244                 cookie_processor = urllib2.HTTPCookieProcessor(jar)
2245                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
2246                 urllib2.install_opener(urllib2.build_opener(cookie_processor))
2247                 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
2248
2249                 # Batch file verification
2250                 batchurls = []
2251                 if opts.batchfile is not None:
2252                         try:
2253                                 if opts.batchfile == '-':
2254                                         batchfd = sys.stdin
2255                                 else:
2256                                         batchfd = open(opts.batchfile, 'r')
2257                                 batchurls = batchfd.readlines()
2258                                 batchurls = [x.strip() for x in batchurls]
2259                                 batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
2260                         except IOError:
2261                                 sys.exit(u'ERROR: batch file could not be read')
2262                 all_urls = batchurls + args
2263
2264                 # Conflicting, missing and erroneous options
2265                 if opts.bestquality:
2266                         print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n'
2267                 if opts.usenetrc and (opts.username is not None or opts.password is not None):
2268                         parser.error(u'using .netrc conflicts with giving username/password')
2269                 if opts.password is not None and opts.username is None:
2270                         parser.error(u'account username missing')
2271                 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
2272                         parser.error(u'using output template conflicts with using title, literal title or auto number')
2273                 if opts.usetitle and opts.useliteral:
2274                         parser.error(u'using title conflicts with using literal title')
2275                 if opts.username is not None and opts.password is None:
2276                         opts.password = getpass.getpass(u'Type account password and press return:')
2277                 if opts.ratelimit is not None:
2278                         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
2279                         if numeric_limit is None:
2280                                 parser.error(u'invalid rate limit specified')
2281                         opts.ratelimit = numeric_limit
2282                 if opts.retries is not None:
2283                         try:
2284                                 opts.retries = long(opts.retries)
2285                         except (TypeError, ValueError), err:
2286                                 parser.error(u'invalid retry count specified')
2287                 try:
2288                         opts.playliststart = long(opts.playliststart)
2289                         if opts.playliststart <= 0:
2290                                 raise ValueError
2291                 except (TypeError, ValueError), err:
2292                         parser.error(u'invalid playlist start number specified')
2293                 try:
2294                         opts.playlistend = long(opts.playlistend)
2295                         if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
2296                                 raise ValueError
2297                 except (TypeError, ValueError), err:
2298                         parser.error(u'invalid playlist end number specified')
2299
2300                 # Information extractors
2301                 youtube_ie = YoutubeIE()
2302                 metacafe_ie = MetacafeIE(youtube_ie)
2303                 dailymotion_ie = DailymotionIE()
2304                 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
2305                 youtube_user_ie = YoutubeUserIE(youtube_ie)
2306                 youtube_search_ie = YoutubeSearchIE(youtube_ie)
2307                 google_ie = GoogleIE()
2308                 google_search_ie = GoogleSearchIE(google_ie)
2309                 photobucket_ie = PhotobucketIE()
2310                 yahoo_ie = YahooIE()
2311                 yahoo_search_ie = YahooSearchIE(yahoo_ie)
2312                 generic_ie = GenericIE()
2313
2314                 # File downloader
2315                 fd = FileDownloader({
2316                         'usenetrc': opts.usenetrc,
2317                         'username': opts.username,
2318                         'password': opts.password,
2319                         'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
2320                         'forceurl': opts.geturl,
2321                         'forcetitle': opts.gettitle,
2322                         'forcethumbnail': opts.getthumbnail,
2323                         'forcedescription': opts.getdescription,
2324                         'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
2325                         'format': opts.format,
2326                         'format_limit': opts.format_limit,
2327                         'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
2328                                 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
2329                                 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
2330                                 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
2331                                 or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
2332                                 or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
2333                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
2334                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
2335                                 or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
2336                                 or u'%(id)s.%(ext)s'),
2337                         'ignoreerrors': opts.ignoreerrors,
2338                         'ratelimit': opts.ratelimit,
2339                         'nooverwrites': opts.nooverwrites,
2340                         'retries': opts.retries,
2341                         'continuedl': opts.continue_dl,
2342                         'noprogress': opts.noprogress,
2343                         'playliststart': opts.playliststart,
2344                         'playlistend': opts.playlistend,
2345                         'logtostderr': opts.outtmpl == '-',
2346                         })
2347                 fd.add_info_extractor(youtube_search_ie)
2348                 fd.add_info_extractor(youtube_pl_ie)
2349                 fd.add_info_extractor(youtube_user_ie)
2350                 fd.add_info_extractor(metacafe_ie)
2351                 fd.add_info_extractor(dailymotion_ie)
2352                 fd.add_info_extractor(youtube_ie)
2353                 fd.add_info_extractor(google_ie)
2354                 fd.add_info_extractor(google_search_ie)
2355                 fd.add_info_extractor(photobucket_ie)
2356                 fd.add_info_extractor(yahoo_ie)
2357                 fd.add_info_extractor(yahoo_search_ie)
2358
2359                 # This must come last since it's the
2360                 # fallback if none of the others work
2361                 fd.add_info_extractor(generic_ie)
2362
2363                 # Update version
2364                 if opts.update_self:
2365                         update_self(fd, sys.argv[0])
2366
2367                 # Maybe do nothing
2368                 if len(all_urls) < 1:
2369                         if not opts.update_self:
2370                                 parser.error(u'you must provide at least one URL')
2371                         else:
2372                                 sys.exit()
2373                 retcode = fd.download(all_urls)
2374
2375                 # Dump cookie jar if requested
2376                 if opts.cookiefile is not None:
2377                         try:
2378                                 jar.save()
2379                         except (IOError, OSError), err:
2380                                 sys.exit(u'ERROR: unable to save cookie jar')
2381
2382                 sys.exit(retcode)
2383
2384         except DownloadError:
2385                 sys.exit(1)
2386         except SameFileError:
2387                 sys.exit(u'ERROR: fixed output name but more than one file to download')
2388         except KeyboardInterrupt:
2389                 sys.exit(u'\nERROR: Interrupted by user')