2 # -*- coding: utf-8 -*-
5 'Ricardo Garcia Gonzalez',
13 'Philipp Hagemeister',
17 __license__ = 'Public Domain'
18 __version__ = '2011.09.13'
20 UPDATE_URL = 'https://raw.github.com/phihag/youtube-dl/master/youtube-dl'
48 except ImportError: # Python 2.4
51 import cStringIO as StringIO
55 # parse_qs was moved from the cgi module to the urlparse module recently.
57 from urlparse import parse_qs
59 from cgi import parse_qs
67 import xml.etree.ElementTree
68 except ImportError: # Python<2.5
69 pass # Not officially supported, but let it slip
72 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
73 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
74 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
75 'Accept-Encoding': 'gzip, deflate',
76 'Accept-Language': 'en-us,en;q=0.5',
79 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
83 except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
	def raiseError(msg, i):
		# Abort JSON parsing with full context: the failing index i, the
		# whole input (s, captured from the enclosing scope) and the
		# unconsumed remainder, so malformed input is easy to debug.
		raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))
91 def skipSpace(i, expectMore=True):
92 while i < len(s) and s[i] in ' \t\r\n':
96 raiseError('Premature end', i)
98 def decodeEscape(match):
114 return unichr(int(esc[1:5], 16))
115 if len(esc) == 5+6 and esc[5:7] == '\\u':
116 hi = int(esc[1:5], 16)
117 low = int(esc[7:11], 16)
118 return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
119 raise ValueError('Unknown escape ' + str(esc))
126 while s[e-bslashes-1] == '\\':
128 if bslashes % 2 == 1:
132 rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)')
133 stri = rexp.sub(decodeEscape, s[i:e])
139 if s[i] == '}': # Empty dictionary
143 raiseError('Expected a string object key', i)
144 i,key = parseString(i)
146 if i >= len(s) or s[i] != ':':
147 raiseError('Expected a colon', i)
154 raiseError('Expected comma or closing curly brace', i)
159 if s[i] == ']': # Empty array
164 i = skipSpace(i) # Raise exception if premature end
168 raiseError('Expected a comma or closing bracket', i)
170 def parseDiscrete(i):
171 for k,v in {'true': True, 'false': False, 'null': None}.items():
172 if s.startswith(k, i):
174 raiseError('Not a boolean (or null)', i)
176 mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:])
178 raiseError('Not a number', i)
180 if '.' in nums or 'e' in nums or 'E' in nums:
181 return (i+len(nums), float(nums))
182 return (i+len(nums), int(nums))
183 CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}
186 i,res = CHARMAP.get(s[i], parseNumber)(i)
187 i = skipSpace(i, False)
191 raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
194 def preferredencoding():
195 """Get preferred encoding.
197 Returns the best encoding scheme for the system, based on
198 locale.getpreferredencoding() and some further tweaks.
200 def yield_preferredencoding():
202 pref = locale.getpreferredencoding()
208 return yield_preferredencoding().next()
211 def htmlentity_transform(matchobj):
212 """Transforms an HTML entity to a Unicode character.
214 This function receives a match object and is intended to be used with
215 the re.sub() function.
217 entity = matchobj.group(1)
219 # Known non-numeric HTML entity
220 if entity in htmlentitydefs.name2codepoint:
221 return unichr(htmlentitydefs.name2codepoint[entity])
224 mobj = re.match(ur'(?u)#(x?\d+)', entity)
226 numstr = mobj.group(1)
227 if numstr.startswith(u'x'):
229 numstr = u'0%s' % numstr
232 return unichr(long(numstr, base))
234 # Unknown entity in name, return its literal representation
235 return (u'&%s;' % entity)
238 def sanitize_title(utitle):
239 """Sanitizes a video title so it could be used as part of a filename."""
240 utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
241 return utitle.replace(unicode(os.sep), u'%')
244 def sanitize_open(filename, open_mode):
245 """Try to open the given filename, and slightly tweak it if this fails.
247 Attempts to open the given filename. If this fails, it tries to change
248 the filename slightly, step by step, until it's either able to open it
249 or it fails and raises a final exception, like the standard open()
252 It returns the tuple (stream, definitive_file_name).
256 if sys.platform == 'win32':
258 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
259 return (sys.stdout, filename)
260 stream = open(filename, open_mode)
261 return (stream, filename)
262 except (IOError, OSError), err:
263 # In case of error, try to remove win32 forbidden chars
264 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
266 # An exception here should be caught in the caller
267 stream = open(filename, open_mode)
268 return (stream, filename)
271 def timeconvert(timestr):
272 """Convert RFC 2822 defined time string into system timestamp"""
274 timetuple = email.utils.parsedate_tz(timestr)
275 if timetuple is not None:
276 timestamp = email.utils.mktime_tz(timetuple)
280 class DownloadError(Exception):
281 """Download Error exception.
283 This exception may be thrown by FileDownloader objects if they are not
284 configured to continue on errors. They will contain the appropriate
290 class SameFileError(Exception):
291 """Same File exception.
293 This exception will be thrown by FileDownloader objects if they detect
294 multiple files would have to be downloaded to the same file on disk.
299 class PostProcessingError(Exception):
300 """Post Processing exception.
302 This exception may be raised by PostProcessor's .run() method to
303 indicate an error in the postprocessing task.
308 class UnavailableVideoError(Exception):
309 """Unavailable Format exception.
311 This exception will be thrown when a video is requested
312 in a format that is not available for that video.
317 class ContentTooShortError(Exception):
318 """Content Too Short exception.
320 This exception may be raised by FileDownloader objects when a file they
321 download is too small for what the server announced first, indicating
322 the connection was probably interrupted.
	def __init__(self, downloaded, expected):
		# Both values are byte counts: what was actually received versus
		# what the server announced (e.g. via Content-Length), kept so the
		# caller can report the exact mismatch.
		self.downloaded = downloaded
		self.expected = expected
333 class YoutubeDLHandler(urllib2.HTTPHandler):
334 """Handler for HTTP requests and responses.
336 This class, when installed with an OpenerDirector, automatically adds
337 the standard headers to every HTTP request and handles gzipped and
338 deflated responses from web servers. If compression is to be avoided in
339 a particular request, the original request in the program code only has
340 to include the HTTP header "Youtubedl-No-Compression", which will be
341 removed before making the real request.
343 Part of this code was copied from:
345 http://techknack.net/python-urllib2-handlers/
347 Andrew Rowls, the author of that code, agreed to release it to the
354 return zlib.decompress(data, -zlib.MAX_WBITS)
356 return zlib.decompress(data)
359 def addinfourl_wrapper(stream, headers, url, code):
360 if hasattr(urllib2.addinfourl, 'getcode'):
361 return urllib2.addinfourl(stream, headers, url, code)
362 ret = urllib2.addinfourl(stream, headers, url)
366 def http_request(self, req):
367 for h in std_headers:
370 req.add_header(h, std_headers[h])
371 if 'Youtubedl-no-compression' in req.headers:
372 if 'Accept-encoding' in req.headers:
373 del req.headers['Accept-encoding']
374 del req.headers['Youtubedl-no-compression']
377 def http_response(self, req, resp):
380 if resp.headers.get('Content-encoding', '') == 'gzip':
381 gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
382 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
383 resp.msg = old_resp.msg
385 if resp.headers.get('Content-encoding', '') == 'deflate':
386 gz = StringIO.StringIO(self.deflate(resp.read()))
387 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
388 resp.msg = old_resp.msg
392 class FileDownloader(object):
393 """File Downloader class.
395 File downloader objects are the ones responsible of downloading the
396 actual video file and writing it to disk if the user has requested
397 it, among some other tasks. In most cases there should be one per
398 program. As, given a video URL, the downloader doesn't know how to
399 extract all the needed information, task that InfoExtractors do, it
400 has to pass the URL to one of them.
402 For this, file downloader objects have a method that allows
403 InfoExtractors to be registered in a given order. When it is passed
404 a URL, the file downloader handles it to the first InfoExtractor it
405 finds that reports being able to handle it. The InfoExtractor extracts
406 all the information about the video or videos the URL refers to, and
407 asks the FileDownloader to process the video information, possibly
408 downloading the video.
410 File downloaders accept a lot of parameters. In order not to saturate
411 the object constructor with arguments, it receives a dictionary of
412 options instead. These options are available through the params
413 attribute for the InfoExtractors to use. The FileDownloader also
414 registers itself as the downloader in charge for the InfoExtractors
415 that are added to it, so this is a "mutual registration".
419 username: Username for authentication purposes.
420 password: Password for authentication purposes.
421 usenetrc: Use netrc for authentication instead.
422 quiet: Do not print messages to stdout.
423 forceurl: Force printing final URL.
424 forcetitle: Force printing title.
425 forcethumbnail: Force printing thumbnail URL.
426 forcedescription: Force printing description.
427 forcefilename: Force printing final filename.
428 simulate: Do not download the video files.
429 format: Video format code.
430 format_limit: Highest quality format to try.
431 outtmpl: Template for output names.
432 ignoreerrors: Do not stop on download errors.
433 ratelimit: Download speed limit, in bytes/sec.
434 nooverwrites: Prevent overwriting files.
435 retries: Number of times to retry for HTTP error 5xx
436 continuedl: Try to continue downloads if possible.
437 noprogress: Do not print the progress bar.
438 playliststart: Playlist item to start at.
439 playlistend: Playlist item to end at.
440 logtostderr: Log messages to stderr instead of stdout.
441 consoletitle: Display progress in console window's titlebar.
442 nopart: Do not use temporary .part files.
443 updatetime: Use the Last-modified header to set output file timestamps.
444 writedescription: Write the video description to a .description file
445 writeinfojson: Write the video description to a .info.json file
451 _download_retcode = None
452 _num_downloads = None
455 def __init__(self, params):
456 """Create a FileDownloader object with the given options."""
459 self._download_retcode = 0
460 self._num_downloads = 0
461 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
465 def format_bytes(bytes):
468 if type(bytes) is str:
473 exponent = long(math.log(bytes, 1024.0))
474 suffix = 'bkMGTPEZY'[exponent]
475 converted = float(bytes) / float(1024 ** exponent)
476 return '%.2f%s' % (converted, suffix)
479 def calc_percent(byte_counter, data_len):
482 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
485 def calc_eta(start, now, total, current):
489 if current == 0 or dif < 0.001: # One millisecond
491 rate = float(current) / dif
492 eta = long((float(total) - float(current)) / rate)
493 (eta_mins, eta_secs) = divmod(eta, 60)
496 return '%02d:%02d' % (eta_mins, eta_secs)
499 def calc_speed(start, now, bytes):
501 if bytes == 0 or dif < 0.001: # One millisecond
502 return '%10s' % '---b/s'
503 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
506 def best_block_size(elapsed_time, bytes):
507 new_min = max(bytes / 2.0, 1.0)
508 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
509 if elapsed_time < 0.001:
511 rate = bytes / elapsed_time
519 def parse_bytes(bytestr):
520 """Parse a string indicating a byte quantity into a long integer."""
521 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
524 number = float(matchobj.group(1))
525 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
526 return long(round(number * multiplier))
528 def add_info_extractor(self, ie):
529 """Add an InfoExtractor object to the end of the list."""
531 ie.set_downloader(self)
533 def add_post_processor(self, pp):
534 """Add a PostProcessor object to the end of the chain."""
536 pp.set_downloader(self)
538 def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
539 """Print message to stdout if not in quiet mode."""
541 if not self.params.get('quiet', False):
542 terminator = [u'\n', u''][skip_eol]
543 print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
544 self._screen_file.flush()
545 except (UnicodeEncodeError), err:
546 if not ignore_encoding_errors:
	def to_stderr(self, message):
		"""Print message to stderr."""
		# message is a unicode object; encode it with the locale-derived
		# encoding from preferredencoding() before writing (Python 2 stderr
		# expects bytes).
		print >>sys.stderr, message.encode(preferredencoding())
553 def to_cons_title(self, message):
554 """Set console/terminal window title to message."""
555 if not self.params.get('consoletitle', False):
557 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
558 # c_wchar_p() might not be necessary if `message` is
559 # already of type unicode()
560 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
561 elif 'TERM' in os.environ:
562 sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
	def fixed_template(self):
		"""Checks if the output template is fixed."""
		# "Fixed" means the template contains no %(field)s placeholders, so
		# every download would end up in the same file; download() uses this
		# to refuse multiple URLs with such a template.
		return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
568 def trouble(self, message=None):
569 """Determine action to take when a download problem appears.
571 Depending on if the downloader has been configured to ignore
572 download errors or not, this method may throw an exception or
573 not when errors are found, after printing the message.
575 if message is not None:
576 self.to_stderr(message)
577 if not self.params.get('ignoreerrors', False):
578 raise DownloadError(message)
579 self._download_retcode = 1
581 def slow_down(self, start_time, byte_counter):
582 """Sleep if the download speed is over the rate limit."""
583 rate_limit = self.params.get('ratelimit', None)
584 if rate_limit is None or byte_counter == 0:
587 elapsed = now - start_time
590 speed = float(byte_counter) / elapsed
591 if speed > rate_limit:
592 time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
594 def temp_name(self, filename):
595 """Returns a temporary filename for the given filename."""
596 if self.params.get('nopart', False) or filename == u'-' or \
597 (os.path.exists(filename) and not os.path.isfile(filename)):
599 return filename + u'.part'
601 def undo_temp_name(self, filename):
602 if filename.endswith(u'.part'):
603 return filename[:-len(u'.part')]
606 def try_rename(self, old_filename, new_filename):
608 if old_filename == new_filename:
610 os.rename(old_filename, new_filename)
611 except (IOError, OSError), err:
612 self.trouble(u'ERROR: unable to rename file')
614 def try_utime(self, filename, last_modified_hdr):
615 """Try to set the last-modified time of the given file."""
616 if last_modified_hdr is None:
618 if not os.path.isfile(filename):
620 timestr = last_modified_hdr
623 filetime = timeconvert(timestr)
627 os.utime(filename, (time.time(), filetime))
	def report_writedescription(self, descfn):
		"""Report that the description file is being written.

		descfn: destination path of the .description file.
		"""
		# ignore_encoding_errors: the filename may not be representable in
		# the console encoding; a status message must never abort a download.
		self.to_screen(u'[info] Writing video description to: %s' % descfn, ignore_encoding_errors=True)
	def report_writeinfojson(self, infofn):
		"""Report that the video metadata is being written to a .info.json file.

		infofn: destination path of the .info.json file.
		"""
		# NOTE(review): this runs before the file is opened (see
		# process_info), so "is being written" — the old docstring's
		# "has been written" overstated it.
		self.to_screen(u'[info] Video description metadata as JSON to: %s' % infofn, ignore_encoding_errors=True)
639 def report_destination(self, filename):
640 """Report destination filename."""
641 self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
643 def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
644 """Report download progress."""
645 if self.params.get('noprogress', False):
647 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
648 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
649 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
650 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
	def report_resuming_byte(self, resume_len):
		"""Report attempt to resume at given byte."""
		# resume_len is the size of the existing partial file — presumably
		# the .part file measured in _do_download; confirm against caller.
		self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
656 def report_retry(self, count, retries):
657 """Report retry in case of HTTP error 5xx"""
658 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
660 def report_file_already_downloaded(self, file_name):
661 """Report file has already been fully downloaded."""
663 self.to_screen(u'[download] %s has already been downloaded' % file_name)
664 except (UnicodeEncodeError), err:
665 self.to_screen(u'[download] The file has already been downloaded')
	def report_unable_to_resume(self):
		"""Report it was impossible to resume download."""
		# Emitted when the server rejects/ignores the Range request and the
		# download restarts from byte zero (see _do_download).
		self.to_screen(u'[download] Unable to resume')
671 def report_finish(self):
672 """Report download finished."""
673 if self.params.get('noprogress', False):
674 self.to_screen(u'[download] Download completed')
	def increment_downloads(self):
		"""Increment the ordinal that assigns a number to each file."""
		# Feeds the %(autonumber)s template field in prepare_filename().
		self._num_downloads += 1
682 def prepare_filename(self, info_dict):
683 """Generate the output filename."""
685 template_dict = dict(info_dict)
686 template_dict['epoch'] = unicode(long(time.time()))
687 template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
688 filename = self.params['outtmpl'] % template_dict
690 except (ValueError, KeyError), err:
691 self.trouble(u'ERROR: invalid system charset or erroneous output template')
694 def process_info(self, info_dict):
695 """Process a single dictionary returned by an InfoExtractor."""
696 filename = self.prepare_filename(info_dict)
697 # Do nothing else if in simulate mode
698 if self.params.get('simulate', False):
700 if self.params.get('forcetitle', False):
701 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
702 if self.params.get('forceurl', False):
703 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
704 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
705 print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
706 if self.params.get('forcedescription', False) and 'description' in info_dict:
707 print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
708 if self.params.get('forcefilename', False) and filename is not None:
709 print filename.encode(preferredencoding(), 'xmlcharrefreplace')
715 if self.params.get('nooverwrites', False) and os.path.exists(filename):
716 self.to_stderr(u'WARNING: file exists and will be skipped')
720 dn = os.path.dirname(filename)
721 if dn != '' and not os.path.exists(dn):
723 except (OSError, IOError), err:
724 self.trouble(u'ERROR: unable to create directory ' + unicode(err))
727 if self.params.get('writedescription', False):
729 descfn = filename + '.description'
730 self.report_writedescription(descfn)
731 descfile = open(descfn, 'wb')
733 descfile.write(info_dict['description'].encode('utf-8'))
736 except (OSError, IOError):
737 self.trouble(u'ERROR: Cannot write description file ' + descfn)
740 if self.params.get('writeinfojson', False):
741 infofn = filename + '.info.json'
742 self.report_writeinfojson(infofn)
745 except (NameError,AttributeError):
746 self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
749 infof = open(infofn, 'wb')
751 json.dump(info_dict, infof)
754 except (OSError, IOError):
755 self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
759 success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
760 except (OSError, IOError), err:
761 raise UnavailableVideoError
762 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
763 self.trouble(u'ERROR: unable to download video data: %s' % str(err))
765 except (ContentTooShortError, ), err:
766 self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
771 self.post_process(filename, info_dict)
772 except (PostProcessingError), err:
773 self.trouble(u'ERROR: postprocessing: %s' % str(err))
776 def download(self, url_list):
777 """Download a given list of URLs."""
778 if len(url_list) > 1 and self.fixed_template():
779 raise SameFileError(self.params['outtmpl'])
782 suitable_found = False
784 # Go to next InfoExtractor if not suitable
785 if not ie.suitable(url):
788 # Suitable InfoExtractor found
789 suitable_found = True
791 # Extract information from URL and process it
794 # Suitable InfoExtractor had been found; go to next URL
797 if not suitable_found:
798 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
800 return self._download_retcode
802 def post_process(self, filename, ie_info):
803 """Run the postprocessing chain on the given file."""
805 info['filepath'] = filename
811 def _download_with_rtmpdump(self, filename, url, player_url):
812 self.report_destination(filename)
813 tmpfilename = self.temp_name(filename)
815 # Check for rtmpdump first
817 subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
818 except (OSError, IOError):
819 self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
822 # Download using rtmpdump. rtmpdump returns exit code 2 when
823 # the connection was interrumpted and resuming appears to be
824 # possible. This is part of rtmpdump's normal usage, AFAIK.
825 basic_args = ['rtmpdump'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
826 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
827 while retval == 2 or retval == 1:
828 prevsize = os.path.getsize(tmpfilename)
829 self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
830 time.sleep(5.0) # This seems to be needed
831 retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
832 cursize = os.path.getsize(tmpfilename)
833 if prevsize == cursize and retval == 1:
836 self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
837 self.try_rename(tmpfilename, filename)
840 self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
843 def _do_download(self, filename, url, player_url):
844 # Check file already present
845 if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
846 self.report_file_already_downloaded(filename)
849 # Attempt to download using rtmpdump
850 if url.startswith('rtmp'):
851 return self._download_with_rtmpdump(filename, url, player_url)
853 tmpfilename = self.temp_name(filename)
857 # Do not include the Accept-Encoding header
858 headers = {'Youtubedl-no-compression': 'True'}
859 basic_request = urllib2.Request(url, None, headers)
860 request = urllib2.Request(url, None, headers)
862 # Establish possible resume length
863 if os.path.isfile(tmpfilename):
864 resume_len = os.path.getsize(tmpfilename)
868 # Request parameters in case of being able to resume
869 if self.params.get('continuedl', False) and resume_len != 0:
870 self.report_resuming_byte(resume_len)
871 request.add_header('Range', 'bytes=%d-' % resume_len)
875 retries = self.params.get('retries', 0)
876 while count <= retries:
877 # Establish connection
879 data = urllib2.urlopen(request)
881 except (urllib2.HTTPError, ), err:
882 if (err.code < 500 or err.code >= 600) and err.code != 416:
883 # Unexpected HTTP error
885 elif err.code == 416:
886 # Unable to resume (requested range not satisfiable)
888 # Open the connection again without the range header
889 data = urllib2.urlopen(basic_request)
890 content_length = data.info()['Content-Length']
891 except (urllib2.HTTPError, ), err:
892 if err.code < 500 or err.code >= 600:
895 # Examine the reported length
896 if (content_length is not None and
897 (resume_len - 100 < long(content_length) < resume_len + 100)):
898 # The file had already been fully downloaded.
899 # Explanation to the above condition: in issue #175 it was revealed that
900 # YouTube sometimes adds or removes a few bytes from the end of the file,
901 # changing the file size slightly and causing problems for some users. So
902 # I decided to implement a suggested change and consider the file
903 # completely downloaded if the file size differs less than 100 bytes from
904 # the one in the hard drive.
905 self.report_file_already_downloaded(filename)
906 self.try_rename(tmpfilename, filename)
909 # The length does not match, we start the download over
910 self.report_unable_to_resume()
916 self.report_retry(count, retries)
919 self.trouble(u'ERROR: giving up after %s retries' % retries)
922 data_len = data.info().get('Content-length', None)
923 if data_len is not None:
924 data_len = long(data_len) + resume_len
925 data_len_str = self.format_bytes(data_len)
926 byte_counter = 0 + resume_len
932 data_block = data.read(block_size)
934 if len(data_block) == 0:
936 byte_counter += len(data_block)
938 # Open file just in time
941 (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
942 assert stream is not None
943 filename = self.undo_temp_name(tmpfilename)
944 self.report_destination(filename)
945 except (OSError, IOError), err:
946 self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
949 stream.write(data_block)
950 except (IOError, OSError), err:
951 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
953 block_size = self.best_block_size(after - before, len(data_block))
956 percent_str = self.calc_percent(byte_counter, data_len)
957 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
958 speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
959 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
962 self.slow_down(start, byte_counter - resume_len)
965 self.trouble(u'\nERROR: Did not get any data blocks')
969 if data_len is not None and byte_counter != data_len:
970 raise ContentTooShortError(byte_counter, long(data_len))
971 self.try_rename(tmpfilename, filename)
973 # Update file modification time
974 if self.params.get('updatetime', True):
975 self.try_utime(filename, data.info().get('last-modified', None))
980 class InfoExtractor(object):
981 """Information Extractor class.
983 Information extractors are the classes that, given a URL, extract
984 information from the video (or videos) the URL refers to. This
985 information includes the real video URL, the video title and simplified
986 title, author and others. The information is stored in a dictionary
987 which is then passed to the FileDownloader. The FileDownloader
988 processes this information possibly downloading the video to the file
989 system, among other possible outcomes. The dictionaries must include
990 the following fields:
992 id: Video identifier.
993 url: Final video URL.
994 uploader: Nickname of the video uploader.
995 title: Literal title.
996 stitle: Simplified title.
997 ext: Video filename extension.
998 format: Video format.
999 player_url: SWF Player URL (may be None).
1001 The following fields are optional. Their primary purpose is to allow
1002 youtube-dl to serve as the backend for a video search function, such
1003 as the one in youtube2mp3. They are only used when their respective
1004 forced printing functions are called:
1006 thumbnail: Full URL to a video thumbnail image.
1007 description: One-line video description.
1009 Subclasses of this one should re-define the _real_initialize() and
1010 _real_extract() methods, as well as the suitable() static method.
1011 Probably, they should also be instantiated and added to the main
1018 def __init__(self, downloader=None):
1019 """Constructor. Receives an optional downloader."""
1021 self.set_downloader(downloader)
1025 """Receives a URL and returns True if suitable for this IE."""
1028 def initialize(self):
1029 """Initializes an instance (authentication, etc)."""
1031 self._real_initialize()
1034 def extract(self, url):
1035 """Extracts URL information and returns it in list of dicts."""
1037 return self._real_extract(url)
	def set_downloader(self, downloader):
		"""Sets the downloader for this IE."""
		# Stored for later use by the report_*/extract helpers; may be None
		# (the constructor passes its optional argument through here).
		self._downloader = downloader
1043 def _real_initialize(self):
1044 """Real initialization process. Redefine in subclasses."""
1047 def _real_extract(self, url):
1048 """Real extraction process. Redefine in subclasses."""
1052 class YoutubeIE(InfoExtractor):
1053 """Information extractor for youtube.com."""
1055 _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
1056 _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
1057 _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
1058 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
1059 _NETRC_MACHINE = 'youtube'
1060 # Listed in order of quality
1061 _available_formats = ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13']
1062 _video_extensions = {
1068 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
1075 return (re.match(YoutubeIE._VALID_URL, url) is not None)
	def report_lang(self):
		"""Report attempt to set language."""
		# Status message only; the actual language request is made elsewhere
		# (presumably _real_initialize via _LANG_URL — confirm).
		self._downloader.to_screen(u'[youtube] Setting language')
	def report_login(self):
		"""Report attempt to log in."""
		# Status message only; credentials are handled in _real_initialize.
		self._downloader.to_screen(u'[youtube] Logging in')
	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		# Printed just before posting the age-verification form (_AGE_URL).
		self._downloader.to_screen(u'[youtube] Confirming age')
1089 def report_video_webpage_download(self, video_id):
1090 """Report attempt to download video webpage."""
1091 self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
	def report_video_info_webpage_download(self, video_id):
		"""Report attempt to download video info webpage."""
		# video_id is interpolated so parallel log lines stay attributable.
		self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
	def report_information_extraction(self, video_id):
		"""Report attempt to extract video information."""
		# Emitted once per video when parsing of the fetched pages begins.
		self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
	def report_unavailable_format(self, video_id, format):
		"""Report that the requested format is not available for this video."""
		# (The previous docstring, "Report extracted video URL.", was a
		# copy-paste error — the message below is what this method does.)
		self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
	def report_rtmp_download(self):
		"""Indicate the download will use the RTMP protocol."""
		# RTMP URLs are handed off to rtmpdump by the FileDownloader
		# (see _download_with_rtmpdump).
		self._downloader.to_screen(u'[youtube] RTMP download detected')
# Session setup before any extraction: obtain credentials (explicit downloader
# params, else .netrc when 'usenetrc' is set), fetch the language page for its
# cookie side effect, POST the login form, then confirm the age gate.
# NOTE(review): this numbered listing elides lines (e.g. `try:` openers, early
# `return`s, and the username/password initialisation between the printed line
# numbers) — it is not directly runnable as shown.
1109 def _real_initialize(self):
1110 if self._downloader is None:
1115 downloader_params = self._downloader.params
1117 # Attempt to use provided username and password or .netrc data
1118 if downloader_params.get('username', None) is not None:
1119 username = downloader_params['username']
1120 password = downloader_params['password']
1121 elif downloader_params.get('usenetrc', False):
# .netrc lookup keyed on self._NETRC_MACHINE; parse/IO failures are
# downgraded to stderr warnings rather than aborting.
1123 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
1124 if info is not None:
1128 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
1129 except (IOError, netrc.NetrcParseError), err:
1130 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
# Fetch self._LANG_URL purely for its session/cookie side effect; the
# response body is read and discarded.
1134 request = urllib2.Request(self._LANG_URL)
1137 urllib2.urlopen(request).read()
1138 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1139 self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
1142 # No authentication to be performed
1143 if username is None:
# POST the login form; a page that still contains the loginForm <form>
# means the credentials were rejected. Failures are warnings, not fatal.
1148 'current_form': 'loginForm',
1150 'action_login': 'Log In',
1151 'username': username,
1152 'password': password,
1154 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
1157 login_results = urllib2.urlopen(request).read()
1158 if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
1159 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
1161 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1162 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
# Confirm the age gate via self._AGE_URL; unlike the steps above, failure
# here goes through trouble() and is treated as an error.
1168 'action_confirm': 'Confirm',
1170 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
1172 self.report_age_confirmation()
1173 age_results = urllib2.urlopen(request).read()
1174 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1175 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
# Main YouTube extraction: parse the video id from the URL, download the watch
# page and the get_video_info endpoint, pull metadata (uploader, title,
# thumbnail, upload date, description), build the itag->URL map, select the
# format(s) to fetch, and hand each one to process_info().
# NOTE(review): the numbered listing elides lines (`try:` openers, `return`s,
# `else:` branches) between the printed line numbers.
1178 def _real_extract(self, url):
1179 # Extract video id from URL
1180 mobj = re.match(self._VALID_URL, url)
1182 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1184 video_id = mobj.group(2)
1187 self.report_video_webpage_download(video_id)
1188 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
1190 video_webpage = urllib2.urlopen(request).read()
1191 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1192 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
1195 # Attempt to extract SWF player URL
# The SWF URL in the page is JS-escaped ("http:\/\/..."); the re.sub below
# strips the backslash escapes.
1196 mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1197 if mobj is not None:
1198 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
# Try several 'el' variants of get_video_info; stop at the first response
# that carries a 'token' parameter.
1203 self.report_video_info_webpage_download(video_id)
1204 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1205 video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1206 % (video_id, el_type))
1207 request = urllib2.Request(video_info_url)
1209 video_info_webpage = urllib2.urlopen(request).read()
1210 video_info = parse_qs(video_info_webpage)
1211 if 'token' in video_info:
1213 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1214 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
1216 if 'token' not in video_info:
1217 if 'reason' in video_info:
1218 self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
1220 self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
1223 # Start extracting information
1224 self.report_information_extraction(video_id)
1227 if 'author' not in video_info:
1228 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1230 video_uploader = urllib.unquote_plus(video_info['author'][0])
1233 if 'title' not in video_info:
1234 self._downloader.trouble(u'ERROR: unable to extract video title')
1236 video_title = urllib.unquote_plus(video_info['title'][0])
1237 video_title = video_title.decode('utf-8')
1238 video_title = sanitize_title(video_title)
# simple_title: collapse every run of characters outside simple_title_chars
# (ASCII letters/digits, defined at module level) into a single underscore.
1241 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1242 simple_title = simple_title.strip(ur'_')
1245 if 'thumbnail_url' not in video_info:
1246 self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
1247 video_thumbnail = ''
1248 else: # don't panic if we can't find it
1249 video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
# Upload date: scraped from the "eow-date" span on the watch page and
# normalised to YYYYMMDD by trying several strptime patterns.
1253 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1254 if mobj is not None:
1255 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1256 format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
1257 for expression in format_expressions:
1259 upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
# Description is only extracted when the user asked for it (forcedescription
# or writedescription); uses lxml when available, meta tag otherwise.
1267 video_description = u'No description available.'
1268 if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False):
1269 mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
1270 if mobj is not None:
1271 video_description = mobj.group(1).decode('utf-8')
1273 html_parser = lxml.etree.HTMLParser(encoding='utf-8')
1274 vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
1275 video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
1276 # TODO use another parser
1279 video_token = urllib.unquote_plus(video_info['token'][0])
1281 # Decide which formats to download
1282 req_format = self._downloader.params.get('format', None)
# Three sources, in priority order: an rtmp 'conn' entry, the
# url_encoded_fmt_stream_map (itag/url pairs), else error out.
1284 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1285 self.report_rtmp_download()
1286 video_url_list = [(None, video_info['conn'][0])]
1287 elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1288 url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
1289 url_data = [parse_qs(uds) for uds in url_data_strs]
1290 url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
1291 url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data)
# format_limit caps quality: only formats at or below the limit (in the
# class's preference-ordered _available_formats list) are considered.
1293 format_limit = self._downloader.params.get('format_limit', None)
1294 if format_limit is not None and format_limit in self._available_formats:
1295 format_list = self._available_formats[self._available_formats.index(format_limit):]
1297 format_list = self._available_formats
1298 existing_formats = [x for x in format_list if x in url_map]
1299 if len(existing_formats) == 0:
1300 self._downloader.trouble(u'ERROR: no known formats available for video')
1302 if req_format is None:
1303 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1304 elif req_format == '-1':
1305 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1308 if req_format not in url_map:
1309 self._downloader.trouble(u'ERROR: requested format not available')
1311 video_url_list = [(req_format, url_map[req_format])] # Specific format
1313 self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')
1316 for format_param, video_real_url in video_url_list:
1317 # At this point we have a new video
1318 self._downloader.increment_downloads()
1321 video_extension = self._video_extensions.get(format_param, 'flv')
1324 # Process video information
1325 self._downloader.process_info({
1326 'id': video_id.decode('utf-8'),
1327 'url': video_real_url.decode('utf-8'),
1328 'uploader': video_uploader.decode('utf-8'),
1329 'upload_date': upload_date,
1330 'title': video_title,
1331 'stitle': simple_title,
1332 'ext': video_extension.decode('utf-8'),
# NOTE(review): `A and B or C` ternary idiom — yields u'NA' for ANY falsy
# format_param, not just None; `u'NA' if format_param is None else ...`
# would be the safe form.
1333 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
1334 'thumbnail': video_thumbnail.decode('utf-8'),
1335 'description': video_description,
1336 'player_url': player_url,
1338 except UnavailableVideoError, err:
1339 self._downloader.trouble(u'\nERROR: unable to download video')
# Extractor for metacafe.com watch pages. Initialisation clicks through the
# family-filter disclaimer; extraction handles three URL sources in order:
# a direct &mediaURL (optionally signed with gdaKey), then the flashvars
# mediaData JSON. "yt-"-prefixed ids are delegated to the YouTube extractor.
# NOTE(review): numbered listing with elided lines (`try:`/`return`/`else:`
# openers are missing between printed line numbers); not directly runnable.
1342 class MetacafeIE(InfoExtractor):
1343 """Information Extractor for metacafe.com."""
1345 _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
1346 _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
1347 _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
# Keeps a reference to a YoutubeIE instance so yt- ids can be re-dispatched.
1350 def __init__(self, youtube_ie, downloader=None):
1351 InfoExtractor.__init__(self, downloader)
1352 self._youtube_ie = youtube_ie
1356 return (re.match(MetacafeIE._VALID_URL, url) is not None)
1358 def report_disclaimer(self):
1359 """Report disclaimer retrieval."""
1360 self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
1362 def report_age_confirmation(self):
1363 """Report attempt to confirm age."""
1364 self._downloader.to_screen(u'[metacafe] Confirming age')
1366 def report_download_webpage(self, video_id):
1367 """Report webpage download."""
1368 self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
1370 def report_extraction(self, video_id):
1371 """Report information extraction."""
1372 self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
# Session setup: fetch the disclaimer page, then POST the family-filter form
# so subsequent watch-page requests are not blocked.
1374 def _real_initialize(self):
1375 # Retrieve disclaimer
1376 request = urllib2.Request(self._DISCLAIMER)
1378 self.report_disclaimer()
1379 disclaimer = urllib2.urlopen(request).read()
1380 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1381 self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
1387 'submit': "Continue - I'm over 18",
1389 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
1391 self.report_age_confirmation()
1392 disclaimer = urllib2.urlopen(request).read()
1393 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1394 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
1397 def _real_extract(self, url):
1398 # Extract id and simplified title from URL
1399 mobj = re.match(self._VALID_URL, url)
1401 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1404 video_id = mobj.group(1)
1406 # Check if video comes from YouTube
# Ids of the form "yt-XXXX" are YouTube embeds: hand off and stop here.
1407 mobj2 = re.match(r'^yt-(.*)$', video_id)
1408 if mobj2 is not None:
1409 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1412 # At this point we have a new video
1413 self._downloader.increment_downloads()
# simple_title comes straight from the second URL path component.
1415 simple_title = mobj.group(2).decode('utf-8')
1417 # Retrieve video webpage to extract further information
1418 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1420 self.report_download_webpage(video_id)
1421 webpage = urllib2.urlopen(request).read()
1422 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1423 self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1426 # Extract URL, uploader and title from webpage
1427 self.report_extraction(video_id)
# Primary path: direct &mediaURL=... parameter in the page.
1428 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
1429 if mobj is not None:
1430 mediaURL = urllib.unquote(mobj.group(1))
1431 video_extension = mediaURL[-3:]
1433 # Extract gdaKey if available
# gdaKey is an access token appended as ?__gda__=... when present.
1434 mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1436 video_url = mediaURL
1438 gdaKey = mobj.group(1)
1439 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
# Fallback path: parse the flashvars query string and its mediaData JSON.
1441 mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
1443 self._downloader.trouble(u'ERROR: unable to extract media URL')
1445 vardict = parse_qs(mobj.group(1))
1446 if 'mediaData' not in vardict:
1447 self._downloader.trouble(u'ERROR: unable to extract media URL')
1449 mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
1451 self._downloader.trouble(u'ERROR: unable to extract media URL')
# JSON-escaped slashes ("\/") are unescaped; "key" becomes the __gda__ token.
1453 mediaURL = mobj.group(1).replace('\\/', '/')
1454 video_extension = mediaURL[-3:]
1455 video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
1457 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
1459 self._downloader.trouble(u'ERROR: unable to extract title')
1461 video_title = mobj.group(1).decode('utf-8')
1462 video_title = sanitize_title(video_title)
1464 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
1466 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1468 video_uploader = mobj.group(1)
1471 # Process video information
1472 self._downloader.process_info({
1473 'id': video_id.decode('utf-8'),
1474 'url': video_url.decode('utf-8'),
1475 'uploader': video_uploader.decode('utf-8'),
1476 'upload_date': u'NA',
1477 'title': video_title,
1478 'stitle': simple_title,
1479 'ext': video_extension.decode('utf-8'),
1483 except UnavailableVideoError:
1484 self._downloader.trouble(u'\nERROR: unable to download video')
# Extractor for Dailymotion /video/<id>_<slug> pages: downloads the watch page
# and scrapes the flash "video" addVariable for the media URL, plus title and
# owner for metadata. Always produces an FLV.
# NOTE(review): numbered listing with elided lines between printed numbers.
1487 class DailymotionIE(InfoExtractor):
1488 """Information Extractor for Dailymotion"""
1490 _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1492 def __init__(self, downloader=None):
1493 InfoExtractor.__init__(self, downloader)
1497 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1499 def report_download_webpage(self, video_id):
1500 """Report webpage download."""
1501 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
1503 def report_extraction(self, video_id):
1504 """Report information extraction."""
1505 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
# No session setup required for Dailymotion.
1507 def _real_initialize(self):
1510 def _real_extract(self, url):
1511 # Extract id and simplified title from URL
1512 mobj = re.match(self._VALID_URL, url)
1514 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1517 # At this point we have a new video
1518 self._downloader.increment_downloads()
1519 video_id = mobj.group(1)
# The URL slug after the underscore doubles as the simplified title.
1521 simple_title = mobj.group(2).decode('utf-8')
1522 video_extension = 'flv'
1524 # Retrieve video webpage to extract further information
1525 request = urllib2.Request(url)
1527 self.report_download_webpage(video_id)
1528 webpage = urllib2.urlopen(request).read()
1529 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1530 self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1533 # Extract URL, uploader and title from webpage
1534 self.report_extraction(video_id)
# The player is configured via JS: addVariable("video", "<url-encoded URL>").
1535 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1537 self._downloader.trouble(u'ERROR: unable to extract media URL')
1539 mediaURL = urllib.unquote(mobj.group(1))
1541 # if needed add http://www.dailymotion.com/ if relative URL
1543 video_url = mediaURL
1545 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1546 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1548 self._downloader.trouble(u'ERROR: unable to extract title')
1550 video_title = mobj.group(1).decode('utf-8')
1551 video_title = sanitize_title(video_title)
1553 mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
1555 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1557 video_uploader = mobj.group(1)
1560 # Process video information
1561 self._downloader.process_info({
1562 'id': video_id.decode('utf-8'),
1563 'url': video_url.decode('utf-8'),
1564 'uploader': video_uploader.decode('utf-8'),
1565 'upload_date': u'NA',
1566 'title': video_title,
1567 'stitle': simple_title,
1568 'ext': video_extension.decode('utf-8'),
1572 except UnavailableVideoError:
1573 self._downloader.trouble(u'\nERROR: unable to download video')
# Extractor for Google Video (video.google.* videoplay pages). Prefers the
# page's download_url (mp4); falls back to the JS-escaped videoUrl (flv).
# Optionally scrapes a thumbnail from the search page when forcethumbnail set.
# NOTE(review): numbered listing with elided lines between printed numbers.
1576 class GoogleIE(InfoExtractor):
1577 """Information extractor for video.google.com."""
1579 _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1581 def __init__(self, downloader=None):
1582 InfoExtractor.__init__(self, downloader)
1586 return (re.match(GoogleIE._VALID_URL, url) is not None)
1588 def report_download_webpage(self, video_id):
1589 """Report webpage download."""
1590 self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
1592 def report_extraction(self, video_id):
1593 """Report information extraction."""
1594 self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
# No session setup required for Google Video.
1596 def _real_initialize(self):
1599 def _real_extract(self, url):
1600 # Extract id from URL
1601 mobj = re.match(self._VALID_URL, url)
1603 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1606 # At this point we have a new video
1607 self._downloader.increment_downloads()
1608 video_id = mobj.group(1)
1610 video_extension = 'mp4'
1612 # Retrieve video webpage to extract further information
1613 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1615 self.report_download_webpage(video_id)
1616 webpage = urllib2.urlopen(request).read()
1617 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1618 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1621 # Extract URL, uploader, and title from webpage
1622 self.report_extraction(video_id)
# Primary path: the page exposes download_url:'...' (an mp4 link).
1623 mobj = re.search(r"download_url:'([^']+)'", webpage)
# Fallback path: videoUrl embedded with \x3d / \x26 JS hex escapes (flv).
1625 video_extension = 'flv'
1626 mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1628 self._downloader.trouble(u'ERROR: unable to extract media URL')
1630 mediaURL = urllib.unquote(mobj.group(1))
# Undo the JS hex escapes: '\\x3d' -> '=', '\\x26' -> '&'.
1631 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1632 mediaURL = mediaURL.replace('\\x26', '\x26')
1634 video_url = mediaURL
1636 mobj = re.search(r'<title>(.*)</title>', webpage)
1638 self._downloader.trouble(u'ERROR: unable to extract title')
1640 video_title = mobj.group(1).decode('utf-8')
1641 video_title = sanitize_title(video_title)
1642 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1644 # Extract video description
1645 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1647 self._downloader.trouble(u'ERROR: unable to extract video description')
1649 video_description = mobj.group(1).decode('utf-8')
1650 if not video_description:
1651 video_description = 'No description available.'
1653 # Extract video thumbnail
# Only fetched on demand: an extra search-page request per video otherwise.
1654 if self._downloader.params.get('forcethumbnail', False):
1655 request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1657 webpage = urllib2.urlopen(request).read()
1658 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1659 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1661 mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1663 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1665 video_thumbnail = mobj.group(1)
1666 else: # we need something to pass to process_info
1667 video_thumbnail = ''
1670 # Process video information
1671 self._downloader.process_info({
1672 'id': video_id.decode('utf-8'),
1673 'url': video_url.decode('utf-8'),
1675 'upload_date': u'NA',
1676 'title': video_title,
1677 'stitle': simple_title,
1678 'ext': video_extension.decode('utf-8'),
1682 except UnavailableVideoError:
1683 self._downloader.trouble(u'\nERROR: unable to download video')
# Extractor for photobucket.com FLV links (?current=<id>.flv). Scrapes the
# video_src <link> for the media URL and the <title> for both the video title
# and the uploader (the title regex captures both groups).
# NOTE(review): numbered listing with elided lines between printed numbers.
1686 class PhotobucketIE(InfoExtractor):
1687 """Information extractor for photobucket.com."""
1689 _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1691 def __init__(self, downloader=None):
1692 InfoExtractor.__init__(self, downloader)
1696 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1698 def report_download_webpage(self, video_id):
1699 """Report webpage download."""
1700 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1702 def report_extraction(self, video_id):
1703 """Report information extraction."""
1704 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
# No session setup required for Photobucket.
1706 def _real_initialize(self):
1709 def _real_extract(self, url):
1710 # Extract id from URL
1711 mobj = re.match(self._VALID_URL, url)
1713 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1716 # At this point we have a new video
1717 self._downloader.increment_downloads()
1718 video_id = mobj.group(1)
1720 video_extension = 'flv'
1722 # Retrieve video webpage to extract further information
1723 request = urllib2.Request(url)
1725 self.report_download_webpage(video_id)
1726 webpage = urllib2.urlopen(request).read()
1727 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1728 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1731 # Extract URL, uploader, and title from webpage
1732 self.report_extraction(video_id)
1733 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1735 self._downloader.trouble(u'ERROR: unable to extract media URL')
1737 mediaURL = urllib.unquote(mobj.group(1))
1739 video_url = mediaURL
# The <title> has the form "<title> video by <uploader> - Photobucket":
# group(1) is the title, group(2) the uploader (used below).
1741 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1743 self._downloader.trouble(u'ERROR: unable to extract title')
1745 video_title = mobj.group(1).decode('utf-8')
1746 video_title = sanitize_title(video_title)
1747 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1749 video_uploader = mobj.group(2).decode('utf-8')
1752 # Process video information
1753 self._downloader.process_info({
1754 'id': video_id.decode('utf-8'),
1755 'url': video_url.decode('utf-8'),
1756 'uploader': video_uploader,
1757 'upload_date': u'NA',
1758 'title': video_title,
1759 'stitle': simple_title,
1760 'ext': video_extension.decode('utf-8'),
1764 except UnavailableVideoError:
1765 self._downloader.trouble(u'\nERROR: unable to download video')
# Extractor for Yahoo! Video. Non-/watch/ URLs are first rewritten to the
# canonical /watch/<vid>/<id> form (via one recursive _real_extract call);
# then the watch page is scraped for metadata and a playlist XML request
# yields the final FLV stream URL.
# NOTE(review): numbered listing with elided lines between printed numbers.
1768 class YahooIE(InfoExtractor):
1769 """Information extractor for video.yahoo.com."""
1771 # _VALID_URL matches all Yahoo! Video URLs
1772 # _VPAGE_URL matches only the extractable '/watch/' URLs
1773 _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1774 _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1776 def __init__(self, downloader=None):
1777 InfoExtractor.__init__(self, downloader)
1781 return (re.match(YahooIE._VALID_URL, url) is not None)
1783 def report_download_webpage(self, video_id):
1784 """Report webpage download."""
1785 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1787 def report_extraction(self, video_id):
1788 """Report information extraction."""
1789 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
# No session setup required for Yahoo! Video.
1791 def _real_initialize(self):
# new_video=False marks the recursive call after URL rewriting, so the
# download counter is not bumped twice for the same video.
1794 def _real_extract(self, url, new_video=True):
1795 # Extract ID from URL
1796 mobj = re.match(self._VALID_URL, url)
1798 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1801 # At this point we have a new video
1802 self._downloader.increment_downloads()
1803 video_id = mobj.group(2)
1804 video_extension = 'flv'
1806 # Rewrite valid but non-extractable URLs as
1807 # extractable English language /watch/ URLs
1808 if re.match(self._VPAGE_URL, url) is None:
1809 request = urllib2.Request(url)
1811 webpage = urllib2.urlopen(request).read()
1812 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1813 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1816 mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1818 self._downloader.trouble(u'ERROR: Unable to extract id field')
1820 yahoo_id = mobj.group(1)
1822 mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1824 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1826 yahoo_vid = mobj.group(1)
# Recurse once with the canonical /watch/ URL built from the scraped ids.
1828 url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1829 return self._real_extract(url, new_video=False)
1831 # Retrieve video webpage to extract further information
1832 request = urllib2.Request(url)
1834 self.report_download_webpage(video_id)
1835 webpage = urllib2.urlopen(request).read()
1836 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1837 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1840 # Extract uploader and title from webpage
1841 self.report_extraction(video_id)
1842 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1844 self._downloader.trouble(u'ERROR: unable to extract video title')
1846 video_title = mobj.group(1).decode('utf-8')
1847 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1849 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1851 self._downloader.trouble(u'ERROR: unable to extract video uploader')
# NOTE(review): group(1) is the (people|profile) alternation, not the
# uploader name — the name is group(2). Looks like an off-by-one in the
# group index; confirm against a live page before changing.
1853 video_uploader = mobj.group(1).decode('utf-8')
1855 # Extract video thumbnail
1856 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1858 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1860 video_thumbnail = mobj.group(1).decode('utf-8')
1862 # Extract video description
1863 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1865 self._downloader.trouble(u'ERROR: unable to extract video description')
1867 video_description = mobj.group(1).decode('utf-8')
1868 if not video_description:
1869 video_description = 'No description available.'
1871 # Extract video height and width
1872 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1874 self._downloader.trouble(u'ERROR: unable to extract video height')
1876 yv_video_height = mobj.group(1)
1878 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1880 self._downloader.trouble(u'ERROR: unable to extract video width')
1882 yv_video_width = mobj.group(1)
1884 # Retrieve video playlist to extract media URL
1885 # I'm not completely sure what all these options are, but we
1886 # seem to need most of them, otherwise the server sends a 401.
1887 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
1888 yv_bitrate = '700' # according to Wikipedia this is hard-coded
1889 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1890 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1891 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1893 self.report_download_webpage(video_id)
1894 webpage = urllib2.urlopen(request).read()
1895 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1896 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1899 # Extract media URL from playlist XML
1900 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1902 self._downloader.trouble(u'ERROR: Unable to extract media URL')
1904 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1905 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1908 # Process video information
1909 self._downloader.process_info({
1910 'id': video_id.decode('utf-8'),
1912 'uploader': video_uploader,
1913 'upload_date': u'NA',
1914 'title': video_title,
1915 'stitle': simple_title,
1916 'ext': video_extension.decode('utf-8'),
# NOTE(review): 'thumbnail' and 'description' each appear twice in this
# dict literal; the later (undecoded) entries win. One pair should go.
1917 'thumbnail': video_thumbnail.decode('utf-8'),
1918 'description': video_description,
1919 'thumbnail': video_thumbnail,
1920 'description': video_description,
1923 except UnavailableVideoError:
1924 self._downloader.trouble(u'\nERROR: unable to download video')
# Extractor for vimeo.com: fetches the moogaloop clip-config XML for a clip id,
# scrapes title/uploader/thumbnail plus the request signature and its expiry,
# and builds the moogaloop play URL from them.
# NOTE(review): numbered listing with elided lines between printed numbers.
1927 class VimeoIE(InfoExtractor):
1928 """Information extractor for vimeo.com."""
1930 # _VALID_URL matches Vimeo URLs
1931 _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
1933 def __init__(self, downloader=None):
1934 InfoExtractor.__init__(self, downloader)
1938 return (re.match(VimeoIE._VALID_URL, url) is not None)
1940 def report_download_webpage(self, video_id):
1941 """Report webpage download."""
1942 self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
1944 def report_extraction(self, video_id):
1945 """Report information extraction."""
1946 self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
# No session setup required for Vimeo.
1948 def _real_initialize(self):
1951 def _real_extract(self, url, new_video=True):
1952 # Extract ID from URL
1953 mobj = re.match(self._VALID_URL, url)
1955 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1958 # At this point we have a new video
1959 self._downloader.increment_downloads()
1960 video_id = mobj.group(1)
1962 # Retrieve video webpage to extract further information
# The moogaloop/load endpoint returns clip-config XML, not HTML.
1963 request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
1965 self.report_download_webpage(video_id)
1966 webpage = urllib2.urlopen(request).read()
1967 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1968 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1971 # Now we begin extracting as much information as we can from what we
1972 # retrieved. First we extract the information common to all extractors,
1973 # and latter we extract those that are Vimeo specific.
1974 self.report_extraction(video_id)
1977 mobj = re.search(r'<caption>(.*?)</caption>', webpage)
1979 self._downloader.trouble(u'ERROR: unable to extract video title')
1981 video_title = mobj.group(1).decode('utf-8')
1982 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1985 mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
1987 self._downloader.trouble(u'ERROR: unable to extract video uploader')
1989 video_uploader = mobj.group(1).decode('utf-8')
1991 # Extract video thumbnail
1992 mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage)
1994 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1996 video_thumbnail = mobj.group(1).decode('utf-8')
1998 # # Extract video description
1999 # mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage)
2001 # self._downloader.trouble(u'ERROR: unable to extract video description')
2003 # video_description = mobj.group(1).decode('utf-8')
2004 # if not video_description: video_description = 'No description available.'
# NOTE(review): real description extraction is commented out above and a
# placeholder literal is used instead — worth restoring or documenting.
2005 video_description = 'Foo.'
2007 # Vimeo specific: extract request signature
2008 mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
2010 self._downloader.trouble(u'ERROR: unable to extract request signature')
2012 sig = mobj.group(1).decode('utf-8')
2014 # Vimeo specific: Extract request signature expiration
2015 mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
2017 self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
2019 sig_exp = mobj.group(1).decode('utf-8')
# The play URL embeds the clip id, signature, and signature expiry.
2021 video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % (video_id, sig, sig_exp)
2024 # Process video information
2025 self._downloader.process_info({
2026 'id': video_id.decode('utf-8'),
2028 'uploader': video_uploader,
2029 'upload_date': u'NA',
2030 'title': video_title,
2031 'stitle': simple_title,
# NOTE(review): 'thumbnail' and 'description' each appear twice in this
# dict literal; the later (undecoded) entries win. One pair should go.
2033 'thumbnail': video_thumbnail.decode('utf-8'),
2034 'description': video_description,
2035 'thumbnail': video_thumbnail,
2036 'description': video_description,
2039 except UnavailableVideoError:
2040 self._downloader.trouble(u'ERROR: unable to download video')
# Last-resort extractor: downloads any URL and looks for a JW-Player-style
# flashvars "file=" media URL (with a looser file|source= fallback). Video id
# and extension come from the media URL's basename; uploader is the host name.
# NOTE(review): numbered listing with elided lines between printed numbers.
2043 class GenericIE(InfoExtractor):
2044 """Generic last-resort information extractor."""
2046 def __init__(self, downloader=None):
2047 InfoExtractor.__init__(self, downloader)
# Warns loudly before use, since generic scraping is unreliable.
2053 def report_download_webpage(self, video_id):
2054 """Report webpage download."""
2055 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
2056 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
2058 def report_extraction(self, video_id):
2059 """Report information extraction."""
2060 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
# No session setup required for the generic extractor.
2062 def _real_initialize(self):
2065 def _real_extract(self, url):
2066 # At this point we have a new video
2067 self._downloader.increment_downloads()
# Provisional id: last path component of the page URL (replaced below once
# the media URL is known).
2069 video_id = url.split('/')[-1]
2070 request = urllib2.Request(url)
2072 self.report_download_webpage(video_id)
2073 webpage = urllib2.urlopen(request).read()
2074 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2075 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
2077 except ValueError, err:
2078 # since this is the last-resort InfoExtractor, if
2079 # this error is thrown, it'll be thrown here
2080 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2083 self.report_extraction(video_id)
2084 # Start with something easy: JW Player in SWFObject
2085 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
2087 # Broaden the search a little bit
2088 mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
2090 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2093 # It's possible that one of the regexes
2094 # matched, but returned an empty group:
2095 if mobj.group(1) is None:
2096 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2099 video_url = urllib.unquote(mobj.group(1))
2100 video_id = os.path.basename(video_url)
2102 # here's a fun little line of code for you:
2103 video_extension = os.path.splitext(video_id)[1][1:]
2104 video_id = os.path.splitext(video_id)[0]
2106 # it's tempting to parse this further, but you would
2107 # have to take into account all the variations like
2108 # Video Title - Site Name
2109 # Site Name | Video Title
2110 # Video Title - Tagline | Site Name
2111 # and so on and so forth; it's just not practical
2112 mobj = re.search(r'<title>(.*)</title>', webpage)
2114 self._downloader.trouble(u'ERROR: unable to extract title')
2116 video_title = mobj.group(1).decode('utf-8')
2117 video_title = sanitize_title(video_title)
2118 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
2120 # video uploader is domain name
2121 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
# NOTE(review): this failure path reuses the "unable to extract title"
# message although it is the uploader/domain match that failed —
# looks like a copy-paste; the message should say "uploader".
2123 self._downloader.trouble(u'ERROR: unable to extract title')
2125 video_uploader = mobj.group(1).decode('utf-8')
2128 # Process video information
2129 self._downloader.process_info({
2130 'id': video_id.decode('utf-8'),
2131 'url': video_url.decode('utf-8'),
2132 'uploader': video_uploader,
2133 'upload_date': u'NA',
2134 'title': video_title,
2135 'stitle': simple_title,
2136 'ext': video_extension.decode('utf-8'),
2140 except UnavailableVideoError, err:
2141 self._downloader.trouble(u'\nERROR: unable to download video')
2144 class YoutubeSearchIE(InfoExtractor):
2145 """Information Extractor for YouTube search queries."""
# Handles "ytsearch:<terms>", "ytsearchN:<terms>" and "ytsearchall:<terms>"
# pseudo-URLs; scrapes result pages and delegates each hit to the wrapped
# YoutubeIE instance.
2146 _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
2147 _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
2148 _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
2149 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2151 _max_youtube_results = 1000
2153 def __init__(self, youtube_ie, downloader=None):
2154 InfoExtractor.__init__(self, downloader)
2155 self._youtube_ie = youtube_ie
2159 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
2161 def report_download_page(self, query, pagenum):
2162 """Report attempt to download playlist page with given number."""
2163 query = query.decode(preferredencoding())
2164 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
2166 def _real_initialize(self):
2167 self._youtube_ie.initialize()
2169 def _real_extract(self, query):
2170 mobj = re.match(self._VALID_QUERY, query)
2172 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
# The "ytsearchN"/"ytsearchall" prefix determines how many results to fetch;
# a bare "ytsearch" (prefix parses as non-integer) falls back to 1 result.
2175 prefix, query = query.split(':')
2177 query = query.encode('utf-8')
2179 self._download_n_results(query, 1)
2181 elif prefix == 'all':
2182 self._download_n_results(query, self._max_youtube_results)
2188 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2190 elif n > self._max_youtube_results:
2191 self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
2192 n = self._max_youtube_results
2193 self._download_n_results(query, n)
2195 except ValueError: # parsing prefix as integer fails
2196 self._download_n_results(query, 1)
2199 def _download_n_results(self, query, n):
2200 """Downloads a specified number of results for a query"""
2203 already_seen = set()
2207 self.report_download_page(query, pagenum)
2208 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2209 request = urllib2.Request(result_url)
2211 page = urllib2.urlopen(request).read()
2212 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2213 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2216 # Extract video identifiers
2217 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
# Matched text looks like href="/watch?v=ID": split('=')[2] takes the v=
# value, [:-1] drops the closing quote. Fragile against markup changes.
2218 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
2219 if video_id not in already_seen:
2220 video_ids.append(video_id)
2221 already_seen.add(video_id)
2222 if len(video_ids) == n:
2223 # Specified n videos reached
2224 for id in video_ids:
2225 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2228 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2229 for id in video_ids:
2230 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2233 pagenum = pagenum + 1
2236 class GoogleSearchIE(InfoExtractor):
2237 """Information Extractor for Google Video search queries."""
# NOTE(review): near-verbatim duplicate of YoutubeSearchIE ("gvsearch...:"
# prefix); only the URLs/regexes and the delegated extractor differ. The three
# search IEs would benefit from a shared base class.
2238 _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
2239 _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
2240 _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
2241 _MORE_PAGES_INDICATOR = r'<span>Next</span>'
2243 _max_google_results = 1000
2245 def __init__(self, google_ie, downloader=None):
2246 InfoExtractor.__init__(self, downloader)
2247 self._google_ie = google_ie
2251 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
2253 def report_download_page(self, query, pagenum):
2254 """Report attempt to download playlist page with given number."""
2255 query = query.decode(preferredencoding())
2256 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
2258 def _real_initialize(self):
2259 self._google_ie.initialize()
2261 def _real_extract(self, query):
2262 mobj = re.match(self._VALID_QUERY, query)
2264 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2267 prefix, query = query.split(':')
2269 query = query.encode('utf-8')
2271 self._download_n_results(query, 1)
2273 elif prefix == 'all':
2274 self._download_n_results(query, self._max_google_results)
2280 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2282 elif n > self._max_google_results:
2283 self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
2284 n = self._max_google_results
2285 self._download_n_results(query, n)
2287 except ValueError: # parsing prefix as integer fails
2288 self._download_n_results(query, 1)
2291 def _download_n_results(self, query, n):
2292 """Downloads a specified number of results for a query"""
2295 already_seen = set()
2299 self.report_download_page(query, pagenum)
2300 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2301 request = urllib2.Request(result_url)
2303 page = urllib2.urlopen(request).read()
2304 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2305 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2308 # Extract video identifiers
2309 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
# Unlike YoutubeSearchIE, the id comes from a capture group — no slicing.
2310 video_id = mobj.group(1)
2311 if video_id not in already_seen:
2312 video_ids.append(video_id)
2313 already_seen.add(video_id)
2314 if len(video_ids) == n:
2315 # Specified n videos reached
2316 for id in video_ids:
2317 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2320 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2321 for id in video_ids:
2322 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2325 pagenum = pagenum + 1
2328 class YahooSearchIE(InfoExtractor):
2329 """Information Extractor for Yahoo! Video search queries."""
# NOTE(review): third near-verbatim copy of the search-IE pattern
# ("yvsearch...:" prefix) — see YoutubeSearchIE / GoogleSearchIE.
2330 _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
2331 _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
2332 _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
2333 _MORE_PAGES_INDICATOR = r'\s*Next'
2335 _max_yahoo_results = 1000
2337 def __init__(self, yahoo_ie, downloader=None):
2338 InfoExtractor.__init__(self, downloader)
2339 self._yahoo_ie = yahoo_ie
2343 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
2345 def report_download_page(self, query, pagenum):
2346 """Report attempt to download playlist page with given number."""
2347 query = query.decode(preferredencoding())
2348 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
2350 def _real_initialize(self):
2351 self._yahoo_ie.initialize()
2353 def _real_extract(self, query):
2354 mobj = re.match(self._VALID_QUERY, query)
2356 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2359 prefix, query = query.split(':')
2361 query = query.encode('utf-8')
2363 self._download_n_results(query, 1)
2365 elif prefix == 'all':
2366 self._download_n_results(query, self._max_yahoo_results)
2372 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2374 elif n > self._max_yahoo_results:
2375 self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
2376 n = self._max_yahoo_results
2377 self._download_n_results(query, n)
2379 except ValueError: # parsing prefix as integer fails
2380 self._download_n_results(query, 1)
2383 def _download_n_results(self, query, n):
2384 """Downloads a specified number of results for a query"""
2387 already_seen = set()
2391 self.report_download_page(query, pagenum)
2392 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2393 request = urllib2.Request(result_url)
2395 page = urllib2.urlopen(request).read()
2396 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2397 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2400 # Extract video identifiers
2401 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
# Yahoo ids are compound ("123/456"), captured whole from the watch URL.
2402 video_id = mobj.group(1)
2403 if video_id not in already_seen:
2404 video_ids.append(video_id)
2405 already_seen.add(video_id)
2406 if len(video_ids) == n:
2407 # Specified n videos reached
2408 for id in video_ids:
2409 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2412 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2413 for id in video_ids:
2414 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2417 pagenum = pagenum + 1
2420 class YoutubePlaylistIE(InfoExtractor):
2421 """Information Extractor for YouTube playlists."""
# Walks playlist pages, collects /watch?v= ids, then delegates each video
# to the wrapped YoutubeIE.
2423 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
2424 _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
2425 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2426 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2429 def __init__(self, youtube_ie, downloader=None):
2430 InfoExtractor.__init__(self, downloader)
2431 self._youtube_ie = youtube_ie
2435 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
2437 def report_download_page(self, playlist_id, pagenum):
2438 """Report attempt to download playlist page with given number."""
2439 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
2441 def _real_initialize(self):
2442 self._youtube_ie.initialize()
2444 def _real_extract(self, url):
2445 # Extract playlist id
2446 mobj = re.match(self._VALID_URL, url)
2448 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
# Group 3 is a single-video id embedded in the URL; when present, extract
# just that video instead of the whole playlist.
2452 if mobj.group(3) is not None:
2453 self._youtube_ie.extract(mobj.group(3))
2456 # Download playlist pages
2457 # prefix is 'p' as default for playlists but there are other types that need extra care
2458 playlist_prefix = mobj.group(1)
2459 if playlist_prefix == 'a':
2460 playlist_access = 'artist'
2462 playlist_prefix = 'p'
2463 playlist_access = 'view_play_list'
2464 playlist_id = mobj.group(2)
2469 self.report_download_page(playlist_id, pagenum)
2470 request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
2472 page = urllib2.urlopen(request).read()
2473 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2474 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2477 # Extract video identifiers
2479 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2480 if mobj.group(1) not in ids_in_page:
2481 ids_in_page.append(mobj.group(1))
2482 video_ids.extend(ids_in_page)
2484 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2486 pagenum = pagenum + 1
2488 playliststart = self._downloader.params.get('playliststart', 1) - 1
2489 playlistend = self._downloader.params.get('playlistend', -1)
# NOTE(review): bug — with the default playlistend of -1 this slice is
# video_ids[playliststart:-1], which silently drops the playlist's LAST
# entry. YoutubeUserIE below special-cases playlistend == -1 and slices
# [playliststart:] instead; this class should do the same.
2490 video_ids = video_ids[playliststart:playlistend]
2492 for id in video_ids:
2493 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2497 class YoutubeUserIE(InfoExtractor):
2498 """Information Extractor for YouTube users."""
# Lists a user's uploads via the GData feed (50 ids per request) and
# delegates each video to the wrapped YoutubeIE.
2500 _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
2501 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2502 _GDATA_PAGE_SIZE = 50
2503 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
2504 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2507 def __init__(self, youtube_ie, downloader=None):
2508 InfoExtractor.__init__(self, downloader)
2509 self._youtube_ie = youtube_ie
2513 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2515 def report_download_page(self, username, start_index):
2516 """Report attempt to download user page."""
2517 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
2518 (username, start_index, start_index + self._GDATA_PAGE_SIZE))
2520 def _real_initialize(self):
2521 self._youtube_ie.initialize()
2523 def _real_extract(self, url):
2525 mobj = re.match(self._VALID_URL, url)
2527 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2530 username = mobj.group(1)
2532 # Download video ids using YouTube Data API. Result size per
2533 # query is limited (currently to 50 videos) so we need to query
2534 # page by page until there are no video ids - it means we got
# GData start-index is 1-based, hence the +1.
2541 start_index = pagenum * self._GDATA_PAGE_SIZE + 1
2542 self.report_download_page(username, start_index)
2544 request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
2547 page = urllib2.urlopen(request).read()
2548 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2549 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2552 # Extract video identifiers
2555 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2556 if mobj.group(1) not in ids_in_page:
2557 ids_in_page.append(mobj.group(1))
2559 video_ids.extend(ids_in_page)
2561 # A little optimization - if current page is not
2562 # "full", ie. does not contain PAGE_SIZE video ids then
2563 # we can assume that this page is the last one - there
2564 # are no more ids on further pages - no need to query
2567 if len(ids_in_page) < self._GDATA_PAGE_SIZE:
2572 all_ids_count = len(video_ids)
2573 playliststart = self._downloader.params.get('playliststart', 1) - 1
2574 playlistend = self._downloader.params.get('playlistend', -1)
# -1 means "no upper limit" and is handled explicitly so the last video is
# not sliced off (unlike the slice in YoutubePlaylistIE above).
2576 if playlistend == -1:
2577 video_ids = video_ids[playliststart:]
2579 video_ids = video_ids[playliststart:playlistend]
2581 self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
2582 (username, all_ids_count, len(video_ids)))
2584 for video_id in video_ids:
2585 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
2588 class DepositFilesIE(InfoExtractor):
2589 """Information extractor for depositfiles.com"""
# Simulates pressing the "Free download" button (gateway_result=1 POST) and
# scrapes the resulting page for the real fileshare URL.
2591 _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
2593 def __init__(self, downloader=None):
2594 InfoExtractor.__init__(self, downloader)
2598 return (re.match(DepositFilesIE._VALID_URL, url) is not None)
2600 def report_download_webpage(self, file_id):
2601 """Report webpage download."""
2602 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2604 def report_extraction(self, file_id):
2605 """Report information extraction."""
2606 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2608 def _real_initialize(self):
2611 def _real_extract(self, url):
2612 # At this point we have a new file
2613 self._downloader.increment_downloads()
2615 file_id = url.split('/')[-1]
2616 # Rebuild url in english locale
2617 url = 'http://depositfiles.com/en/files/' + file_id
2619 # Retrieve file webpage with 'Free download' button pressed
2620 free_download_indication = { 'gateway_result' : '1' }
# Passing POST data makes this an HTTP POST, emulating the button press.
2621 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
2623 self.report_download_webpage(file_id)
2624 webpage = urllib2.urlopen(request).read()
2625 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2626 self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2629 # Search for the real file URL
2630 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2631 if (mobj is None) or (mobj.group(1) is None):
2632 # Try to figure out reason of the error.
2633 mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2634 if (mobj is not None) and (mobj.group(1) is not None):
# NOTE(review): '\s+' is a non-raw pattern string — works today, but
# r'\s+' is the conventional (and future-proof) spelling.
2635 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2636 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2638 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2641 file_url = mobj.group(1)
2642 file_extension = os.path.splitext(file_url)[1][1:]
2644 # Search for file title
2645 mobj = re.search(r'<b title="(.*?)">', webpage)
2647 self._downloader.trouble(u'ERROR: unable to extract title')
2649 file_title = mobj.group(1).decode('utf-8')
2652 # Process file information
2653 self._downloader.process_info({
2654 'id': file_id.decode('utf-8'),
2655 'url': file_url.decode('utf-8'),
2657 'upload_date': u'NA',
2658 'title': file_title,
2659 'stitle': file_title,
2660 'ext': file_extension.decode('utf-8'),
2664 except UnavailableVideoError, err:
2665 self._downloader.trouble(u'ERROR: unable to download file')
2668 class FacebookIE(InfoExtractor):
2669 """Information Extractor for Facebook"""
# Logs in during initialization (explicit --username/--password or .netrc),
# then scrapes the video page's embedded JavaScript for metadata and
# format-specific media URLs.
2671 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
2672 _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
2673 _NETRC_MACHINE = 'facebook'
# Ordered best-first; used both for extraction and for format selection.
2674 _available_formats = ['highqual', 'lowqual']
2675 _video_extensions = {
2680 def __init__(self, downloader=None):
2681 InfoExtractor.__init__(self, downloader)
2685 return (re.match(FacebookIE._VALID_URL, url) is not None)
2687 def _reporter(self, message):
2688 """Add header and report message."""
2689 self._downloader.to_screen(u'[facebook] %s' % message)
2691 def report_login(self):
2692 """Report attempt to log in."""
2693 self._reporter(u'Logging in')
2695 def report_video_webpage_download(self, video_id):
2696 """Report attempt to download video webpage."""
2697 self._reporter(u'%s: Downloading video webpage' % video_id)
2699 def report_information_extraction(self, video_id):
2700 """Report attempt to extract video information."""
2701 self._reporter(u'%s: Extracting video information' % video_id)
2703 def _parse_page(self, video_webpage):
2704 """Extract video information from page"""
# Each field has its own scraping regex; values are unicode-unescaped and
# %-unquoted because they sit inside JavaScript string literals.
2706 data = {'title': r'class="video_title datawrap">(.*?)</',
2707 'description': r'<div class="datawrap">(.*?)</div>',
2708 'owner': r'\("video_owner_name", "(.*?)"\)',
2709 'upload_date': r'data-date="(.*?)"',
2710 'thumbnail': r'\("thumb_url", "(?P<THUMB>.*?)"\)',
2713 for piece in data.keys():
2714 mobj = re.search(data[piece], video_webpage)
2715 if mobj is not None:
2716 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2720 for fmt in self._available_formats:
2721 mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
2722 if mobj is not None:
2723 # URL is in a Javascript segment inside an escaped Unicode format within
2724 # the generally utf-8 page
2725 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2726 video_info['video_urls'] = video_urls
2730 def _real_initialize(self):
2731 if self._downloader is None:
2736 downloader_params = self._downloader.params
2738 # Attempt to use provided username and password or .netrc data
2739 if downloader_params.get('username', None) is not None:
2740 useremail = downloader_params['username']
2741 password = downloader_params['password']
2742 elif downloader_params.get('usenetrc', False):
2744 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
2745 if info is not None:
2749 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
2750 except (IOError, netrc.NetrcParseError), err:
2751 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
2754 if useremail is None:
2763 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
2766 login_results = urllib2.urlopen(request).read()
# A login <form> still present in the response means authentication failed.
2767 if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
2768 self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
2770 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2771 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
2774 def _real_extract(self, url):
2775 mobj = re.match(self._VALID_URL, url)
2777 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2779 video_id = mobj.group('ID')
2782 self.report_video_webpage_download(video_id)
2783 request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
2785 page = urllib2.urlopen(request)
2786 video_webpage = page.read()
2787 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2788 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
2791 # Start extracting information
2792 self.report_information_extraction(video_id)
2794 # Extract information
2795 video_info = self._parse_page(video_webpage)
2798 if 'owner' not in video_info:
2799 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
2801 video_uploader = video_info['owner']
2804 if 'title' not in video_info:
2805 self._downloader.trouble(u'ERROR: unable to extract video title')
2807 video_title = video_info['title']
2808 video_title = video_title.decode('utf-8')
2809 video_title = sanitize_title(video_title)
2812 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
2813 simple_title = simple_title.strip(ur'_')
2816 if 'thumbnail' not in video_info:
2817 self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
2818 video_thumbnail = ''
2820 video_thumbnail = video_info['thumbnail']
2824 if 'upload_date' in video_info:
2825 upload_time = video_info['upload_date']
# NOTE(review): parsedate_tz expects an RFC 2822-style date string —
# presumably what the data-date attribute carries; verify against a live
# page. Result is normalized to YYYYMMDD.
2826 timetuple = email.utils.parsedate_tz(upload_time)
2827 if timetuple is not None:
2829 upload_date = time.strftime('%Y%m%d', timetuple[0:9])
2834 video_description = video_info.get('description', 'No description available.')
2836 url_map = video_info['video_urls']
2837 if len(url_map.keys()) > 0:
2838 # Decide which formats to download
2839 req_format = self._downloader.params.get('format', None)
2840 format_limit = self._downloader.params.get('format_limit', None)
2842 if format_limit is not None and format_limit in self._available_formats:
2843 format_list = self._available_formats[self._available_formats.index(format_limit):]
2845 format_list = self._available_formats
2846 existing_formats = [x for x in format_list if x in url_map]
2847 if len(existing_formats) == 0:
2848 self._downloader.trouble(u'ERROR: no known formats available for video')
2850 if req_format is None:
2851 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
2852 elif req_format == '-1':
2853 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
2856 if req_format not in url_map:
2857 self._downloader.trouble(u'ERROR: requested format not available')
2859 video_url_list = [(req_format, url_map[req_format])] # Specific format
2861 for format_param, video_real_url in video_url_list:
2863 # At this point we have a new video
2864 self._downloader.increment_downloads()
2867 video_extension = self._video_extensions.get(format_param, 'mp4')
2870 # Process video information
2871 self._downloader.process_info({
2872 'id': video_id.decode('utf-8'),
2873 'url': video_real_url.decode('utf-8'),
2874 'uploader': video_uploader.decode('utf-8'),
2875 'upload_date': upload_date,
2876 'title': video_title,
2877 'stitle': simple_title,
2878 'ext': video_extension.decode('utf-8'),
# Py2 "and/or" conditional: u'NA' when format_param is None, else the name.
2879 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
2880 'thumbnail': video_thumbnail.decode('utf-8'),
2881 'description': video_description.decode('utf-8'),
2884 except UnavailableVideoError, err:
2885 self._downloader.trouble(u'\nERROR: unable to download video')
2887 class BlipTVIE(InfoExtractor):
2888 """Information extractor for blip.tv"""
# Requests the JSON "skin" of the page (skin=json&version=2&no_wrap=1) and
# reads all metadata from that payload instead of scraping HTML.
2890 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
2891 _URL_EXT = r'^.*\.([a-z0-9]+)$'
2895 return (re.match(BlipTVIE._VALID_URL, url) is not None)
2897 def report_extraction(self, file_id):
2898 """Report information extraction."""
2899 self._downloader.to_screen(u'[blip.tv] %s: Extracting information' % file_id)
2901 def _simplify_title(self, title):
2902 res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
2903 res = res.strip(ur'_')
2906 def _real_extract(self, url):
2907 mobj = re.match(self._VALID_URL, url)
2909 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
# cchar joins the skin parameters with '?' or '&' depending on whether the
# URL already carries a query string.
2916 json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
2917 request = urllib2.Request(json_url)
2918 self.report_extraction(mobj.group(1))
2920 json_code = urllib2.urlopen(request).read()
2921 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2922 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
2925 json_data = json.loads(json_code)
2926 if 'Post' in json_data:
2927 data = json_data['Post']
# NOTE(review): format '%m-%d-%y %H:%M%p' mixes 24-hour %H with an AM/PM
# marker (%p) — presumably it matches blip.tv's datestamp field as-is, but
# verify against a real payload ('%I:%M%p' would be the usual pairing).
2931 upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
2932 video_url = data['media']['url']
2933 umobj = re.match(self._URL_EXT, video_url)
2935 raise ValueError('Can not determine filename extension')
2936 ext = umobj.group(1)
2938 self._downloader.increment_downloads()
2941 'id': data['item_id'],
2943 'uploader': data['display_name'],
2944 'upload_date': upload_date,
2945 'title': data['title'],
2946 'stitle': self._simplify_title(data['title']),
2948 'format': data['media']['mimeType'],
2949 'thumbnail': data['thumbnailUrl'],
2950 'description': data['description'],
2951 'player_url': data['embedUrl']
# Missing JSON keys surface as KeyError and are reported uniformly here.
2953 except (ValueError,KeyError), err:
2954 self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
2958 self._downloader.process_info(info)
2959 except UnavailableVideoError, err:
2960 self._downloader.trouble(u'\nERROR: unable to download video')
2963 class MyVideoIE(InfoExtractor):
2964 """Information Extractor for myvideo.de."""
2966 _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
2968 def __init__(self, downloader=None):
2969 InfoExtractor.__init__(self, downloader)
2973 return (re.match(MyVideoIE._VALID_URL, url) is not None)
2975 def report_download_webpage(self, video_id):
2976 """Report webpage download."""
2977 self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
2979 def report_extraction(self, video_id):
2980 """Report information extraction."""
2981 self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id)
2983 def _real_initialize(self):
2986 def _real_extract(self,url):
2987 mobj = re.match(self._VALID_URL, url)
2989 self._download.trouble(u'ERROR: invalid URL: %s' % url)
2992 video_id = mobj.group(1)
2993 simple_title = mobj.group(2).decode('utf-8')
2994 # should actually not be necessary
2995 simple_title = sanitize_title(simple_title)
2996 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', simple_title)
2999 request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id)
3001 self.report_download_webpage(video_id)
3002 webpage = urllib2.urlopen(request).read()
3003 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3004 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
3007 self.report_extraction(video_id)
3008 mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />',
3011 self._downloader.trouble(u'ERROR: unable to extract media URL')
3013 video_url = mobj.group(1) + ('/%s.flv' % video_id)
3015 mobj = re.search('<title>([^<]+)</title>', webpage)
3017 self._downloader.trouble(u'ERROR: unable to extract title')
3020 video_title = mobj.group(1)
3021 video_title = sanitize_title(video_title)
3025 self._downloader.process_info({
3029 'upload_date': u'NA',
3030 'title': video_title,
3031 'stitle': simple_title,
3036 except UnavailableVideoError:
3037 self._downloader.trouble(u'\nERROR: Unable to download video')
3039 class ComedyCentralIE(InfoExtractor):
3040 """Information extractor for The Daily Show and Colbert Report """
# ":tds"/":cr"-style shortcuts are rewritten to the show's /full-episodes/
# front page; following the redirect then yields a concrete episode URL.
3042 _VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
3046 return (re.match(ComedyCentralIE._VALID_URL, url) is not None)
3048 def report_extraction(self, episode_id):
3049 self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
3051 def report_config_download(self, episode_id):
3052 self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)
3054 def report_player_url(self, episode_id):
3055 self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
3057 def _simplify_title(self, title):
3058 res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
3059 res = res.strip(ur'_')
3062 def _real_extract(self, url):
3063 mobj = re.match(self._VALID_URL, url)
3065 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3068 if mobj.group('shortname'):
3069 if mobj.group('shortname') in ('tds', 'thedailyshow'):
3070 url = 'http://www.thedailyshow.com/full-episodes/'
3072 url = 'http://www.colbertnation.com/full-episodes/'
3073 mobj = re.match(self._VALID_URL, url)
3074 assert mobj is not None
# No episode slug means "download the newest episode" via redirect below.
3076 dlNewest = not mobj.group('episode')
3078 epTitle = mobj.group('showname')
3080 epTitle = mobj.group('episode')
3082 req = urllib2.Request(url)
3083 self.report_extraction(epTitle)
3085 htmlHandle = urllib2.urlopen(req)
3086 html = htmlHandle.read()
3087 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3088 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
# geturl() reflects the post-redirect URL, which should now name an episode.
3091 url = htmlHandle.geturl()
3092 mobj = re.match(self._VALID_URL, url)
3094 self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url)
3096 if mobj.group('episode') == '':
3097 self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url)
3099 epTitle = mobj.group('episode')
3101 mMovieParams = re.findall('<param name="movie" value="(http://media.mtvnservices.com/(.*?:episode:([^:]*):)(.*?))"/>', html)
3102 if len(mMovieParams) == 0:
3103 self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
3105 show_id = mMovieParams[0][2]
# Episodes are split into per-show "acts"; counts and the first act's media
# offset are hard-coded per site.
3106 ACT_COUNT = { # TODO: Detect this dynamically
3107 'thedailyshow.com': 4,
3108 'colbertnation.com': 3,
3111 'thedailyshow.com': 1,
3112 'colbertnation.com': 1,
3115 first_player_url = mMovieParams[0][0]
3116 startMediaNum = int(mMovieParams[0][3]) + OFFSET
3117 movieId = mMovieParams[0][1]
3119 playerReq = urllib2.Request(first_player_url)
3120 self.report_player_url(epTitle)
3122 playerResponse = urllib2.urlopen(playerReq)
3123 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3124 self._downloader.trouble(u'ERROR: unable to download player: %s' % unicode(err))
3126 player_url = playerResponse.geturl()
3128 for actNum in range(ACT_COUNT):
3129 mediaNum = startMediaNum + actNum
3130 mediaId = movieId + str(mediaNum)
3131 configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
3132 urllib.urlencode({'uri': mediaId}))
3133 configReq = urllib2.Request(configUrl)
3134 self.report_config_download(epTitle)
3136 configXml = urllib2.urlopen(configReq).read()
3137 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3138 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
3141 cdoc = xml.etree.ElementTree.fromstring(configXml)
3143 for rendition in cdoc.findall('.//rendition'):
# Collect (bitrate, src) pairs from each <rendition> in the config feed.
3144 finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
3148 self._downloader.trouble(u'\nERROR: unable to download ' + str(mediaNum) + ': No videos found')
3151 # For now, just pick the highest bitrate
# NOTE(review): [-1] assumes the feed lists renditions in ascending bitrate
# order — confirm, or sort the collected pairs explicitly.
3152 format,video_url = turls[-1]
3154 self._downloader.increment_downloads()
3156 effTitle = show_id.replace('.com', '') + '-' + epTitle
3158 'id': str(mediaNum),
3160 'uploader': show_id,
3161 'upload_date': 'NA',
3163 'stitle': self._simplify_title(effTitle),
3167 'description': 'TODO: Not yet supported',
3168 'player_url': player_url
3172 self._downloader.process_info(info)
3173 except UnavailableVideoError, err:
3174 self._downloader.trouble(u'\nERROR: unable to download ' + str(mediaNum))
class PostProcessor(object):
	"""Base class for post-processing steps.

	A PostProcessor is attached to a FileDownloader through the
	downloader's add_post_processor() method.  After each successful
	download, the downloader walks its chain of PostProcessors, calling
	run() on each one — first with an initial info dictionary, then with
	whatever the previous processor returned.  The chain stops as soon
	as a processor returns None, or when the end is reached.

	Like InfoExtractor objects, PostProcessors follow a "mutual
	registration" scheme with their downloader.
	"""

	def __init__(self, downloader=None):
		"""Remember the owning downloader (may also be attached later)."""
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Attach this post processor to *downloader*."""
		self._downloader = downloader

	def run(self, information):
		"""Process one downloaded file.

		*information* is an InfoExtractor-style dictionary carrying an
		extra 'filepath' key that names the downloaded file.  Returning
		None stops the post-processing chain; returning a (possibly
		modified) dictionary forwards it to the next processor.  May
		raise PostProcessingError, which the downloader handles.
		"""
		# The base class is a no-op: pass the info dict through untouched.
		return information
class FFmpegExtractAudioPP(PostProcessor):
	# Post-processor that replaces a downloaded video with an audio-only
	# file, using ffprobe to detect the source codec and ffmpeg to
	# copy or transcode it.
	# NOTE(review): several lines (decorators, try: headers, return and
	# cleanup statements) appear to be missing from this excerpt; the
	# stray 'except' clauses below belong to those elided try blocks.
	def __init__(self, downloader=None, preferredcodec=None):
		# preferredcodec: 'best', 'aac' or 'mp3'.  'best' (the default)
		# keeps the source codec when it is already aac or mp3.
		PostProcessor.__init__(self, downloader)
		if preferredcodec is None:
			preferredcodec = 'best'
		self._preferredcodec = preferredcodec
	def get_audio_codec(path):
		# Probe *path* with ffprobe and report the codec_name of its
		# audio stream.  NOTE(review): presumably a @staticmethod — the
		# decorator and the return statements are not visible here.
		cmd = ['ffprobe', '-show_streams', '--', path]
		handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
		output = handle.communicate()[0]
		if handle.wait() != 0:
		except (IOError, OSError):
		# ffprobe emits key=value lines; remember the last codec_name seen
		# and confirm it when the matching codec_type=audio line follows.
		for line in output.split('\n'):
			if line.startswith('codec_name='):
				audio_codec = line.split('=')[1].strip()
			elif line.strip() == 'codec_type=audio' and audio_codec is not None:
	def run_ffmpeg(path, out_path, codec, more_opts):
		# Transcode *path* to *out_path* with the given audio codec and
		# extra options.  NOTE(review): presumably a @staticmethod; the
		# try: header and return statements are not visible here.
		cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
		ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
		except (IOError, OSError):
	def run(self, information):
		# PostProcessor entry point: produce the audio-only file and
		# update information['filepath'] to point at it.
		path = information['filepath']
		filecodec = self.get_audio_codec(path)
		if filecodec is None:
			self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
		if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
			if filecodec == 'aac' or filecodec == 'mp3':
				# Lossless if possible
				extension = filecodec
				if filecodec == 'aac':
					more_opts = ['-f', 'adts']
			# NOTE(review): presumably the mp3 re-encode branch of an
			# elided else:
			acodec = 'libmp3lame'
			more_opts = ['-ab', '128k']
		# We convert the audio (lossy)
		acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
		extension = self._preferredcodec
		more_opts = ['-ab', '128k']
		if self._preferredcodec == 'aac':
			more_opts += ['-f', 'adts']
		(prefix, ext) = os.path.splitext(path)
		new_path = prefix + '.' + extension
		self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
		status = self.run_ffmpeg(path, new_path, acodec, more_opts)
		# presumably guarded by 'if status is False:' — guard not visible here
		self._downloader.to_stderr(u'WARNING: error running ffmpeg')
		# presumably an elided os.remove(path) inside a try: block
		except (IOError, OSError):
			self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
		information['filepath'] = new_path
3308 def updateSelf(downloader, filename):
3309 ''' Update the program file with the latest version from the repository '''
3310 # Note: downloader only used for options
3311 if not os.access(filename, os.W_OK):
3312 sys.exit('ERROR: no write permissions on %s' % filename)
3314 downloader.to_screen('Updating to latest version...')
3318 urlh = urllib.urlopen(UPDATE_URL)
3319 newcontent = urlh.read()
3322 except (IOError, OSError), err:
3323 sys.exit('ERROR: unable to download latest version')
3326 outf = open(filename, 'wb')
3328 outf.write(newcontent)
3331 except (IOError, OSError), err:
3332 sys.exit('ERROR: unable to overwrite current version')
3334 downloader.to_screen('Updated youtube-dl. Restart to use the new version.')
3341 def _format_option_string(option):
3342 ''' ('-o', '--option') -> -o, --format METAVAR'''
3346 if option._short_opts: opts.append(option._short_opts[0])
3347 if option._long_opts: opts.append(option._long_opts[0])
3348 if len(opts) > 1: opts.insert(1, ', ')
3350 if option.takes_value(): opts.append(' %s' % option.metavar)
3352 return "".join(opts)
3354 def _find_term_columns():
3355 columns = os.environ.get('COLUMNS', None)
3360 sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
3361 out,err = sp.communicate()
3362 return int(out.split()[1])
# --- parseOpts body: builds the optparse parser and returns
# (parser, opts, args).  NOTE(review): the enclosing 'def parseOpts():'
# header, 'max_width = 80', and the 'kw = {' / '}' braces around the
# parser keyword arguments are not visible in this excerpt. ---
max_help_position = 80
# No need to wrap help messages if we're on a wide console
columns = _find_term_columns()
if columns: max_width = columns
fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
fmt.format_option_strings = _format_option_string
# Keyword arguments for the OptionParser (elided 'kw = {' presumably here)
	'version' : __version__,
	'usage' : '%prog [options] url...',
	'conflict_handler' : 'resolve',
parser = optparse.OptionParser(**kw)
# Option groups; registered with the parser further below.
general = optparse.OptionGroup(parser, 'General Options')
authentication = optparse.OptionGroup(parser, 'Authentication Options')
video_format = optparse.OptionGroup(parser, 'Video Format Options')
postproc = optparse.OptionGroup(parser, 'Post-processing Options')
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
general.add_option('-h', '--help',
		action='help', help='print this help text and exit')
general.add_option('-v', '--version',
		action='version', help='print program version and exit')
general.add_option('-U', '--update',
		action='store_true', dest='update_self', help='update this program to latest version')
general.add_option('-i', '--ignore-errors',
		action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
general.add_option('-r', '--rate-limit',
		dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
general.add_option('-R', '--retries',
		dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
general.add_option('--playlist-start',
		dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
general.add_option('--playlist-end',
		dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
general.add_option('--dump-user-agent',
		action='store_true', dest='dump_user_agent',
		help='display the current browser identification', default=False)
authentication.add_option('-u', '--username',
		dest='username', metavar='USERNAME', help='account username')
authentication.add_option('-p', '--password',
		dest='password', metavar='PASSWORD', help='account password')
authentication.add_option('-n', '--netrc',
		action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
video_format.add_option('-f', '--format',
		action='store', dest='format', metavar='FORMAT', help='video format code')
video_format.add_option('--all-formats',
		action='store_const', dest='format', help='download all available video formats', const='-1')
video_format.add_option('--max-quality',
		action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
verbosity.add_option('-q', '--quiet',
		action='store_true', dest='quiet', help='activates quiet mode', default=False)
verbosity.add_option('-s', '--simulate',
		action='store_true', dest='simulate', help='do not download video', default=False)
verbosity.add_option('-g', '--get-url',
		action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
verbosity.add_option('-e', '--get-title',
		action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
verbosity.add_option('--get-thumbnail',
		action='store_true', dest='getthumbnail',
		help='simulate, quiet but print thumbnail URL', default=False)
verbosity.add_option('--get-description',
		action='store_true', dest='getdescription',
		help='simulate, quiet but print video description', default=False)
verbosity.add_option('--get-filename',
		action='store_true', dest='getfilename',
		help='simulate, quiet but print output filename', default=False)
verbosity.add_option('--no-progress',
		action='store_true', dest='noprogress', help='do not print progress bar', default=False)
verbosity.add_option('--console-title',
		action='store_true', dest='consoletitle',
		help='display progress in console titlebar', default=False)
filesystem.add_option('-t', '--title',
		action='store_true', dest='usetitle', help='use title in file name', default=False)
filesystem.add_option('-l', '--literal',
		action='store_true', dest='useliteral', help='use literal title in file name', default=False)
filesystem.add_option('-A', '--auto-number',
		action='store_true', dest='autonumber',
		help='number downloaded files starting from 00000', default=False)
filesystem.add_option('-o', '--output',
		dest='outtmpl', metavar='TEMPLATE', help='output filename template')
filesystem.add_option('-a', '--batch-file',
		dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
filesystem.add_option('-w', '--no-overwrites',
		action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
filesystem.add_option('-c', '--continue',
		action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
filesystem.add_option('--cookies',
		dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
filesystem.add_option('--no-part',
		action='store_true', dest='nopart', help='do not use .part files', default=False)
filesystem.add_option('--no-mtime',
		action='store_false', dest='updatetime',
		help='do not use the Last-modified header to set the file modification time', default=True)
filesystem.add_option('--write-description',
		action='store_true', dest='writedescription',
		help='write video description to a .description file', default=False)
filesystem.add_option('--write-info-json',
		action='store_true', dest='writeinfojson',
		help='write video metadata to a .info.json file', default=False)
postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
		help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
		help='"best", "aac" or "mp3"; best by default')
parser.add_option_group(general)
parser.add_option_group(filesystem)
parser.add_option_group(verbosity)
parser.add_option_group(video_format)
parser.add_option_group(authentication)
parser.add_option_group(postproc)
opts, args = parser.parse_args()
return parser, opts, args
# --- main-routine body (the enclosing 'def' header is not visible in
# this excerpt).  NOTE(review): several try:/else:/exit lines appear to
# be missing; the stray except clauses below belong to those elided
# try blocks. ---
parser, opts, args = parseOpts()
# Open appropriate CookieJar
if opts.cookiefile is None:
	jar = cookielib.CookieJar()
	# presumably the else: branch — persistent Mozilla-format jar for --cookies
	jar = cookielib.MozillaCookieJar(opts.cookiefile)
	if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
except (IOError, OSError), err:
	sys.exit(u'ERROR: unable to open cookie file')
# --dump-user-agent: print the UA string (and presumably exit)
if opts.dump_user_agent:
	print std_headers['User-Agent']
# General configuration
cookie_processor = urllib2.HTTPCookieProcessor(jar)
opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())
urllib2.install_opener(opener)
socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
# Batch file verification
if opts.batchfile is not None:
	if opts.batchfile == '-':
		# presumably the else: branch reading from the named file
		batchfd = open(opts.batchfile, 'r')
		batchurls = batchfd.readlines()
		batchurls = [x.strip() for x in batchurls]
		# Drop blank lines and comment lines (starting with #, / or ;)
		batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
	sys.exit(u'ERROR: batch file could not be read')
all_urls = batchurls + args
# Conflicting, missing and erroneous options
if opts.usenetrc and (opts.username is not None or opts.password is not None):
	parser.error(u'using .netrc conflicts with giving username/password')
if opts.password is not None and opts.username is None:
	parser.error(u'account username missing')
if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
	parser.error(u'using output template conflicts with using title, literal title or auto number')
if opts.usetitle and opts.useliteral:
	parser.error(u'using title conflicts with using literal title')
if opts.username is not None and opts.password is None:
	opts.password = getpass.getpass(u'Type account password and press return:')
if opts.ratelimit is not None:
	numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
	if numeric_limit is None:
		parser.error(u'invalid rate limit specified')
	opts.ratelimit = numeric_limit
if opts.retries is not None:
	opts.retries = long(opts.retries)
except (TypeError, ValueError), err:
	parser.error(u'invalid retry count specified')
opts.playliststart = int(opts.playliststart)
if opts.playliststart <= 0:
	raise ValueError(u'Playlist start must be positive')
except (TypeError, ValueError), err:
	parser.error(u'invalid playlist start number specified')
opts.playlistend = int(opts.playlistend)
if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
	raise ValueError(u'Playlist end must be greater than playlist start')
except (TypeError, ValueError), err:
	parser.error(u'invalid playlist end number specified')
if opts.extractaudio:
	if opts.audioformat not in ['best', 'aac', 'mp3']:
		parser.error(u'invalid audio format specified')
# Information extractors — search/playlist/user extractors wrap the
# per-site ones they delegate to.
youtube_ie = YoutubeIE()
metacafe_ie = MetacafeIE(youtube_ie)
dailymotion_ie = DailymotionIE()
youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
youtube_user_ie = YoutubeUserIE(youtube_ie)
youtube_search_ie = YoutubeSearchIE(youtube_ie)
google_ie = GoogleIE()
google_search_ie = GoogleSearchIE(google_ie)
photobucket_ie = PhotobucketIE()
yahoo_ie = YahooIE()
yahoo_search_ie = YahooSearchIE(yahoo_ie)
deposit_files_ie = DepositFilesIE()
facebook_ie = FacebookIE()
bliptv_ie = BlipTVIE()
vimeo_ie = VimeoIE()
myvideo_ie = MyVideoIE()
comedycentral_ie = ComedyCentralIE()
generic_ie = GenericIE()
# File downloader configuration.  NOTE(review): the closing '})' of
# this dict literal is not visible in this excerpt.
fd = FileDownloader({
	'usenetrc': opts.usenetrc,
	'username': opts.username,
	'password': opts.password,
	'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
	'forceurl': opts.geturl,
	'forcetitle': opts.gettitle,
	'forcethumbnail': opts.getthumbnail,
	'forcedescription': opts.getdescription,
	'forcefilename': opts.getfilename,
	'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
	'format': opts.format,
	'format_limit': opts.format_limit,
	# Output template: explicit -o wins, then the and/or chain picks the
	# first template matching the enabled naming options.
	'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
		or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
		or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
		or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
		or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
		or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
		or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
		or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
		or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
		or u'%(id)s.%(ext)s'),
	'ignoreerrors': opts.ignoreerrors,
	'ratelimit': opts.ratelimit,
	'nooverwrites': opts.nooverwrites,
	'retries': opts.retries,
	'continuedl': opts.continue_dl,
	'noprogress': opts.noprogress,
	'playliststart': opts.playliststart,
	'playlistend': opts.playlistend,
	'logtostderr': opts.outtmpl == '-',
	'consoletitle': opts.consoletitle,
	'nopart': opts.nopart,
	'updatetime': opts.updatetime,
	'writedescription': opts.writedescription,
	'writeinfojson': opts.writeinfojson,
# Extractor registration order matters: specific extractors first.
fd.add_info_extractor(youtube_search_ie)
fd.add_info_extractor(youtube_pl_ie)
fd.add_info_extractor(youtube_user_ie)
fd.add_info_extractor(metacafe_ie)
fd.add_info_extractor(dailymotion_ie)
fd.add_info_extractor(youtube_ie)
fd.add_info_extractor(google_ie)
fd.add_info_extractor(google_search_ie)
fd.add_info_extractor(photobucket_ie)
fd.add_info_extractor(yahoo_ie)
fd.add_info_extractor(yahoo_search_ie)
fd.add_info_extractor(deposit_files_ie)
fd.add_info_extractor(facebook_ie)
fd.add_info_extractor(bliptv_ie)
fd.add_info_extractor(vimeo_ie)
fd.add_info_extractor(myvideo_ie)
fd.add_info_extractor(comedycentral_ie)
# This must come last since it's the
# fallback if none of the others work
fd.add_info_extractor(generic_ie)
# PostProcessors
if opts.extractaudio:
	fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat))
# Self-update if requested
if opts.update_self:
	updateSelf(fd, sys.argv[0])
# With no URLs and no --update, there is nothing to do
if len(all_urls) < 1:
	if not opts.update_self:
		parser.error(u'you must provide at least one URL')
retcode = fd.download(all_urls)
# Dump cookie jar if requested
if opts.cookiefile is not None:
except (IOError, OSError), err:
	sys.exit(u'ERROR: unable to save cookie jar')
if __name__ == '__main__':
	# Script entry point: run the downloader and translate known
	# exceptions into exit codes / messages.
	# NOTE(review): the 'try:' header and the call into the main routine
	# are not visible in this excerpt; these except clauses presumably
	# guard that call.
	except DownloadError:
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		sys.exit(u'\nERROR: Interrupted by user')
3696 # vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: