2 # -*- coding: utf-8 -*-
5 'Ricardo Garcia Gonzalez',
13 'Philipp Hagemeister',
17 __license__ = 'Public Domain'
18 __version__ = '2011.09.14'
20 UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
48 except ImportError: # Python 2.4
51 import cStringIO as StringIO
55 # parse_qs was moved from the cgi module to the urlparse module recently.
57 from urlparse import parse_qs
59 from cgi import parse_qs
67 import xml.etree.ElementTree
68 except ImportError: # Python<2.5
69 pass # Not officially supported, but let it slip
72 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
73 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
74 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
75 'Accept-Encoding': 'gzip, deflate',
76 'Accept-Language': 'en-us,en;q=0.5',
79 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
83 except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
89 def raiseError(msg, i):
90 raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))
91 def skipSpace(i, expectMore=True):
92 while i < len(s) and s[i] in ' \t\r\n':
96 raiseError('Premature end', i)
98 def decodeEscape(match):
114 return unichr(int(esc[1:5], 16))
115 if len(esc) == 5+6 and esc[5:7] == '\\u':
116 hi = int(esc[1:5], 16)
117 low = int(esc[7:11], 16)
118 return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
119 raise ValueError('Unknown escape ' + str(esc))
126 while s[e-bslashes-1] == '\\':
128 if bslashes % 2 == 1:
132 rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)')
133 stri = rexp.sub(decodeEscape, s[i:e])
139 if s[i] == '}': # Empty dictionary
143 raiseError('Expected a string object key', i)
144 i,key = parseString(i)
146 if i >= len(s) or s[i] != ':':
147 raiseError('Expected a colon', i)
154 raiseError('Expected comma or closing curly brace', i)
159 if s[i] == ']': # Empty array
164 i = skipSpace(i) # Raise exception if premature end
168 raiseError('Expected a comma or closing bracket', i)
170 def parseDiscrete(i):
171 for k,v in {'true': True, 'false': False, 'null': None}.items():
172 if s.startswith(k, i):
174 raiseError('Not a boolean (or null)', i)
176 mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:])
178 raiseError('Not a number', i)
180 if '.' in nums or 'e' in nums or 'E' in nums:
181 return (i+len(nums), float(nums))
182 return (i+len(nums), int(nums))
183 CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}
186 i,res = CHARMAP.get(s[i], parseNumber)(i)
187 i = skipSpace(i, False)
191 raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
194 def preferredencoding():
195 """Get preferred encoding.
197 Returns the best encoding scheme for the system, based on
198 locale.getpreferredencoding() and some further tweaks.
200 def yield_preferredencoding():
202 pref = locale.getpreferredencoding()
208 return yield_preferredencoding().next()
211 def htmlentity_transform(matchobj):
212 """Transforms an HTML entity to a Unicode character.
214 This function receives a match object and is intended to be used with
215 the re.sub() function.
217 entity = matchobj.group(1)
219 # Known non-numeric HTML entity
220 if entity in htmlentitydefs.name2codepoint:
221 return unichr(htmlentitydefs.name2codepoint[entity])
224 mobj = re.match(ur'(?u)#(x?\d+)', entity)
226 numstr = mobj.group(1)
227 if numstr.startswith(u'x'):
229 numstr = u'0%s' % numstr
232 return unichr(long(numstr, base))
234 # Unknown entity in name, return its literal representation
235 return (u'&%s;' % entity)
def sanitize_title(utitle):
	"""Sanitizes a video title so it could be used as part of a filename."""
	# Decode HTML entities (&amp;, &#123;, ...) into real Unicode characters.
	utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
	# Replace the OS path separator so the title cannot introduce directories
	# into the output path.
	return utitle.replace(unicode(os.sep), u'%')
244 def sanitize_open(filename, open_mode):
245 """Try to open the given filename, and slightly tweak it if this fails.
247 Attempts to open the given filename. If this fails, it tries to change
248 the filename slightly, step by step, until it's either able to open it
249 or it fails and raises a final exception, like the standard open()
252 It returns the tuple (stream, definitive_file_name).
256 if sys.platform == 'win32':
258 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
259 return (sys.stdout, filename)
260 stream = open(filename, open_mode)
261 return (stream, filename)
262 except (IOError, OSError), err:
263 # In case of error, try to remove win32 forbidden chars
264 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
266 # An exception here should be caught in the caller
267 stream = open(filename, open_mode)
268 return (stream, filename)
271 def timeconvert(timestr):
272 """Convert RFC 2822 defined time string into system timestamp"""
274 timetuple = email.utils.parsedate_tz(timestr)
275 if timetuple is not None:
276 timestamp = email.utils.mktime_tz(timetuple)
280 class DownloadError(Exception):
281 """Download Error exception.
283 This exception may be thrown by FileDownloader objects if they are not
284 configured to continue on errors. They will contain the appropriate
290 class SameFileError(Exception):
291 """Same File exception.
293 This exception will be thrown by FileDownloader objects if they detect
294 multiple files would have to be downloaded to the same file on disk.
299 class PostProcessingError(Exception):
300 """Post Processing exception.
302 This exception may be raised by PostProcessor's .run() method to
303 indicate an error in the postprocessing task.
308 class UnavailableVideoError(Exception):
309 """Unavailable Format exception.
311 This exception will be thrown when a video is requested
312 in a format that is not available for that video.
317 class ContentTooShortError(Exception):
318 """Content Too Short exception.
320 This exception may be raised by FileDownloader objects when a file they
321 download is too small for what the server announced first, indicating
322 the connection was probably interrupted.
328 def __init__(self, downloaded, expected):
329 self.downloaded = downloaded
330 self.expected = expected
333 class YoutubeDLHandler(urllib2.HTTPHandler):
334 """Handler for HTTP requests and responses.
336 This class, when installed with an OpenerDirector, automatically adds
337 the standard headers to every HTTP request and handles gzipped and
338 deflated responses from web servers. If compression is to be avoided in
339 a particular request, the original request in the program code only has
340 to include the HTTP header "Youtubedl-No-Compression", which will be
341 removed before making the real request.
343 Part of this code was copied from:
345 http://techknack.net/python-urllib2-handlers/
347 Andrew Rowls, the author of that code, agreed to release it to the
354 return zlib.decompress(data, -zlib.MAX_WBITS)
356 return zlib.decompress(data)
359 def addinfourl_wrapper(stream, headers, url, code):
360 if hasattr(urllib2.addinfourl, 'getcode'):
361 return urllib2.addinfourl(stream, headers, url, code)
362 ret = urllib2.addinfourl(stream, headers, url)
366 def http_request(self, req):
367 for h in std_headers:
370 req.add_header(h, std_headers[h])
371 if 'Youtubedl-no-compression' in req.headers:
372 if 'Accept-encoding' in req.headers:
373 del req.headers['Accept-encoding']
374 del req.headers['Youtubedl-no-compression']
377 def http_response(self, req, resp):
380 if resp.headers.get('Content-encoding', '') == 'gzip':
381 gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
382 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
383 resp.msg = old_resp.msg
385 if resp.headers.get('Content-encoding', '') == 'deflate':
386 gz = StringIO.StringIO(self.deflate(resp.read()))
387 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
388 resp.msg = old_resp.msg
392 class FileDownloader(object):
393 """File Downloader class.
395 File downloader objects are the ones responsible of downloading the
396 actual video file and writing it to disk if the user has requested
397 it, among some other tasks. In most cases there should be one per
398 program. As, given a video URL, the downloader doesn't know how to
399 extract all the needed information, task that InfoExtractors do, it
400 has to pass the URL to one of them.
402 For this, file downloader objects have a method that allows
403 InfoExtractors to be registered in a given order. When it is passed
404 a URL, the file downloader handles it to the first InfoExtractor it
405 finds that reports being able to handle it. The InfoExtractor extracts
406 all the information about the video or videos the URL refers to, and
407 asks the FileDownloader to process the video information, possibly
408 downloading the video.
410 File downloaders accept a lot of parameters. In order not to saturate
411 the object constructor with arguments, it receives a dictionary of
412 options instead. These options are available through the params
413 attribute for the InfoExtractors to use. The FileDownloader also
414 registers itself as the downloader in charge for the InfoExtractors
415 that are added to it, so this is a "mutual registration".
419 username: Username for authentication purposes.
420 password: Password for authentication purposes.
421 usenetrc: Use netrc for authentication instead.
422 quiet: Do not print messages to stdout.
423 forceurl: Force printing final URL.
424 forcetitle: Force printing title.
425 forcethumbnail: Force printing thumbnail URL.
426 forcedescription: Force printing description.
427 forcefilename: Force printing final filename.
428 simulate: Do not download the video files.
429 format: Video format code.
430 format_limit: Highest quality format to try.
431 outtmpl: Template for output names.
432 ignoreerrors: Do not stop on download errors.
433 ratelimit: Download speed limit, in bytes/sec.
434 nooverwrites: Prevent overwriting files.
435 retries: Number of times to retry for HTTP error 5xx
436 continuedl: Try to continue downloads if possible.
437 noprogress: Do not print the progress bar.
438 playliststart: Playlist item to start at.
439 playlistend: Playlist item to end at.
440 logtostderr: Log messages to stderr instead of stdout.
441 consoletitle: Display progress in console window's titlebar.
442 nopart: Do not use temporary .part files.
443 updatetime: Use the Last-modified header to set output file timestamps.
444 writedescription: Write the video description to a .description file
445 writeinfojson: Write the video description to a .info.json file
451 _download_retcode = None
452 _num_downloads = None
455 def __init__(self, params):
456 """Create a FileDownloader object with the given options."""
459 self._download_retcode = 0
460 self._num_downloads = 0
461 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
465 def format_bytes(bytes):
468 if type(bytes) is str:
473 exponent = long(math.log(bytes, 1024.0))
474 suffix = 'bkMGTPEZY'[exponent]
475 converted = float(bytes) / float(1024 ** exponent)
476 return '%.2f%s' % (converted, suffix)
479 def calc_percent(byte_counter, data_len):
482 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
485 def calc_eta(start, now, total, current):
489 if current == 0 or dif < 0.001: # One millisecond
491 rate = float(current) / dif
492 eta = long((float(total) - float(current)) / rate)
493 (eta_mins, eta_secs) = divmod(eta, 60)
496 return '%02d:%02d' % (eta_mins, eta_secs)
499 def calc_speed(start, now, bytes):
501 if bytes == 0 or dif < 0.001: # One millisecond
502 return '%10s' % '---b/s'
503 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
506 def best_block_size(elapsed_time, bytes):
507 new_min = max(bytes / 2.0, 1.0)
508 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
509 if elapsed_time < 0.001:
511 rate = bytes / elapsed_time
519 def parse_bytes(bytestr):
520 """Parse a string indicating a byte quantity into a long integer."""
521 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
524 number = float(matchobj.group(1))
525 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
526 return long(round(number * multiplier))
528 def add_info_extractor(self, ie):
529 """Add an InfoExtractor object to the end of the list."""
531 ie.set_downloader(self)
533 def add_post_processor(self, pp):
534 """Add a PostProcessor object to the end of the chain."""
536 pp.set_downloader(self)
538 def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
539 """Print message to stdout if not in quiet mode."""
541 if not self.params.get('quiet', False):
542 terminator = [u'\n', u''][skip_eol]
543 print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
544 self._screen_file.flush()
545 except (UnicodeEncodeError), err:
546 if not ignore_encoding_errors:
	def to_stderr(self, message):
		"""Print message to stderr."""
		# Encode explicitly with the locale's preferred encoding; writing raw
		# unicode to stderr can raise UnicodeEncodeError on some terminals.
		print >>sys.stderr, message.encode(preferredencoding())
553 def to_cons_title(self, message):
554 """Set console/terminal window title to message."""
555 if not self.params.get('consoletitle', False):
557 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
558 # c_wchar_p() might not be necessary if `message` is
559 # already of type unicode()
560 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
561 elif 'TERM' in os.environ:
562 sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
	def fixed_template(self):
		"""Checks if the output template is fixed.

		A template is "fixed" when it contains no %(field)s placeholders,
		i.e. every download would be written to the same file name (used to
		detect the SameFileError condition for multi-URL downloads).
		"""
		return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
568 def trouble(self, message=None):
569 """Determine action to take when a download problem appears.
571 Depending on if the downloader has been configured to ignore
572 download errors or not, this method may throw an exception or
573 not when errors are found, after printing the message.
575 if message is not None:
576 self.to_stderr(message)
577 if not self.params.get('ignoreerrors', False):
578 raise DownloadError(message)
579 self._download_retcode = 1
581 def slow_down(self, start_time, byte_counter):
582 """Sleep if the download speed is over the rate limit."""
583 rate_limit = self.params.get('ratelimit', None)
584 if rate_limit is None or byte_counter == 0:
587 elapsed = now - start_time
590 speed = float(byte_counter) / elapsed
591 if speed > rate_limit:
592 time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
594 def temp_name(self, filename):
595 """Returns a temporary filename for the given filename."""
596 if self.params.get('nopart', False) or filename == u'-' or \
597 (os.path.exists(filename) and not os.path.isfile(filename)):
599 return filename + u'.part'
601 def undo_temp_name(self, filename):
602 if filename.endswith(u'.part'):
603 return filename[:-len(u'.part')]
606 def try_rename(self, old_filename, new_filename):
608 if old_filename == new_filename:
610 os.rename(old_filename, new_filename)
611 except (IOError, OSError), err:
612 self.trouble(u'ERROR: unable to rename file')
614 def try_utime(self, filename, last_modified_hdr):
615 """Try to set the last-modified time of the given file."""
616 if last_modified_hdr is None:
618 if not os.path.isfile(filename):
620 timestr = last_modified_hdr
623 filetime = timeconvert(timestr)
627 os.utime(filename, (time.time(), filetime))
631 def report_writedescription(self, descfn):
632 """ Report that the description file is being written """
633 self.to_screen(u'[info] Writing video description to: %s' % descfn, ignore_encoding_errors=True)
635 def report_writeinfojson(self, infofn):
636 """ Report that the metadata file has been written """
637 self.to_screen(u'[info] Video description metadata as JSON to: %s' % infofn, ignore_encoding_errors=True)
639 def report_destination(self, filename):
640 """Report destination filename."""
641 self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
643 def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
644 """Report download progress."""
645 if self.params.get('noprogress', False):
647 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
648 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
649 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
650 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
652 def report_resuming_byte(self, resume_len):
653 """Report attempt to resume at given byte."""
654 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
656 def report_retry(self, count, retries):
657 """Report retry in case of HTTP error 5xx"""
658 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
660 def report_file_already_downloaded(self, file_name):
661 """Report file has already been fully downloaded."""
663 self.to_screen(u'[download] %s has already been downloaded' % file_name)
664 except (UnicodeEncodeError), err:
665 self.to_screen(u'[download] The file has already been downloaded')
667 def report_unable_to_resume(self):
668 """Report it was impossible to resume download."""
669 self.to_screen(u'[download] Unable to resume')
671 def report_finish(self):
672 """Report download finished."""
673 if self.params.get('noprogress', False):
674 self.to_screen(u'[download] Download completed')
678 def increment_downloads(self):
679 """Increment the ordinal that assigns a number to each file."""
680 self._num_downloads += 1
682 def prepare_filename(self, info_dict):
683 """Generate the output filename."""
685 template_dict = dict(info_dict)
686 template_dict['epoch'] = unicode(long(time.time()))
687 template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
688 filename = self.params['outtmpl'] % template_dict
690 except (ValueError, KeyError), err:
691 self.trouble(u'ERROR: invalid system charset or erroneous output template')
694 def process_info(self, info_dict):
695 """Process a single dictionary returned by an InfoExtractor."""
696 filename = self.prepare_filename(info_dict)
697 # Do nothing else if in simulate mode
698 if self.params.get('simulate', False):
700 if self.params.get('forcetitle', False):
701 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
702 if self.params.get('forceurl', False):
703 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
704 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
705 print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
706 if self.params.get('forcedescription', False) and 'description' in info_dict:
707 print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
708 if self.params.get('forcefilename', False) and filename is not None:
709 print filename.encode(preferredencoding(), 'xmlcharrefreplace')
715 if self.params.get('nooverwrites', False) and os.path.exists(filename):
716 self.to_stderr(u'WARNING: file exists and will be skipped')
720 dn = os.path.dirname(filename)
721 if dn != '' and not os.path.exists(dn):
723 except (OSError, IOError), err:
724 self.trouble(u'ERROR: unable to create directory ' + unicode(err))
727 if self.params.get('writedescription', False):
729 descfn = filename + '.description'
730 self.report_writedescription(descfn)
731 descfile = open(descfn, 'wb')
733 descfile.write(info_dict['description'].encode('utf-8'))
736 except (OSError, IOError):
737 self.trouble(u'ERROR: Cannot write description file ' + descfn)
740 if self.params.get('writeinfojson', False):
741 infofn = filename + '.info.json'
742 self.report_writeinfojson(infofn)
745 except (NameError,AttributeError):
746 self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
749 infof = open(infofn, 'wb')
751 json.dump(info_dict, infof)
754 except (OSError, IOError):
755 self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
759 success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
760 except (OSError, IOError), err:
761 raise UnavailableVideoError
762 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
763 self.trouble(u'ERROR: unable to download video data: %s' % str(err))
765 except (ContentTooShortError, ), err:
766 self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
771 self.post_process(filename, info_dict)
772 except (PostProcessingError), err:
773 self.trouble(u'ERROR: postprocessing: %s' % str(err))
776 def download(self, url_list):
777 """Download a given list of URLs."""
778 if len(url_list) > 1 and self.fixed_template():
779 raise SameFileError(self.params['outtmpl'])
782 suitable_found = False
784 # Go to next InfoExtractor if not suitable
785 if not ie.suitable(url):
788 # Suitable InfoExtractor found
789 suitable_found = True
791 # Extract information from URL and process it
794 # Suitable InfoExtractor had been found; go to next URL
797 if not suitable_found:
798 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
800 return self._download_retcode
802 def post_process(self, filename, ie_info):
803 """Run the postprocessing chain on the given file."""
805 info['filepath'] = filename
811 def _download_with_rtmpdump(self, filename, url, player_url):
812 self.report_destination(filename)
813 tmpfilename = self.temp_name(filename)
815 # Check for rtmpdump first
817 subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
818 except (OSError, IOError):
819 self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
822 # Download using rtmpdump. rtmpdump returns exit code 2 when
823 # the connection was interrumpted and resuming appears to be
824 # possible. This is part of rtmpdump's normal usage, AFAIK.
825 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
826 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
827 while retval == 2 or retval == 1:
828 prevsize = os.path.getsize(tmpfilename)
829 self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
830 time.sleep(5.0) # This seems to be needed
831 retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
832 cursize = os.path.getsize(tmpfilename)
833 if prevsize == cursize and retval == 1:
835 # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
836 if prevsize == cursize and retval == 2 and cursize > 1024:
837 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
841 self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
842 self.try_rename(tmpfilename, filename)
845 self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
848 def _do_download(self, filename, url, player_url):
849 # Check file already present
850 if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
851 self.report_file_already_downloaded(filename)
854 # Attempt to download using rtmpdump
855 if url.startswith('rtmp'):
856 return self._download_with_rtmpdump(filename, url, player_url)
858 tmpfilename = self.temp_name(filename)
862 # Do not include the Accept-Encoding header
863 headers = {'Youtubedl-no-compression': 'True'}
864 basic_request = urllib2.Request(url, None, headers)
865 request = urllib2.Request(url, None, headers)
867 # Establish possible resume length
868 if os.path.isfile(tmpfilename):
869 resume_len = os.path.getsize(tmpfilename)
873 # Request parameters in case of being able to resume
874 if self.params.get('continuedl', False) and resume_len != 0:
875 self.report_resuming_byte(resume_len)
876 request.add_header('Range', 'bytes=%d-' % resume_len)
880 retries = self.params.get('retries', 0)
881 while count <= retries:
882 # Establish connection
884 data = urllib2.urlopen(request)
886 except (urllib2.HTTPError, ), err:
887 if (err.code < 500 or err.code >= 600) and err.code != 416:
888 # Unexpected HTTP error
890 elif err.code == 416:
891 # Unable to resume (requested range not satisfiable)
893 # Open the connection again without the range header
894 data = urllib2.urlopen(basic_request)
895 content_length = data.info()['Content-Length']
896 except (urllib2.HTTPError, ), err:
897 if err.code < 500 or err.code >= 600:
900 # Examine the reported length
901 if (content_length is not None and
902 (resume_len - 100 < long(content_length) < resume_len + 100)):
903 # The file had already been fully downloaded.
904 # Explanation to the above condition: in issue #175 it was revealed that
905 # YouTube sometimes adds or removes a few bytes from the end of the file,
906 # changing the file size slightly and causing problems for some users. So
907 # I decided to implement a suggested change and consider the file
908 # completely downloaded if the file size differs less than 100 bytes from
909 # the one in the hard drive.
910 self.report_file_already_downloaded(filename)
911 self.try_rename(tmpfilename, filename)
914 # The length does not match, we start the download over
915 self.report_unable_to_resume()
921 self.report_retry(count, retries)
924 self.trouble(u'ERROR: giving up after %s retries' % retries)
927 data_len = data.info().get('Content-length', None)
928 if data_len is not None:
929 data_len = long(data_len) + resume_len
930 data_len_str = self.format_bytes(data_len)
931 byte_counter = 0 + resume_len
937 data_block = data.read(block_size)
939 if len(data_block) == 0:
941 byte_counter += len(data_block)
943 # Open file just in time
946 (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
947 assert stream is not None
948 filename = self.undo_temp_name(tmpfilename)
949 self.report_destination(filename)
950 except (OSError, IOError), err:
951 self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
954 stream.write(data_block)
955 except (IOError, OSError), err:
956 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
958 block_size = self.best_block_size(after - before, len(data_block))
961 percent_str = self.calc_percent(byte_counter, data_len)
962 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
963 speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
964 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
967 self.slow_down(start, byte_counter - resume_len)
970 self.trouble(u'\nERROR: Did not get any data blocks')
974 if data_len is not None and byte_counter != data_len:
975 raise ContentTooShortError(byte_counter, long(data_len))
976 self.try_rename(tmpfilename, filename)
978 # Update file modification time
979 if self.params.get('updatetime', True):
980 self.try_utime(filename, data.info().get('last-modified', None))
985 class InfoExtractor(object):
986 """Information Extractor class.
988 Information extractors are the classes that, given a URL, extract
989 information from the video (or videos) the URL refers to. This
990 information includes the real video URL, the video title and simplified
991 title, author and others. The information is stored in a dictionary
992 which is then passed to the FileDownloader. The FileDownloader
993 processes this information possibly downloading the video to the file
994 system, among other possible outcomes. The dictionaries must include
995 the following fields:
997 id: Video identifier.
998 url: Final video URL.
999 uploader: Nickname of the video uploader.
1000 title: Literal title.
1001 stitle: Simplified title.
1002 ext: Video filename extension.
1003 format: Video format.
1004 player_url: SWF Player URL (may be None).
1006 The following fields are optional. Their primary purpose is to allow
1007 youtube-dl to serve as the backend for a video search function, such
1008 as the one in youtube2mp3. They are only used when their respective
1009 forced printing functions are called:
1011 thumbnail: Full URL to a video thumbnail image.
1012 description: One-line video description.
1014 Subclasses of this one should re-define the _real_initialize() and
1015 _real_extract() methods, as well as the suitable() static method.
1016 Probably, they should also be instantiated and added to the main
1023 def __init__(self, downloader=None):
1024 """Constructor. Receives an optional downloader."""
1026 self.set_downloader(downloader)
1030 """Receives a URL and returns True if suitable for this IE."""
1033 def initialize(self):
1034 """Initializes an instance (authentication, etc)."""
1036 self._real_initialize()
1039 def extract(self, url):
1040 """Extracts URL information and returns it in list of dicts."""
1042 return self._real_extract(url)
	def set_downloader(self, downloader):
		"""Sets the downloader for this IE."""
		# Stored so the report_* helpers can emit messages and extractors can
		# read global options through the FileDownloader's params.
		self._downloader = downloader
1048 def _real_initialize(self):
1049 """Real initialization process. Redefine in subclasses."""
1052 def _real_extract(self, url):
1053 """Real extraction process. Redefine in subclasses."""
1057 class YoutubeIE(InfoExtractor):
1058 """Information extractor for youtube.com."""
1060 _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
1061 _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
1062 _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
1063 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
1064 _NETRC_MACHINE = 'youtube'
1065 # Listed in order of quality
1066 _available_formats = ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13']
1067 _video_extensions = {
1073 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
1080 return (re.match(YoutubeIE._VALID_URL, url) is not None)
1082 def report_lang(self):
1083 """Report attempt to set language."""
1084 self._downloader.to_screen(u'[youtube] Setting language')
1086 def report_login(self):
1087 """Report attempt to log in."""
1088 self._downloader.to_screen(u'[youtube] Logging in')
1090 def report_age_confirmation(self):
1091 """Report attempt to confirm age."""
1092 self._downloader.to_screen(u'[youtube] Confirming age')
1094 def report_video_webpage_download(self, video_id):
1095 """Report attempt to download video webpage."""
1096 self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
1098 def report_video_info_webpage_download(self, video_id):
1099 """Report attempt to download video info webpage."""
1100 self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
1102 def report_information_extraction(self, video_id):
1103 """Report attempt to extract video information."""
1104 self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
1106 def report_unavailable_format(self, video_id, format):
1107 """Report extracted video URL."""
1108 self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
1110 def report_rtmp_download(self):
1111 """Indicate the download will use the RTMP protocol."""
1112 self._downloader.to_screen(u'[youtube] RTMP download detected')
def _real_initialize(self):
    """Set the YouTube interface language and, when credentials are
    available (CLI options or ~/.netrc), log in and confirm age.

    NOTE(review): several lines (guards, 'try:' openers, dict-literal
    braces, 'return's) are elided in this excerpt; the Python 2
    'except ..., err' clauses below belong to elided 'try:' blocks.
    """
    if self._downloader is None:
    downloader_params = self._downloader.params

    # Attempt to use provided username and password or .netrc data
    if downloader_params.get('username', None) is not None:
        username = downloader_params['username']
        password = downloader_params['password']
    elif downloader_params.get('usenetrc', False):
        # Fall back to machine credentials stored in ~/.netrc.
        info = netrc.netrc().authenticators(self._NETRC_MACHINE)
        if info is not None:
        raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
        except (IOError, netrc.NetrcParseError), err:
            # .netrc problems are non-fatal: warn and continue anonymously.
            self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))

    # Set language: a plain GET to _LANG_URL suffices; the body is discarded.
    request = urllib2.Request(self._LANG_URL)
    urllib2.urlopen(request).read()
    except (urllib2.URLError, httplib.HTTPException, socket.error), err:
        self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))

    # No authentication to be performed
    if username is None:

    # Log in: POST the login form (dict literal partly elided in excerpt).
    'current_form': 'loginForm',
    'action_login': 'Log In',
    'username': username,
    'password': password,
    request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
    login_results = urllib2.urlopen(request).read()
    # If the login form is still present in the response, the
    # credentials were rejected.
    if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
        self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
    except (urllib2.URLError, httplib.HTTPException, socket.error), err:
        self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))

    # Confirm age: POST the age-gate form (dict literal partly elided).
    'action_confirm': 'Confirm',
    request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
    self.report_age_confirmation()
    age_results = urllib2.urlopen(request).read()
    except (urllib2.URLError, httplib.HTTPException, socket.error), err:
        # Unlike the warnings above, failing the age gate is fatal.
        self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
def _real_extract(self, url):
    """Download the watch page and get_video_info data for *url*, pick
    the format(s) to fetch, and hand each one to process_info().

    NOTE(review): guard lines ('if mobj is None:', 'try:', 'else:',
    'return') are elided throughout this excerpt; 'except ..., err'
    clauses belong to elided 'try:' blocks.
    """
    # Extract video id from URL
    mobj = re.match(self._VALID_URL, url)
    self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
    video_id = mobj.group(2)

    # Fetch the watch page (gl/hl pin region+language; has_verified
    # bypasses the interstitial).
    self.report_video_webpage_download(video_id)
    request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
    video_webpage = urllib2.urlopen(request).read()
    except (urllib2.URLError, httplib.HTTPException, socket.error), err:
        self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))

    # Attempt to extract SWF player URL
    mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
    if mobj is not None:
        # Strip the JSON backslash-escaping from the matched URL.
        player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))

    # Try several 'el' variants of get_video_info until one returns a token.
    self.report_video_info_webpage_download(video_id)
    for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
        video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                % (video_id, el_type))
        request = urllib2.Request(video_info_url)
        video_info_webpage = urllib2.urlopen(request).read()
        video_info = parse_qs(video_info_webpage)
        if 'token' in video_info:
    except (urllib2.URLError, httplib.HTTPException, socket.error), err:
        self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
    if 'token' not in video_info:
        if 'reason' in video_info:
            # YouTube supplied an explicit failure reason; surface it.
            self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
            self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')

    # Start extracting information
    self.report_information_extraction(video_id)

    # uploader
    if 'author' not in video_info:
        self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
    video_uploader = urllib.unquote_plus(video_info['author'][0])

    # title
    if 'title' not in video_info:
        self._downloader.trouble(u'ERROR: unable to extract video title')
    video_title = urllib.unquote_plus(video_info['title'][0])
    video_title = video_title.decode('utf-8')
    video_title = sanitize_title(video_title)
    # Collapse everything outside the filename-safe alphabet to '_'.
    simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
    simple_title = simple_title.strip(ur'_')

    # thumbnail
    if 'thumbnail_url' not in video_info:
        self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
        video_thumbnail = ''
    else: # don't panic if we can't find it
        video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

    # upload date: scraped from the watch page, tried against several
    # human-readable formats and normalized to YYYYMMDD.
    mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
    if mobj is not None:
        upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
        format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
        for expression in format_expressions:
            upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')

    # description: only extracted when the user actually asked for it.
    video_description = u'No description available.'
    if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False):
        mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
        if mobj is not None:
            video_description = mobj.group(1).decode('utf-8')
            html_parser = lxml.etree.HTMLParser(encoding='utf-8')
            vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
            video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
            # TODO use another parser

    # token (not used further in the visible portion of this method)
    video_token = urllib.unquote_plus(video_info['token'][0])

    # Decide which formats to download
    req_format = self._downloader.params.get('format', None)

    if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
        self.report_rtmp_download()
        video_url_list = [(None, video_info['conn'][0])]
    elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
        # The stream map is a comma-separated list of querystring-encoded
        # per-format dicts; build itag -> URL from the valid entries.
        url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
        url_data = [parse_qs(uds) for uds in url_data_strs]
        url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
        url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data)

        # Honour -f/--max-quality by truncating the preference list.
        format_limit = self._downloader.params.get('format_limit', None)
        if format_limit is not None and format_limit in self._available_formats:
            format_list = self._available_formats[self._available_formats.index(format_limit):]
            format_list = self._available_formats
        existing_formats = [x for x in format_list if x in url_map]
        if len(existing_formats) == 0:
            self._downloader.trouble(u'ERROR: no known formats available for video')
        if req_format is None:
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
        elif req_format == '-1':
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
            if req_format not in url_map:
                self._downloader.trouble(u'ERROR: requested format not available')
            video_url_list = [(req_format, url_map[req_format])] # Specific format
        self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')

    for format_param, video_real_url in video_url_list:
        # At this point we have a new video
        self._downloader.increment_downloads()

        # Extension
        video_extension = self._video_extensions.get(format_param, 'flv')

        # Process video information
        self._downloader.process_info({
            'id': video_id.decode('utf-8'),
            'url': video_real_url.decode('utf-8'),
            'uploader': video_uploader.decode('utf-8'),
            'upload_date': upload_date,
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
            'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
            'thumbnail': video_thumbnail.decode('utf-8'),
            'description': video_description,
            'player_url': player_url,
        except UnavailableVideoError, err:
            self._downloader.trouble(u'\nERROR: unable to download video')
class MetacafeIE(InfoExtractor):
    """Information Extractor for metacafe.com."""

    # group(1): numeric video id; group(2): URL slug (used as simple title).
    _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
    _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
    _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'

    def __init__(self, youtube_ie, downloader=None):
        InfoExtractor.__init__(self, downloader)
        # Kept so 'yt-' prefixed ids can be delegated to the YouTube extractor.
        self._youtube_ie = youtube_ie

    # NOTE(review): the enclosing 'def suitable(url):' line is elided in
    # this excerpt; the return below is its body.
        return (re.match(MetacafeIE._VALID_URL, url) is not None)

    def report_disclaimer(self):
        """Report disclaimer retrieval."""
        self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self._downloader.to_screen(u'[metacafe] Confirming age')

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)

    def _real_initialize(self):
        """Fetch the disclaimer page, then POST the family-filter form to
        disable age filtering for the session.

        NOTE(review): 'try:' openers and parts of the form dict are elided
        in this excerpt.
        """
        # Retrieve disclaimer
        request = urllib2.Request(self._DISCLAIMER)
        self.report_disclaimer()
        disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))

        # Confirm age (form dict literal partly elided in excerpt).
        'submit': "Continue - I'm over 18",
        request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
        self.report_age_confirmation()
        disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))

    def _real_extract(self, url):
        """Extract media URL, title and uploader from a Metacafe watch page.

        NOTE(review): 'if mobj is None:'/'try:'/'return' lines are elided
        throughout this excerpt; 'except ..., err' clauses belong to
        elided 'try:' blocks.
        """
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        self._downloader.trouble(u'ERROR: invalid URL: %s' % url)

        video_id = mobj.group(1)

        # Check if video comes from YouTube
        mobj2 = re.match(r'^yt-(.*)$', video_id)
        if mobj2 is not None:
            # Delegate yt-prefixed ids to the YouTube extractor.
            self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))

        # At this point we have a new video
        self._downloader.increment_downloads()

        simple_title = mobj.group(2).decode('utf-8')

        # Retrieve video webpage to extract further information
        request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
        if mobj is not None:
            mediaURL = urllib.unquote(mobj.group(1))
            video_extension = mediaURL[-3:]

            # Extract gdaKey if available
            mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
            video_url = mediaURL
            gdaKey = mobj.group(1)
            video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
        # Fallback: parse the flashvars blob for mediaData.
        mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        vardict = parse_qs(mobj.group(1))
        if 'mediaData' not in vardict:
            self._downloader.trouble(u'ERROR: unable to extract media URL')
        mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        # Unescape JSON-escaped slashes in the URL.
        mediaURL = mobj.group(1).replace('\\/', '/')
        video_extension = mediaURL[-3:]
        video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))

        mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract title')
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)

        mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
        self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
        video_uploader = mobj.group(1)

        # Process video information
        self._downloader.process_info({
            'id': video_id.decode('utf-8'),
            'url': video_url.decode('utf-8'),
            'uploader': video_uploader.decode('utf-8'),
            'upload_date': u'NA',
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
class DailymotionIE(InfoExtractor):
    """Information Extractor for Dailymotion"""

    # group(1): video id; group(2): URL slug (used as simple title).
    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    # NOTE(review): the enclosing 'def suitable(url):' line is elided in
    # this excerpt; the return below is its body.
        return (re.match(DailymotionIE._VALID_URL, url) is not None)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)

    def _real_initialize(self):
        # (body elided in this excerpt)

    def _real_extract(self, url):
        """Extract the SD stream URL, title and uploader from a watch page.

        NOTE(review): 'if mobj is None:'/'try:'/'return' lines are elided
        throughout this excerpt; 'except ..., err' clauses belong to
        elided 'try:' blocks.
        """
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        self._downloader.trouble(u'ERROR: invalid URL: %s' % url)

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(1)

        simple_title = mobj.group(2).decode('utf-8')
        video_extension = 'flv'

        # Retrieve video webpage to extract further information
        request = urllib2.Request(url)
        # Disable the family filter so age-gated pages still render.
        request.add_header('Cookie', 'family_filter=off')
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage)
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        sequence = urllib.unquote(mobj.group(1))
        mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence)
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        # Strip the JSON backslash-escaping from the URL.
        mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '')

        # if needed add http://www.dailymotion.com/ if relative URL

        video_url = mediaURL

        mobj = re.search(r'(?im)<title>Dailymotion\s*-\s*(.+)\s*-\s*[^<]+?</title>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract title')
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)

        mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
        video_uploader = mobj.group(1)

        # Process video information
        self._downloader.process_info({
            'id': video_id.decode('utf-8'),
            'url': video_url.decode('utf-8'),
            'uploader': video_uploader.decode('utf-8'),
            'upload_date': u'NA',
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
class GoogleIE(InfoExtractor):
    """Information extractor for video.google.com."""

    # Accepts the many country TLD variants; group(1) is the docid.
    _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    # NOTE(review): the enclosing 'def suitable(url):' line is elided in
    # this excerpt; the return below is its body.
        return (re.match(GoogleIE._VALID_URL, url) is not None)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)

    def _real_initialize(self):
        # (body elided in this excerpt)

    def _real_extract(self, url):
        """Extract the media URL, title and description from a Google
        Video play page.

        NOTE(review): 'if mobj is None:'/'try:'/'return' lines are elided
        throughout this excerpt; 'except ..., err' clauses belong to
        elided 'try:' blocks.
        """
        # Extract id from URL
        mobj = re.match(self._VALID_URL, url)
        self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(1)

        video_extension = 'mp4'

        # Retrieve video webpage to extract further information
        request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))

        # Extract URL, uploader, and title from webpage
        self.report_extraction(video_id)
        # Preferred: direct download_url; fallback below is the hex-escaped
        # flash videoUrl (then the extension is flv, not mp4).
        mobj = re.search(r"download_url:'([^']+)'", webpage)
        video_extension = 'flv'
        mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        mediaURL = urllib.unquote(mobj.group(1))
        # Decode the \x3d / \x26 escapes ('=' and '&').
        mediaURL = mediaURL.replace('\\x3d', '\x3d')
        mediaURL = mediaURL.replace('\\x26', '\x26')

        video_url = mediaURL

        mobj = re.search(r'<title>(.*)</title>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract title')
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

        # Extract video description
        mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract video description')
        video_description = mobj.group(1).decode('utf-8')
        if not video_description:
            video_description = 'No description available.'

        # Extract video thumbnail
        if self._downloader.params.get('forcethumbnail', False):
            # NOTE(review): abs(int(video_id)) assumes a numeric docid —
            # TODO confirm against _VALID_URL, which allows non-digits.
            request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
            webpage = urllib2.urlopen(request).read()
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
            mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
            self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
            video_thumbnail = mobj.group(1)
        else: # we need something to pass to process_info
            video_thumbnail = ''

        # Process video information
        self._downloader.process_info({
            'id': video_id.decode('utf-8'),
            'url': video_url.decode('utf-8'),
            'upload_date': u'NA',
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
class PhotobucketIE(InfoExtractor):
    """Information extractor for photobucket.com."""

    # group(1): the .flv media path passed via the 'current' parameter.
    _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    # NOTE(review): the enclosing 'def suitable(url):' line is elided in
    # this excerpt; the return below is its body.
        return (re.match(PhotobucketIE._VALID_URL, url) is not None)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)

    def _real_initialize(self):
        # (body elided in this excerpt)

    def _real_extract(self, url):
        """Extract the media URL, title and uploader from a Photobucket page.

        NOTE(review): 'if mobj is None:'/'try:'/'return' lines are elided
        throughout this excerpt; 'except ..., err' clauses belong to
        elided 'try:' blocks.
        """
        # Extract id from URL
        mobj = re.match(self._VALID_URL, url)
        self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(1)

        video_extension = 'flv'

        # Retrieve video webpage to extract further information
        request = urllib2.Request(url)
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))

        # Extract URL, uploader, and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        mediaURL = urllib.unquote(mobj.group(1))

        video_url = mediaURL

        mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract title')
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

        # The uploader is the second group of the same <title> match.
        video_uploader = mobj.group(2).decode('utf-8')

        # Process video information
        self._downloader.process_info({
            'id': video_id.decode('utf-8'),
            'url': video_url.decode('utf-8'),
            'uploader': video_uploader,
            'upload_date': u'NA',
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
class YahooIE(InfoExtractor):
    """Information extractor for video.yahoo.com."""

    # _VALID_URL matches all Yahoo! Video URLs
    # _VPAGE_URL matches only the extractable '/watch/' URLs
    _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
    _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    # NOTE(review): the enclosing 'def suitable(url):' line is elided in
    # this excerpt; the return below is its body.
        return (re.match(YahooIE._VALID_URL, url) is not None)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)

    def _real_initialize(self):
        # (body elided in this excerpt)

    def _real_extract(self, url, new_video=True):
        """Extract metadata and the playlist media URL for a Yahoo video.

        Non-/watch/ URLs are first rewritten to the canonical /watch/ form
        and re-extracted recursively (new_video=False on the second pass).

        NOTE(review): 'if mobj is None:'/'try:'/'return' lines are elided
        throughout this excerpt; 'except ..., err' clauses belong to
        elided 'try:' blocks.
        """
        # Extract ID from URL
        mobj = re.match(self._VALID_URL, url)
        self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(2)
        video_extension = 'flv'

        # Rewrite valid but non-extractable URLs as
        # extractable English language /watch/ URLs
        if re.match(self._VPAGE_URL, url) is None:
            request = urllib2.Request(url)
            webpage = urllib2.urlopen(request).read()
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))

            mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
            self._downloader.trouble(u'ERROR: Unable to extract id field')
            yahoo_id = mobj.group(1)

            mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
            self._downloader.trouble(u'ERROR: Unable to extract vid field')
            yahoo_vid = mobj.group(1)

            url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
            return self._real_extract(url, new_video=False)

        # Retrieve video webpage to extract further information
        request = urllib2.Request(url)
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))

        # Extract uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
        self._downloader.trouble(u'ERROR: unable to extract video title')
        video_title = mobj.group(1).decode('utf-8')
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

        mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract video uploader')
        # NOTE(review): group(1) is the (people|profile) alternative, not
        # the display name — group(2) looks intended; verify.
        video_uploader = mobj.group(1).decode('utf-8')

        # Extract video thumbnail
        mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
        self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
        video_thumbnail = mobj.group(1).decode('utf-8')

        # Extract video description
        mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
        self._downloader.trouble(u'ERROR: unable to extract video description')
        video_description = mobj.group(1).decode('utf-8')
        if not video_description:
            video_description = 'No description available.'

        # Extract video height and width
        mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
        self._downloader.trouble(u'ERROR: unable to extract video height')
        yv_video_height = mobj.group(1)

        mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
        self._downloader.trouble(u'ERROR: unable to extract video width')
        yv_video_width = mobj.group(1)

        # Retrieve video playlist to extract media URL
        # I'm not completely sure what all these options are, but we
        # seem to need most of them, otherwise the server sends a 401.
        yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
        yv_bitrate = '700' # according to Wikipedia this is hard-coded
        request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
                '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
                '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))

        # Extract media URL from playlist XML
        mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
        self._downloader.trouble(u'ERROR: Unable to extract media URL')
        video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
        # Resolve HTML entities left in the URL.
        video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)

        # Process video information
        self._downloader.process_info({
            'id': video_id.decode('utf-8'),
            'uploader': video_uploader,
            'upload_date': u'NA',
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
            # NOTE(review): 'thumbnail' appears twice in this dict literal;
            # the later entry wins — consider removing one.
            'thumbnail': video_thumbnail.decode('utf-8'),
            'description': video_description,
            'thumbnail': video_thumbnail,
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
class VimeoIE(InfoExtractor):
    """Information extractor for vimeo.com."""

    # _VALID_URL matches Vimeo URLs
    _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    # NOTE(review): the enclosing 'def suitable(url):' line is elided in
    # this excerpt; the return below is its body.
        return (re.match(VimeoIE._VALID_URL, url) is not None)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)

    def _real_initialize(self):
        # (body elided in this excerpt)

    def _real_extract(self, url, new_video=True):
        """Extract metadata from the moogaloop clip XML and build the
        signed play URL.

        NOTE(review): 'if mobj is None:'/'try:'/'return' lines are elided
        throughout this excerpt; 'except ..., err' clauses belong to
        elided 'try:' blocks.
        """
        # Extract ID from URL
        mobj = re.match(self._VALID_URL, url)
        self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(1)

        # Retrieve video webpage to extract further information
        request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))

        # Now we begin extracting as much information as we can from what we
        # retrieved. First we extract the information common to all extractors,
        # and latter we extract those that are Vimeo specific.
        self.report_extraction(video_id)

        # Extract title
        mobj = re.search(r'<caption>(.*?)</caption>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract video title')
        video_title = mobj.group(1).decode('utf-8')
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

        # Extract uploader
        mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract video uploader')
        video_uploader = mobj.group(1).decode('utf-8')

        # Extract video thumbnail
        mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
        video_thumbnail = mobj.group(1).decode('utf-8')

        # # Extract video description
        # mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage)
        # self._downloader.trouble(u'ERROR: unable to extract video description')
        # video_description = mobj.group(1).decode('utf-8')
        # if not video_description: video_description = 'No description available.'
        video_description = 'Foo.'

        # Vimeo specific: extract request signature
        mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract request signature')
        sig = mobj.group(1).decode('utf-8')

        # Vimeo specific: Extract request signature expiration
        mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
        sig_exp = mobj.group(1).decode('utf-8')

        video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % (video_id, sig, sig_exp)

        # Process video information
        self._downloader.process_info({
            'id': video_id.decode('utf-8'),
            'uploader': video_uploader,
            'upload_date': u'NA',
            'title': video_title,
            'stitle': simple_title,
            # NOTE(review): 'thumbnail' and 'description' each appear twice
            # in this dict literal; the later entries win — consider
            # removing the duplicates.
            'thumbnail': video_thumbnail.decode('utf-8'),
            'description': video_description,
            'thumbnail': video_thumbnail,
            'description': video_description,
        except UnavailableVideoError:
            self._downloader.trouble(u'ERROR: unable to download video')
class GenericIE(InfoExtractor):
    """Generic last-resort information extractor."""

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)
        # NOTE(review): the rest of __init__ and the following lines are
        # elided in this excerpt.
def report_download_webpage(self, video_id):
    """Warn about the generic-extractor fallback, then log the page fetch."""
    emit = self._downloader.to_screen
    emit(u'WARNING: Falling back on generic information extractor.')
    emit(u'[generic] %s: Downloading webpage' % video_id)
def report_extraction(self, video_id):
    """Log that generic information extraction for *video_id* has started."""
    message = u'[generic] %s: Extracting information' % video_id
    self._downloader.to_screen(message)
def _real_initialize(self):
    # (body elided in this excerpt)

def _real_extract(self, url):
    """Last-resort extraction: fetch the page and look for a direct media
    URL in JW Player / SWFObject flashvars.

    NOTE(review): 'if mobj is None:'/'try:'/'return' lines are elided in
    this excerpt, and the method continues past the end of the visible
    chunk; 'except ..., err' clauses belong to elided 'try:' blocks.
    """
    # At this point we have a new video
    self._downloader.increment_downloads()

    video_id = url.split('/')[-1]
    request = urllib2.Request(url)
    self.report_download_webpage(video_id)
    webpage = urllib2.urlopen(request).read()
    except (urllib2.URLError, httplib.HTTPException, socket.error), err:
        self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
    except ValueError, err:
        # since this is the last-resort InfoExtractor, if
        # this error is thrown, it'll be thrown here
        self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)

    self.report_extraction(video_id)
    # Start with something easy: JW Player in SWFObject
    mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
    # Broaden the search a little bit
    mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
    self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)

    # It's possible that one of the regexes
    # matched, but returned an empty group:
    if mobj.group(1) is None:
        self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)

    video_url = urllib.unquote(mobj.group(1))
    video_id = os.path.basename(video_url)

    # here's a fun little line of code for you:
    video_extension = os.path.splitext(video_id)[1][1:]
    video_id = os.path.splitext(video_id)[0]

    # it's tempting to parse this further, but you would
    # have to take into account all the variations like
    # Video Title - Site Name
    # Site Name | Video Title
    # Video Title - Tagline | Site Name
    # and so on and so forth; it's just not practical
    mobj = re.search(r'<title>(.*)</title>', webpage)
    self._downloader.trouble(u'ERROR: unable to extract title')
    video_title = mobj.group(1).decode('utf-8')
    video_title = sanitize_title(video_title)
    simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

    # video uploader is domain name
    mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
    # NOTE(review): the error text says 'title' but this guard concerns
    # the uploader/domain match above — likely a copy-paste message.
    self._downloader.trouble(u'ERROR: unable to extract title')
2134 video_uploader = mobj.group(1).decode('utf-8')
2137 # Process video information
2138 self._downloader.process_info({
2139 'id': video_id.decode('utf-8'),
2140 'url': video_url.decode('utf-8'),
2141 'uploader': video_uploader,
2142 'upload_date': u'NA',
2143 'title': video_title,
2144 'stitle': simple_title,
2145 'ext': video_extension.decode('utf-8'),
2149 except UnavailableVideoError, err:
2150 self._downloader.trouble(u'\nERROR: unable to download video')
class YoutubeSearchIE(InfoExtractor):
    """Information Extractor for YouTube search queries.

    Handles 'ytsearch[N|all]:terms' pseudo-URLs; delegates the actual
    per-video work to the plain YouTube extractor.
    """
    _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
    _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
    _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
    _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
    # Hard cap on how many results one query may yield.
    _max_youtube_results = 1000

    def __init__(self, youtube_ie, downloader=None):
        InfoExtractor.__init__(self, downloader)
        self._youtube_ie = youtube_ie

        # NOTE(review): the enclosing 'def suitable(url):' header is missing here.
        return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)

    def report_download_page(self, query, pagenum):
        """Report attempt to download playlist page with given number."""
        query = query.decode(preferredencoding())
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _real_initialize(self):
        self._youtube_ie.initialize()

    def _real_extract(self, query):
        # Validate the "ytsearch[N|all]:terms" query shape.
        mobj = re.match(self._VALID_QUERY, query)
            self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)

        # Split into count prefix and the search terms proper.
        prefix, query = query.split(':')
            query = query.encode('utf-8')
            self._download_n_results(query, 1)
        elif prefix == 'all':
            self._download_n_results(query, self._max_youtube_results)
                self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
            elif n > self._max_youtube_results:
                self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
                n = self._max_youtube_results
                self._download_n_results(query, n)
            except ValueError: # parsing prefix as integer fails
                self._download_n_results(query, 1)

    def _download_n_results(self, query, n):
        """Downloads a specified number of results for a query"""
        # Dedupe ids across result pages.
        already_seen = set()

            self.report_download_page(query, pagenum)
            result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
            request = urllib2.Request(result_url)
                page = urllib2.urlopen(request).read()
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))

            # Extract video identifiers
            for mobj in re.finditer(self._VIDEO_INDICATOR, page):
                # href="/watch?v=ID": split on '=' and drop the trailing quote.
                video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
                if video_id not in already_seen:
                    video_ids.append(video_id)
                    already_seen.add(video_id)
                    if len(video_ids) == n:
                        # Specified n videos reached
                        for id in video_ids:
                            self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)

            # No "Next" link: this was the last results page.
            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                for id in video_ids:
                    self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)

            pagenum = pagenum + 1
class GoogleSearchIE(InfoExtractor):
    """Information Extractor for Google Video search queries.

    Handles 'gvsearch[N|all]:terms' pseudo-URLs; delegates per-video work
    to the Google Video extractor.
    """
    _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
    _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
    _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
    _MORE_PAGES_INDICATOR = r'<span>Next</span>'
    # Hard cap on how many results one query may yield.
    _max_google_results = 1000

    def __init__(self, google_ie, downloader=None):
        InfoExtractor.__init__(self, downloader)
        self._google_ie = google_ie

        # NOTE(review): the enclosing 'def suitable(url):' header is missing here.
        return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)

    def report_download_page(self, query, pagenum):
        """Report attempt to download playlist page with given number."""
        query = query.decode(preferredencoding())
        self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))

    def _real_initialize(self):
        self._google_ie.initialize()

    def _real_extract(self, query):
        # Validate the "gvsearch[N|all]:terms" query shape.
        mobj = re.match(self._VALID_QUERY, query)
            self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)

        prefix, query = query.split(':')
            query = query.encode('utf-8')
            self._download_n_results(query, 1)
        elif prefix == 'all':
            self._download_n_results(query, self._max_google_results)
                self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
            elif n > self._max_google_results:
                self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
                n = self._max_google_results
                self._download_n_results(query, n)
            except ValueError: # parsing prefix as integer fails
                self._download_n_results(query, 1)

    def _download_n_results(self, query, n):
        """Downloads a specified number of results for a query"""
        # Dedupe ids across result pages.
        already_seen = set()

            self.report_download_page(query, pagenum)
            result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
            request = urllib2.Request(result_url)
                page = urllib2.urlopen(request).read()
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))

            # Extract video identifiers
            for mobj in re.finditer(self._VIDEO_INDICATOR, page):
                video_id = mobj.group(1)
                if video_id not in already_seen:
                    video_ids.append(video_id)
                    already_seen.add(video_id)
                    if len(video_ids) == n:
                        # Specified n videos reached
                        for id in video_ids:
                            self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)

            # No "Next" link: this was the last results page.
            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                for id in video_ids:
                    self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)

            pagenum = pagenum + 1
class YahooSearchIE(InfoExtractor):
    """Information Extractor for Yahoo! Video search queries.

    Handles 'yvsearch[N|all]:terms' pseudo-URLs; delegates per-video work
    to the Yahoo Video extractor.
    """
    _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
    _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
    _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
    _MORE_PAGES_INDICATOR = r'\s*Next'
    # Hard cap on how many results one query may yield.
    _max_yahoo_results = 1000

    def __init__(self, yahoo_ie, downloader=None):
        InfoExtractor.__init__(self, downloader)
        self._yahoo_ie = yahoo_ie

        # NOTE(review): the enclosing 'def suitable(url):' header is missing here.
        return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)

    def report_download_page(self, query, pagenum):
        """Report attempt to download playlist page with given number."""
        query = query.decode(preferredencoding())
        self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))

    def _real_initialize(self):
        self._yahoo_ie.initialize()

    def _real_extract(self, query):
        # Validate the "yvsearch[N|all]:terms" query shape.
        mobj = re.match(self._VALID_QUERY, query)
            self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)

        prefix, query = query.split(':')
            query = query.encode('utf-8')
            self._download_n_results(query, 1)
        elif prefix == 'all':
            self._download_n_results(query, self._max_yahoo_results)
                self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
            elif n > self._max_yahoo_results:
                self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
                n = self._max_yahoo_results
                self._download_n_results(query, n)
            except ValueError: # parsing prefix as integer fails
                self._download_n_results(query, 1)

    def _download_n_results(self, query, n):
        """Downloads a specified number of results for a query"""
        # Dedupe ids across result pages.
        already_seen = set()

            self.report_download_page(query, pagenum)
            result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
            request = urllib2.Request(result_url)
                page = urllib2.urlopen(request).read()
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))

            # Extract video identifiers
            for mobj in re.finditer(self._VIDEO_INDICATOR, page):
                video_id = mobj.group(1)
                if video_id not in already_seen:
                    video_ids.append(video_id)
                    already_seen.add(video_id)
                    if len(video_ids) == n:
                        # Specified n videos reached
                        for id in video_ids:
                            self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)

            # No "Next" link: this was the last results page.
            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                for id in video_ids:
                    self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)

            pagenum = pagenum + 1
class YoutubePlaylistIE(InfoExtractor):
    """Information Extractor for YouTube playlists.

    Walks the paginated playlist view, collects video ids, then delegates
    each video to the plain YouTube extractor.
    """

    _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
    _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
    _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
    _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'

    def __init__(self, youtube_ie, downloader=None):
        InfoExtractor.__init__(self, downloader)
        self._youtube_ie = youtube_ie

        # NOTE(review): the enclosing 'def suitable(url):' header is missing here.
        return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)

    def report_download_page(self, playlist_id, pagenum):
        """Report attempt to download playlist page with given number."""
        self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))

    def _real_initialize(self):
        self._youtube_ie.initialize()

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
            self._downloader.trouble(u'ERROR: invalid url: %s' % url)

        # Group 3 captures a single-video id inside a playlist URL; if present,
        # extract just that video.
        if mobj.group(3) is not None:
            self._youtube_ie.extract(mobj.group(3))

        # Download playlist pages
        # prefix is 'p' as default for playlists but there are other types that need extra care
        playlist_prefix = mobj.group(1)
        if playlist_prefix == 'a':
            playlist_access = 'artist'
            playlist_prefix = 'p'
            playlist_access = 'view_play_list'
        playlist_id = mobj.group(2)

            self.report_download_page(playlist_id, pagenum)
            request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
                page = urllib2.urlopen(request).read()
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))

            # Extract video identifiers
            for mobj in re.finditer(self._VIDEO_INDICATOR, page):
                if mobj.group(1) not in ids_in_page:
                    ids_in_page.append(mobj.group(1))
            video_ids.extend(ids_in_page)

            # No "Next" link: last playlist page reached.
            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
            pagenum = pagenum + 1

        playliststart = self._downloader.params.get('playliststart', 1) - 1
        playlistend = self._downloader.params.get('playlistend', -1)
        # NOTE(review): with the default playlistend == -1 this slice silently
        # drops the LAST video; YoutubeUserIE special-cases -1 before slicing.
        # Likely the same handling is needed here -- confirm.
        video_ids = video_ids[playliststart:playlistend]

        for id in video_ids:
            self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
class YoutubeUserIE(InfoExtractor):
    """Information Extractor for YouTube users.

    Pages through the GData uploads feed of a user, collects video ids and
    delegates each one to the plain YouTube extractor.
    """

    _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    # GData caps results per request (currently 50); used for paging below.
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
    _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'

    def __init__(self, youtube_ie, downloader=None):
        InfoExtractor.__init__(self, downloader)
        self._youtube_ie = youtube_ie

        # NOTE(review): the enclosing 'def suitable(url):' header is missing here.
        return (re.match(YoutubeUserIE._VALID_URL, url) is not None)

    def report_download_page(self, username, start_index):
        """Report attempt to download user page."""
        self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
                (username, start_index, start_index + self._GDATA_PAGE_SIZE))

    def _real_initialize(self):
        self._youtube_ie.initialize()

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
            self._downloader.trouble(u'ERROR: invalid url: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
            # GData start-index is 1-based.
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1
            self.report_download_page(username, start_index)

            request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))

                page = urllib2.urlopen(request).read()
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))

            # Extract video identifiers
            for mobj in re.finditer(self._VIDEO_INDICATOR, page):
                if mobj.group(1) not in ids_in_page:
                    ids_in_page.append(mobj.group(1))

            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:

        all_ids_count = len(video_ids)
        playliststart = self._downloader.params.get('playliststart', 1) - 1
        playlistend = self._downloader.params.get('playlistend', -1)

        # -1 sentinel means "to the end"; a plain slice would drop the last id.
        if playlistend == -1:
            video_ids = video_ids[playliststart:]
            video_ids = video_ids[playliststart:playlistend]

        self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
                (username, all_ids_count, len(video_ids)))

        for video_id in video_ids:
            self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
class DepositFilesIE(InfoExtractor):
    """Information extractor for depositfiles.com"""

    _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

        # NOTE(review): the enclosing 'def suitable(url):' header is missing here.
        return (re.match(DepositFilesIE._VALID_URL, url) is not None)

    def report_download_webpage(self, file_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)

    def report_extraction(self, file_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)

    def _real_initialize(self):

    def _real_extract(self, url):
        # At this point we have a new file
        self._downloader.increment_downloads()

        file_id = url.split('/')[-1]
        # Rebuild url in english locale
        url = 'http://depositfiles.com/en/files/' + file_id

        # Retrieve file webpage with 'Free download' button pressed
        # (POSTing gateway_result=1 simulates the button click).
        free_download_indication = { 'gateway_result' : '1' }
        request = urllib2.Request(url, urllib.urlencode(free_download_indication))
            self.report_download_webpage(file_id)
            webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))

        # Search for the real file URL
        mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
        if (mobj is None) or (mobj.group(1) is None):
            # Try to figure out reason of the error.
            mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
            if (mobj is not None) and (mobj.group(1) is not None):
                # Normalize whitespace in the site's restriction notice.
                restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
                self._downloader.trouble(u'ERROR: %s' % restriction_message)
                self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)

        file_url = mobj.group(1)
        file_extension = os.path.splitext(file_url)[1][1:]

        # Search for file title
        mobj = re.search(r'<b title="(.*?)">', webpage)
            self._downloader.trouble(u'ERROR: unable to extract title')
        file_title = mobj.group(1).decode('utf-8')

        # Process file information
        self._downloader.process_info({
            'id': file_id.decode('utf-8'),
            'url': file_url.decode('utf-8'),
            'upload_date': u'NA',
            'title': file_title,
            'stitle': file_title,
            'ext': file_extension.decode('utf-8'),
        except UnavailableVideoError, err:
            self._downloader.trouble(u'ERROR: unable to download file')
class FacebookIE(InfoExtractor):
    """Information Extractor for Facebook.

    Optionally logs in (credentials or .netrc), downloads the video page and
    scrapes metadata and per-format URLs out of its inline JavaScript.
    """

    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
    _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
    _NETRC_MACHINE = 'facebook'
    # Ordered best-first; format selection below relies on this ordering.
    _available_formats = ['highqual', 'lowqual']
    _video_extensions = {

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

        # NOTE(review): the enclosing 'def suitable(url):' header is missing here.
        return (re.match(FacebookIE._VALID_URL, url) is not None)

    def _reporter(self, message):
        """Add header and report message."""
        self._downloader.to_screen(u'[facebook] %s' % message)

    def report_login(self):
        """Report attempt to log in."""
        self._reporter(u'Logging in')

    def report_video_webpage_download(self, video_id):
        """Report attempt to download video webpage."""
        self._reporter(u'%s: Downloading video webpage' % video_id)

    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self._reporter(u'%s: Extracting video information' % video_id)

    def _parse_page(self, video_webpage):
        """Extract video information from page"""
        # Regexes keyed by the metadata field they capture.
        data = {'title': r'class="video_title datawrap">(.*?)</',
            'description': r'<div class="datawrap">(.*?)</div>',
            'owner': r'\("video_owner_name", "(.*?)"\)',
            'upload_date': r'data-date="(.*?)"',
            'thumbnail': r'\("thumb_url", "(?P<THUMB>.*?)"\)',

        for piece in data.keys():
            mobj = re.search(data[piece], video_webpage)
            if mobj is not None:
                # Values sit in escaped-Unicode JS; decode then unquote.
                video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))

        # Collect one URL per known format name.
        for fmt in self._available_formats:
            mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
            if mobj is not None:
                # URL is in a Javascript segment inside an escaped Unicode format within
                # the generally utf-8 page
                video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
        video_info['video_urls'] = video_urls

    def _real_initialize(self):
        if self._downloader is None:

        downloader_params = self._downloader.params

        # Attempt to use provided username and password or .netrc data
        if downloader_params.get('username', None) is not None:
            useremail = downloader_params['username']
            password = downloader_params['password']
        elif downloader_params.get('usenetrc', False):
                info = netrc.netrc().authenticators(self._NETRC_MACHINE)
                if info is not None:
                    raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
            except (IOError, netrc.NetrcParseError), err:
                # Login is best-effort: warn and continue unauthenticated.
                self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))

        if useremail is None:

        request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
            login_results = urllib2.urlopen(request).read()
            # A login form in the response means authentication failed.
            if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
                self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
        video_id = mobj.group('ID')

        self.report_video_webpage_download(video_id)
        request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
            page = urllib2.urlopen(request)
            video_webpage = page.read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))

        # Start extracting information
        self.report_information_extraction(video_id)

        # Extract information
        video_info = self._parse_page(video_webpage)

        if 'owner' not in video_info:
            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
        video_uploader = video_info['owner']

        if 'title' not in video_info:
            self._downloader.trouble(u'ERROR: unable to extract video title')
        video_title = video_info['title']
        video_title = video_title.decode('utf-8')
        video_title = sanitize_title(video_title)

        # Collapse runs of characters outside simple_title_chars into '_'.
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
        simple_title = simple_title.strip(ur'_')

        # Thumbnail is optional: warn and fall back to an empty string.
        if 'thumbnail' not in video_info:
            self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
            video_thumbnail = ''
            video_thumbnail = video_info['thumbnail']

        if 'upload_date' in video_info:
            upload_time = video_info['upload_date']
            timetuple = email.utils.parsedate_tz(upload_time)
            if timetuple is not None:
                    upload_date = time.strftime('%Y%m%d', timetuple[0:9])

        video_description = video_info.get('description', 'No description available.')

        url_map = video_info['video_urls']
        if len(url_map.keys()) > 0:
            # Decide which formats to download
            req_format = self._downloader.params.get('format', None)
            format_limit = self._downloader.params.get('format_limit', None)

            if format_limit is not None and format_limit in self._available_formats:
                # Restrict to formats at or below the requested quality.
                format_list = self._available_formats[self._available_formats.index(format_limit):]
                format_list = self._available_formats
            existing_formats = [x for x in format_list if x in url_map]
            if len(existing_formats) == 0:
                self._downloader.trouble(u'ERROR: no known formats available for video')
            if req_format is None:
                video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
            elif req_format == '-1':
                video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
                if req_format not in url_map:
                    self._downloader.trouble(u'ERROR: requested format not available')
                video_url_list = [(req_format, url_map[req_format])] # Specific format

        for format_param, video_real_url in video_url_list:
                # At this point we have a new video
                self._downloader.increment_downloads()

                video_extension = self._video_extensions.get(format_param, 'mp4')

                # Process video information
                self._downloader.process_info({
                    'id': video_id.decode('utf-8'),
                    'url': video_real_url.decode('utf-8'),
                    'uploader': video_uploader.decode('utf-8'),
                    # NOTE(review): upload_date is only assigned above when a
                    # parseable date was found -- confirm a default is set upstream.
                    'upload_date': upload_date,
                    'title': video_title,
                    'stitle': simple_title,
                    'ext': video_extension.decode('utf-8'),
                    'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
                    'thumbnail': video_thumbnail.decode('utf-8'),
                    'description': video_description.decode('utf-8'),
            except UnavailableVideoError, err:
                self._downloader.trouble(u'\nERROR: unable to download video')
class BlipTVIE(InfoExtractor):
    """Information extractor for blip.tv.

    Fetches the site's JSON metadata endpoint for a post instead of
    scraping HTML.
    """

    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
    # Pulls the extension off the end of the media URL.
    _URL_EXT = r'^.*\.([a-z0-9]+)$'

        # NOTE(review): the enclosing 'def suitable(url):' header is missing here.
        return (re.match(BlipTVIE._VALID_URL, url) is not None)

    def report_extraction(self, file_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[blip.tv] %s: Extracting information' % file_id)

    def _simplify_title(self, title):
        """Collapse characters outside simple_title_chars into underscores."""
        res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
        res = res.strip(ur'_')

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)

        # Request JSON metadata; 'cchar' ('?' or '&') is chosen in code
        # not visible in this hunk.
        json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
        request = urllib2.Request(json_url)
        self.report_extraction(mobj.group(1))
            json_code = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))

        json_data = json.loads(json_code)
        if 'Post' in json_data:
            data = json_data['Post']

            # 'datestamp' is month-day-year with a 12-hour AM/PM time.
            upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
            video_url = data['media']['url']
            umobj = re.match(self._URL_EXT, video_url)
                raise ValueError('Can not determine filename extension')
            ext = umobj.group(1)

            self._downloader.increment_downloads()

                'id': data['item_id'],
                'uploader': data['display_name'],
                'upload_date': upload_date,
                'title': data['title'],
                'stitle': self._simplify_title(data['title']),
                'format': data['media']['mimeType'],
                'thumbnail': data['thumbnailUrl'],
                'description': data['description'],
                'player_url': data['embedUrl']
        except (ValueError,KeyError), err:
            self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))

            self._downloader.process_info(info)
        except UnavailableVideoError, err:
            self._downloader.trouble(u'\nERROR: unable to download video')
class MyVideoIE(InfoExtractor):
    """Information Extractor for myvideo.de."""

    _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

        # NOTE(review): the enclosing 'def suitable(url):' header is missing here.
        return (re.match(MyVideoIE._VALID_URL, url) is not None)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id)

    def _real_initialize(self):

    def _real_extract(self,url):
        mobj = re.match(self._VALID_URL, url)
            # NOTE(review): 'self._download' is almost certainly a typo for
            # 'self._downloader' (used everywhere else in this class) -- this
            # line would raise AttributeError if ever reached.
            self._download.trouble(u'ERROR: invalid URL: %s' % url)

        video_id = mobj.group(1)
        # The URL slug doubles as a pre-simplified title.
        simple_title = mobj.group(2).decode('utf-8')
        # should actually not be necessary
        simple_title = sanitize_title(simple_title)
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', simple_title)

        request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id)
            self.report_download_webpage(video_id)
            webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))

        self.report_extraction(video_id)
        # The thumbnail link encodes the movie's base media URL; the flv
        # URL is derived from it below.
        mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />',
            self._downloader.trouble(u'ERROR: unable to extract media URL')
        video_url = mobj.group(1) + ('/%s.flv' % video_id)

        mobj = re.search('<title>([^<]+)</title>', webpage)
            self._downloader.trouble(u'ERROR: unable to extract title')

        video_title = mobj.group(1)
        video_title = sanitize_title(video_title)

        self._downloader.process_info({
            'upload_date': u'NA',
            'title': video_title,
            'stitle': simple_title,
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: Unable to download video')
class ComedyCentralIE(InfoExtractor):
    """Information extractor for The Daily Show and Colbert Report """

    # Matches ':tds'/':cr'-style shortcuts as well as full-episode URLs on
    # thedailyshow.com / colbertnation.com.
    _VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'

        # NOTE(review): the enclosing 'def suitable(url):' header is missing here.
        return (re.match(ComedyCentralIE._VALID_URL, url) is not None)
3057 def report_extraction(self, episode_id):
3058 self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
3060 def report_config_download(self, episode_id):
3061 self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)
3063 def report_index_download(self, episode_id):
3064 self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
3066 def report_player_url(self, episode_id):
3067 self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
    def _simplify_title(self, title):
        """Collapse *title* to ASCII letters/digits, replacing other runs with '_'."""
        res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
        # Drop leading/trailing underscores left by the substitution.
        res = res.strip(ur'_')
# NOTE(review): garbled excerpt — `return`/`try:`/`else:` lines and other
# statements are elided, and each line keeps a stray numeric prefix.
# Comments only; code text left byte-identical.
3074 def _real_extract(self, url):
# Validate the URL against _VALID_URL (error path visible below).
3075 mobj = re.match(self._VALID_URL, url)
3077 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
# Shortname form (":tds", ":colbert", ...): rewrite to the show's
# full-episodes index page and re-match so the named groups are populated.
3080 if mobj.group('shortname'):
3081 if mobj.group('shortname') in ('tds', 'thedailyshow'):
3082 url = 'http://www.thedailyshow.com/full-episodes/'
3084 url = 'http://www.colbertnation.com/full-episodes/'
3085 mobj = re.match(self._VALID_URL, url)
3086 assert mobj is not None
# No episode group means "download the newest episode".
3088 dlNewest = not mobj.group('episode')
3090 epTitle = mobj.group('showname')
3092 epTitle = mobj.group('episode')
# Fetch the episode page (network errors reported via trouble()).
3094 req = urllib2.Request(url)
3095 self.report_extraction(epTitle)
3097 htmlHandle = urllib2.urlopen(req)
3098 html = htmlHandle.read()
3099 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3100 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
# The site may have redirected us; the final URL must still match and
# must now carry a concrete episode component.
3103 url = htmlHandle.geturl()
3104 mobj = re.match(self._VALID_URL, url)
3106 self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url)
3108 if mobj.group('episode') == '':
3109 self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url)
3111 epTitle = mobj.group('episode')
# Scrape the Flash <param name="movie"> URL; group 0 is the full player
# URL, group 1 the mgid-style URI embedded in it.
3113 mMovieParams = re.findall('<param name="movie" value="(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"/>', html)
3114 if len(mMovieParams) == 0:
3115 self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
# Resolve the player URL through its redirects.
3118 playerUrl_raw = mMovieParams[0][0]
3119 self.report_player_url(epTitle)
3121 urlHandle = urllib2.urlopen(playerUrl_raw)
3122 playerUrl = urlHandle.geturl()
3123 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3124 self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err))
# Download the MRSS index feed listing the episode's video segments.
3127 uri = mMovieParams[0][1]
3128 indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri})
3129 self.report_index_download(epTitle)
3131 indexXml = urllib2.urlopen(indexUrl).read()
3132 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3133 self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err))
# One <item> per segment; the <guid> is a colon-separated media id whose
# last component is the short id and second-to-last names the show.
3136 idoc = xml.etree.ElementTree.fromstring(indexXml)
3137 itemEls = idoc.findall('.//item')
3138 for itemEl in itemEls:
3139 mediaId = itemEl.findall('./guid')[0].text
3140 shortMediaId = mediaId.split(':')[-1]
3141 showId = mediaId.split(':')[-2].replace('.com', '')
3142 officialTitle = itemEl.findall('./title')[0].text
3143 officialDate = itemEl.findall('./pubDate')[0].text
# Per-segment configuration feed gives the available renditions.
3145 configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
3146 urllib.urlencode({'uri': mediaId}))
3147 configReq = urllib2.Request(configUrl)
3148 self.report_config_download(epTitle)
3150 configXml = urllib2.urlopen(configReq).read()
3151 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3152 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
# Collect (bitrate, src) pairs; the accumulation into `turls` and the
# empty check are partly elided in this excerpt.
3155 cdoc = xml.etree.ElementTree.fromstring(configXml)
3157 for rendition in cdoc.findall('.//rendition'):
3158 finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
3162 self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
3165 # For now, just pick the highest bitrate
3166 format,video_url = turls[-1]
# Hand the assembled info dict to the downloader (several keys elided).
3168 self._downloader.increment_downloads()
3170 effTitle = showId + '-' + epTitle
3175 'upload_date': officialDate,
3177 'stitle': self._simplify_title(effTitle),
3181 'description': officialTitle,
3182 'player_url': playerUrl
3186 self._downloader.process_info(info)
3187 except UnavailableVideoError, err:
3188 self._downloader.trouble(u'\nERROR: unable to download ' + mediaId)
class PostProcessor(object):
	"""Base class for post-download processing steps.

	A PostProcessor registers with a downloader through the downloader's
	add_post_processor() method — the same "mutual registration" scheme
	used by InfoExtractor objects. After each successful download the
	downloader walks its chain of PostProcessors, calling run() on each,
	feeding every one the dictionary returned by its predecessor. The
	chain stops as soon as a run() returns None or the last processor
	has executed. run() may also raise a PostProcessingError, which the
	downloader takes into account.
	"""

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Attach the downloader this PP reports to."""
		self._downloader = downloader

	def run(self, information):
		"""Process one finished download.

		*information* is an InfoExtractor-style dictionary carrying one
		extra key, 'filepath', which points at the downloaded file.
		Returning None stops the postprocessing chain; returning a
		dictionary (possibly with some fields changed) passes it on to
		the next PostProcessor. The default implementation does nothing
		and hands the dictionary through unchanged.
		"""
		return information
# NOTE(review): garbled excerpt — `try:`/`return` lines, `audio_codec = None`
# initialisation and any decorators on the two helpers are elided, and each
# line keeps a stray numeric prefix. Comments only; code text untouched.
3239 class FFmpegExtractAudioPP(PostProcessor):
3241 def __init__(self, downloader=None, preferredcodec=None):
3242 PostProcessor.__init__(self, downloader)
# 'best' means: keep the stream losslessly when it is aac/mp3, else transcode.
3243 if preferredcodec is None:
3244 preferredcodec = 'best'
3245 self._preferredcodec = preferredcodec
# Probe the file with ffprobe and parse its key=value stream dump to find
# the audio stream's codec name (error paths elided here).
3248 def get_audio_codec(path):
3250 cmd = ['ffprobe', '-show_streams', '--', path]
3251 handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
3252 output = handle.communicate()[0]
3253 if handle.wait() != 0:
3255 except (IOError, OSError):
# codec_name precedes codec_type in ffprobe's per-stream output, hence the
# "remember the last codec_name seen" approach.
3258 for line in output.split('\n'):
3259 if line.startswith('codec_name='):
3260 audio_codec = line.split('=')[1].strip()
3261 elif line.strip() == 'codec_type=audio' and audio_codec is not None:
# Run ffmpeg to extract/convert audio; stdout/stderr are discarded.
3266 def run_ffmpeg(path, out_path, codec, more_opts):
3268 cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
3269 ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
3271 except (IOError, OSError):
# PostProcessor entry point: replace the downloaded video file with an
# audio-only file and update information['filepath'].
3274 def run(self, information):
3275 path = information['filepath']
3277 filecodec = self.get_audio_codec(path)
3278 if filecodec is None:
3279 self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
# Lossless copy path: source already aac/mp3 and acceptable to the user.
3283 if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
3284 if filecodec == 'aac' or filecodec == 'mp3':
3285 # Lossless if possible
3287 extension = filecodec
3288 if filecodec == 'aac':
3289 more_opts = ['-f', 'adts']
3292 acodec = 'libmp3lame'
3294 more_opts = ['-ab', '128k']
3296 # We convert the audio (lossy)
3297 acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
3298 extension = self._preferredcodec
3299 more_opts = ['-ab', '128k']
3300 if self._preferredcodec == 'aac':
3301 more_opts += ['-f', 'adts']
# Write alongside the source, swapping the extension; then (elided) remove
# the original video file — failure to remove is only a warning.
3303 (prefix, ext) = os.path.splitext(path)
3304 new_path = prefix + '.' + extension
3305 self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
3306 status = self.run_ffmpeg(path, new_path, acodec, more_opts)
3309 self._downloader.to_stderr(u'WARNING: error running ffmpeg')
3314 except (IOError, OSError):
3315 self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
3318 information['filepath'] = new_path
# NOTE(review): garbled excerpt — the `try:`/close lines around the two I/O
# sections are elided. Comments only; code text untouched.
3322 def updateSelf(downloader, filename):
3323 ''' Update the program file with the latest version from the repository '''
3324 # Note: downloader only used for options
# Refuse to proceed if we cannot rewrite our own file.
3325 if not os.access(filename, os.W_OK):
3326 sys.exit('ERROR: no write permissions on %s' % filename)
3328 downloader.to_screen('Updating to latest version...')
# Fetch the current script from UPDATE_URL (module-level constant).
3332 urlh = urllib.urlopen(UPDATE_URL)
3333 newcontent = urlh.read()
3336 except (IOError, OSError), err:
3337 sys.exit('ERROR: unable to download latest version')
# Overwrite this script in binary mode with the downloaded content.
3340 outf = open(filename, 'wb')
3342 outf.write(newcontent)
3345 except (IOError, OSError), err:
3346 sys.exit('ERROR: unable to overwrite current version')
3348 downloader.to_screen('Updated youtube-dl. Restart to use the new version.')
# Nested helper (the enclosing parseOpts() def line is elided in this
# excerpt): custom optparse option formatter producing "-o, --option METAVAR".
3355 def _format_option_string(option):
3356 ''' ('-o', '--option') -> -o, --format METAVAR'''
# Collect short then long form; separate with ", " only if both exist.
3360 if option._short_opts: opts.append(option._short_opts[0])
3361 if option._long_opts: opts.append(option._long_opts[0])
3362 if len(opts) > 1: opts.insert(1, ', ')
3364 if option.takes_value(): opts.append(' %s' % option.metavar)
3366 return "".join(opts)
# Nested helper: best-effort terminal width — $COLUMNS first, then
# `stty size` (the surrounding try/except and fallback return are elided).
3368 def _find_term_columns():
3369 columns = os.environ.get('COLUMNS', None)
3374 sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
3375 out,err = sp.communicate()
# `stty size` prints "rows cols"; take the second field.
3376 return int(out.split()[1])
# Body of parseOpts() (def line elided): build the optparse parser, declare
# all option groups, parse sys.argv, and return (parser, opts, args).
# NOTE(review): garbled excerpt with stray numeric line prefixes; comments
# only, code text untouched.
3382 max_help_position = 80
3384 # No need to wrap help messages if we're on a wide console
3385 columns = _find_term_columns()
3386 if columns: max_width = columns
# Plug the custom option-string formatter into the help formatter.
3388 fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
3389 fmt.format_option_strings = _format_option_string
3392 'version' : __version__,
3394 'usage' : '%prog [options] url [url...]',
3395 'conflict_handler' : 'resolve',
3398 parser = optparse.OptionParser(**kw)
# Option groups, mirrored in the add_option_group() calls at the bottom.
3401 general = optparse.OptionGroup(parser, 'General Options')
3402 authentication = optparse.OptionGroup(parser, 'Authentication Options')
3403 video_format = optparse.OptionGroup(parser, 'Video Format Options')
3404 postproc = optparse.OptionGroup(parser, 'Post-processing Options')
3405 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
3406 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
3408 general.add_option('-h', '--help',
3409 action='help', help='print this help text and exit')
3410 general.add_option('-v', '--version',
3411 action='version', help='print program version and exit')
3412 general.add_option('-U', '--update',
3413 action='store_true', dest='update_self', help='update this program to latest version')
3414 general.add_option('-i', '--ignore-errors',
3415 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
3416 general.add_option('-r', '--rate-limit',
3417 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
3418 general.add_option('-R', '--retries',
3419 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
3420 general.add_option('--playlist-start',
3421 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
3422 general.add_option('--playlist-end',
3423 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
3424 general.add_option('--dump-user-agent',
3425 action='store_true', dest='dump_user_agent',
3426 help='display the current browser identification', default=False)
3428 authentication.add_option('-u', '--username',
3429 dest='username', metavar='USERNAME', help='account username')
3430 authentication.add_option('-p', '--password',
3431 dest='password', metavar='PASSWORD', help='account password')
3432 authentication.add_option('-n', '--netrc',
3433 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
3436 video_format.add_option('-f', '--format',
3437 action='store', dest='format', metavar='FORMAT', help='video format code')
# '-1' is the sentinel for "all formats"; the main body keys off it when
# choosing an output template.
3438 video_format.add_option('--all-formats',
3439 action='store_const', dest='format', help='download all available video formats', const='-1')
3440 video_format.add_option('--max-quality',
3441 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
3444 verbosity.add_option('-q', '--quiet',
3445 action='store_true', dest='quiet', help='activates quiet mode', default=False)
3446 verbosity.add_option('-s', '--simulate',
3447 action='store_true', dest='simulate', help='do not download video', default=False)
# The get-* options imply quiet+simulate in the FileDownloader config below.
3448 verbosity.add_option('-g', '--get-url',
3449 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
3450 verbosity.add_option('-e', '--get-title',
3451 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
3452 verbosity.add_option('--get-thumbnail',
3453 action='store_true', dest='getthumbnail',
3454 help='simulate, quiet but print thumbnail URL', default=False)
3455 verbosity.add_option('--get-description',
3456 action='store_true', dest='getdescription',
3457 help='simulate, quiet but print video description', default=False)
3458 verbosity.add_option('--get-filename',
3459 action='store_true', dest='getfilename',
3460 help='simulate, quiet but print output filename', default=False)
3461 verbosity.add_option('--no-progress',
3462 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
3463 verbosity.add_option('--console-title',
3464 action='store_true', dest='consoletitle',
3465 help='display progress in console titlebar', default=False)
3468 filesystem.add_option('-t', '--title',
3469 action='store_true', dest='usetitle', help='use title in file name', default=False)
3470 filesystem.add_option('-l', '--literal',
3471 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
3472 filesystem.add_option('-A', '--auto-number',
3473 action='store_true', dest='autonumber',
3474 help='number downloaded files starting from 00000', default=False)
3475 filesystem.add_option('-o', '--output',
3476 dest='outtmpl', metavar='TEMPLATE', help='output filename template')
3477 filesystem.add_option('-a', '--batch-file',
3478 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
3479 filesystem.add_option('-w', '--no-overwrites',
3480 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
3481 filesystem.add_option('-c', '--continue',
3482 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
3483 filesystem.add_option('--cookies',
3484 dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
3485 filesystem.add_option('--no-part',
3486 action='store_true', dest='nopart', help='do not use .part files', default=False)
# store_false + default=True: mtime updating is on unless --no-mtime given.
3487 filesystem.add_option('--no-mtime',
3488 action='store_false', dest='updatetime',
3489 help='do not use the Last-modified header to set the file modification time', default=True)
3490 filesystem.add_option('--write-description',
3491 action='store_true', dest='writedescription',
3492 help='write video description to a .description file', default=False)
3493 filesystem.add_option('--write-info-json',
3494 action='store_true', dest='writeinfojson',
3495 help='write video metadata to a .info.json file', default=False)
3498 postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
3499 help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
3500 postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
3501 help='"best", "aac" or "mp3"; best by default')
3504 parser.add_option_group(general)
3505 parser.add_option_group(filesystem)
3506 parser.add_option_group(verbosity)
3507 parser.add_option_group(video_format)
3508 parser.add_option_group(authentication)
3509 parser.add_option_group(postproc)
3511 opts, args = parser.parse_args()
3513 return parser, opts, args
# Main program body (its enclosing def line is elided in this excerpt):
# parse options, validate them, wire up all InfoExtractors and the
# FileDownloader, run the downloads, and persist cookies.
# NOTE(review): garbled excerpt with stray numeric line prefixes and elided
# `try:`/`else:`/`sys.exit` lines; comments only, code text untouched.
3516 parser, opts, args = parseOpts()
3518 # Open appropriate CookieJar
3519 if opts.cookiefile is None:
3520 jar = cookielib.CookieJar()
# With --cookies, load the Mozilla-format jar if it already exists and is
# readable (the load call and error exit are partly elided).
3523 jar = cookielib.MozillaCookieJar(opts.cookiefile)
3524 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
3526 except (IOError, OSError), err:
3527 sys.exit(u'ERROR: unable to open cookie file')
# --dump-user-agent prints the UA from the module-level std_headers dict.
3530 if opts.dump_user_agent:
3531 print std_headers['User-Agent']
3534 # General configuration
3535 cookie_processor = urllib2.HTTPCookieProcessor(jar)
3536 opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())
3537 urllib2.install_opener(opener)
3538 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
3540 # Batch file verification
3542 if opts.batchfile is not None:
3544 if opts.batchfile == '-':
3547 batchfd = open(opts.batchfile, 'r')
3548 batchurls = batchfd.readlines()
3549 batchurls = [x.strip() for x in batchurls]
# Skip blank lines and #, /, ; comment lines in the batch file.
3550 batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
3552 sys.exit(u'ERROR: batch file could not be read')
3553 all_urls = batchurls + args
3555 # Conflicting, missing and erroneous options
3556 if opts.usenetrc and (opts.username is not None or opts.password is not None):
3557 parser.error(u'using .netrc conflicts with giving username/password')
3558 if opts.password is not None and opts.username is None:
3559 parser.error(u'account username missing')
3560 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
3561 parser.error(u'using output template conflicts with using title, literal title or auto number')
3562 if opts.usetitle and opts.useliteral:
3563 parser.error(u'using title conflicts with using literal title')
# Prompt interactively when only a username was supplied.
3564 if opts.username is not None and opts.password is None:
3565 opts.password = getpass.getpass(u'Type account password and press return:')
# Normalise string options into numbers, rejecting bad values early.
3566 if opts.ratelimit is not None:
3567 numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
3568 if numeric_limit is None:
3569 parser.error(u'invalid rate limit specified')
3570 opts.ratelimit = numeric_limit
3571 if opts.retries is not None:
3573 opts.retries = long(opts.retries)
3574 except (TypeError, ValueError), err:
3575 parser.error(u'invalid retry count specified')
3577 opts.playliststart = int(opts.playliststart)
3578 if opts.playliststart <= 0:
3579 raise ValueError(u'Playlist start must be positive')
3580 except (TypeError, ValueError), err:
3581 parser.error(u'invalid playlist start number specified')
3583 opts.playlistend = int(opts.playlistend)
# -1 means "until the last video"; otherwise the end must follow the start.
3584 if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
3585 raise ValueError(u'Playlist end must be greater than playlist start')
3586 except (TypeError, ValueError), err:
3587 parser.error(u'invalid playlist end number specified')
3588 if opts.extractaudio:
3589 if opts.audioformat not in ['best', 'aac', 'mp3']:
3590 parser.error(u'invalid audio format specified')
3592 # Information extractors
# Several IEs share the YouTube/Google/Yahoo IE instance they delegate to.
3593 youtube_ie = YoutubeIE()
3594 metacafe_ie = MetacafeIE(youtube_ie)
3595 dailymotion_ie = DailymotionIE()
3596 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
3597 youtube_user_ie = YoutubeUserIE(youtube_ie)
3598 youtube_search_ie = YoutubeSearchIE(youtube_ie)
3599 google_ie = GoogleIE()
3600 google_search_ie = GoogleSearchIE(google_ie)
3601 photobucket_ie = PhotobucketIE()
3602 yahoo_ie = YahooIE()
3603 yahoo_search_ie = YahooSearchIE(yahoo_ie)
3604 deposit_files_ie = DepositFilesIE()
3605 facebook_ie = FacebookIE()
3606 bliptv_ie = BlipTVIE()
3607 vimeo_ie = VimeoIE()
3608 myvideo_ie = MyVideoIE()
3609 comedycentral_ie = ComedyCentralIE()
3611 generic_ie = GenericIE()
# Build the downloader from the validated options.
3614 fd = FileDownloader({
3615 'usenetrc': opts.usenetrc,
3616 'username': opts.username,
3617 'password': opts.password,
# Any of the get-* options forces quiet + simulate.
3618 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
3619 'forceurl': opts.geturl,
3620 'forcetitle': opts.gettitle,
3621 'forcethumbnail': opts.getthumbnail,
3622 'forcedescription': opts.getdescription,
3623 'forcefilename': opts.getfilename,
3624 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
3625 'format': opts.format,
3626 'format_limit': opts.format_limit,
# First truthy template wins: explicit -o, then the --all-formats ('-1')
# variants, then title/literal/autonumber combinations, then plain id.
3627 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
3628 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
3629 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
3630 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
3631 or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
3632 or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
3633 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
3634 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
3635 or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
3636 or u'%(id)s.%(ext)s'),
3637 'ignoreerrors': opts.ignoreerrors,
3638 'ratelimit': opts.ratelimit,
3639 'nooverwrites': opts.nooverwrites,
3640 'retries': opts.retries,
3641 'continuedl': opts.continue_dl,
3642 'noprogress': opts.noprogress,
3643 'playliststart': opts.playliststart,
3644 'playlistend': opts.playlistend,
# Writing the video to stdout (-o -) forces logging to stderr.
3645 'logtostderr': opts.outtmpl == '-',
3646 'consoletitle': opts.consoletitle,
3647 'nopart': opts.nopart,
3648 'updatetime': opts.updatetime,
3649 'writedescription': opts.writedescription,
3650 'writeinfojson': opts.writeinfojson,
# Registration order matters: more specific IEs (search, playlist, user)
# come before the plain ones they would otherwise shadow.
3652 fd.add_info_extractor(youtube_search_ie)
3653 fd.add_info_extractor(youtube_pl_ie)
3654 fd.add_info_extractor(youtube_user_ie)
3655 fd.add_info_extractor(metacafe_ie)
3656 fd.add_info_extractor(dailymotion_ie)
3657 fd.add_info_extractor(youtube_ie)
3658 fd.add_info_extractor(google_ie)
3659 fd.add_info_extractor(google_search_ie)
3660 fd.add_info_extractor(photobucket_ie)
3661 fd.add_info_extractor(yahoo_ie)
3662 fd.add_info_extractor(yahoo_search_ie)
3663 fd.add_info_extractor(deposit_files_ie)
3664 fd.add_info_extractor(facebook_ie)
3665 fd.add_info_extractor(bliptv_ie)
3666 fd.add_info_extractor(vimeo_ie)
3667 fd.add_info_extractor(myvideo_ie)
3668 fd.add_info_extractor(comedycentral_ie)
3670 # This must come last since it's the
3671 # fallback if none of the others work
3672 fd.add_info_extractor(generic_ie)
3675 if opts.extractaudio:
3676 fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat))
# -U replaces this very script (sys.argv[0]) with the latest release.
3679 if opts.update_self:
3680 updateSelf(fd, sys.argv[0])
3683 if len(all_urls) < 1:
3684 if not opts.update_self:
3685 parser.error(u'you must provide at least one URL')
3688 retcode = fd.download(all_urls)
3690 # Dump cookie jar if requested
3691 if opts.cookiefile is not None:
3694 except (IOError, OSError), err:
3695 sys.exit(u'ERROR: unable to save cookie jar')
# Script entry point: run the main body and translate the known exceptions
# into exit messages (the `try:` line and the main-body call are elided in
# this excerpt; code text untouched).
3700 if __name__ == '__main__':
# DownloadError was already reported by the downloader; exit quietly (the
# handler body is elided here).
3703 except DownloadError:
3705 except SameFileError:
3706 sys.exit(u'ERROR: fixed output name but more than one file to download')
3707 except KeyboardInterrupt:
3708 sys.exit(u'\nERROR: Interrupted by user')
3710 # vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: