2 # -*- coding: utf-8 -*-
5 'Ricardo Garcia Gonzalez',
13 'Philipp Hagemeister',
20 __license__ = 'Public Domain'
21 __version__ = '2011.11.23'
23 UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
52 except ImportError: # Python 2.4
55 import cStringIO as StringIO
59 # parse_qs was moved from the cgi module to the urlparse module recently.
61 from urlparse import parse_qs
63 from cgi import parse_qs
71 import xml.etree.ElementTree
72 except ImportError: # Python<2.5: Not officially supported, but let it slip
73 warnings.warn('xml.etree.ElementTree support is missing. Consider upgrading to Python >= 2.5 if you get related errors.')
76 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
77 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
78 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
79 'Accept-Encoding': 'gzip, deflate',
80 'Accept-Language': 'en-us,en;q=0.5',
85 except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
91 def raiseError(msg, i):
92 raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))
93 def skipSpace(i, expectMore=True):
94 while i < len(s) and s[i] in ' \t\r\n':
98 raiseError('Premature end', i)
100 def decodeEscape(match):
116 return unichr(int(esc[1:5], 16))
117 if len(esc) == 5+6 and esc[5:7] == '\\u':
118 hi = int(esc[1:5], 16)
119 low = int(esc[7:11], 16)
120 return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
121 raise ValueError('Unknown escape ' + str(esc))
128 while s[e-bslashes-1] == '\\':
130 if bslashes % 2 == 1:
134 rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)')
135 stri = rexp.sub(decodeEscape, s[i:e])
141 if s[i] == '}': # Empty dictionary
145 raiseError('Expected a string object key', i)
146 i,key = parseString(i)
148 if i >= len(s) or s[i] != ':':
149 raiseError('Expected a colon', i)
156 raiseError('Expected comma or closing curly brace', i)
161 if s[i] == ']': # Empty array
166 i = skipSpace(i) # Raise exception if premature end
170 raiseError('Expected a comma or closing bracket', i)
172 def parseDiscrete(i):
173 for k,v in {'true': True, 'false': False, 'null': None}.items():
174 if s.startswith(k, i):
176 raiseError('Not a boolean (or null)', i)
178 mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:])
180 raiseError('Not a number', i)
182 if '.' in nums or 'e' in nums or 'E' in nums:
183 return (i+len(nums), float(nums))
184 return (i+len(nums), int(nums))
185 CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}
188 i,res = CHARMAP.get(s[i], parseNumber)(i)
189 i = skipSpace(i, False)
193 raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
196 def preferredencoding():
197 """Get preferred encoding.
199 Returns the best encoding scheme for the system, based on
200 locale.getpreferredencoding() and some further tweaks.
202 def yield_preferredencoding():
204 pref = locale.getpreferredencoding()
210 return yield_preferredencoding().next()
213 def htmlentity_transform(matchobj):
214 """Transforms an HTML entity to a Unicode character.
216 This function receives a match object and is intended to be used with
217 the re.sub() function.
219 entity = matchobj.group(1)
221 # Known non-numeric HTML entity
222 if entity in htmlentitydefs.name2codepoint:
223 return unichr(htmlentitydefs.name2codepoint[entity])
226 mobj = re.match(ur'(?u)#(x?\d+)', entity)
228 numstr = mobj.group(1)
229 if numstr.startswith(u'x'):
231 numstr = u'0%s' % numstr
234 return unichr(long(numstr, base))
236 # Unknown entity in name, return its literal representation
237 return (u'&%s;' % entity)
240 def sanitize_title(utitle):
241 """Sanitizes a video title so it could be used as part of a filename."""
242 utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
243 return utitle.replace(unicode(os.sep), u'%')
246 def sanitize_open(filename, open_mode):
247 """Try to open the given filename, and slightly tweak it if this fails.
249 Attempts to open the given filename. If this fails, it tries to change
250 the filename slightly, step by step, until it's either able to open it
251 or it fails and raises a final exception, like the standard open()
254 It returns the tuple (stream, definitive_file_name).
258 if sys.platform == 'win32':
260 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
261 return (sys.stdout, filename)
262 stream = open(filename, open_mode)
263 return (stream, filename)
264 except (IOError, OSError), err:
265 # In case of error, try to remove win32 forbidden chars
266 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
268 # An exception here should be caught in the caller
269 stream = open(filename, open_mode)
270 return (stream, filename)
273 def timeconvert(timestr):
274 """Convert RFC 2822 defined time string into system timestamp"""
276 timetuple = email.utils.parsedate_tz(timestr)
277 if timetuple is not None:
278 timestamp = email.utils.mktime_tz(timetuple)
281 def _simplify_title(title):
282 expr = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE)
283 return expr.sub(u'_', title).strip(u'_')
285 class DownloadError(Exception):
286 """Download Error exception.
288 This exception may be thrown by FileDownloader objects if they are not
289 configured to continue on errors. They will contain the appropriate
295 class SameFileError(Exception):
296 """Same File exception.
298 This exception will be thrown by FileDownloader objects if they detect
299 multiple files would have to be downloaded to the same file on disk.
304 class PostProcessingError(Exception):
305 """Post Processing exception.
307 This exception may be raised by PostProcessor's .run() method to
308 indicate an error in the postprocessing task.
313 class UnavailableVideoError(Exception):
314 """Unavailable Format exception.
316 This exception will be thrown when a video is requested
317 in a format that is not available for that video.
322 class ContentTooShortError(Exception):
323 """Content Too Short exception.
325 This exception may be raised by FileDownloader objects when a file they
326 download is too small for what the server announced first, indicating
327 the connection was probably interrupted.
def __init__(self, downloaded, expected):
	"""Record the byte counts: *downloaded* is what was actually
	received, *expected* is what the server announced."""
	self.downloaded = downloaded
	self.expected = expected
338 class YoutubeDLHandler(urllib2.HTTPHandler):
339 """Handler for HTTP requests and responses.
341 This class, when installed with an OpenerDirector, automatically adds
342 the standard headers to every HTTP request and handles gzipped and
343 deflated responses from web servers. If compression is to be avoided in
344 a particular request, the original request in the program code only has
345 to include the HTTP header "Youtubedl-No-Compression", which will be
346 removed before making the real request.
348 Part of this code was copied from:
350 http://techknack.net/python-urllib2-handlers/
352 Andrew Rowls, the author of that code, agreed to release it to the
359 return zlib.decompress(data, -zlib.MAX_WBITS)
361 return zlib.decompress(data)
364 def addinfourl_wrapper(stream, headers, url, code):
365 if hasattr(urllib2.addinfourl, 'getcode'):
366 return urllib2.addinfourl(stream, headers, url, code)
367 ret = urllib2.addinfourl(stream, headers, url)
371 def http_request(self, req):
372 for h in std_headers:
375 req.add_header(h, std_headers[h])
376 if 'Youtubedl-no-compression' in req.headers:
377 if 'Accept-encoding' in req.headers:
378 del req.headers['Accept-encoding']
379 del req.headers['Youtubedl-no-compression']
382 def http_response(self, req, resp):
385 if resp.headers.get('Content-encoding', '') == 'gzip':
386 gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
387 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
388 resp.msg = old_resp.msg
390 if resp.headers.get('Content-encoding', '') == 'deflate':
391 gz = StringIO.StringIO(self.deflate(resp.read()))
392 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
393 resp.msg = old_resp.msg
397 class FileDownloader(object):
398 """File Downloader class.
400 File downloader objects are the ones responsible of downloading the
401 actual video file and writing it to disk if the user has requested
402 it, among some other tasks. In most cases there should be one per
403 program. As, given a video URL, the downloader doesn't know how to
404 extract all the needed information, task that InfoExtractors do, it
405 has to pass the URL to one of them.
407 For this, file downloader objects have a method that allows
408 InfoExtractors to be registered in a given order. When it is passed
409 a URL, the file downloader handles it to the first InfoExtractor it
410 finds that reports being able to handle it. The InfoExtractor extracts
411 all the information about the video or videos the URL refers to, and
412 asks the FileDownloader to process the video information, possibly
413 downloading the video.
415 File downloaders accept a lot of parameters. In order not to saturate
416 the object constructor with arguments, it receives a dictionary of
417 options instead. These options are available through the params
418 attribute for the InfoExtractors to use. The FileDownloader also
419 registers itself as the downloader in charge for the InfoExtractors
420 that are added to it, so this is a "mutual registration".
424 username: Username for authentication purposes.
425 password: Password for authentication purposes.
426 usenetrc: Use netrc for authentication instead.
427 quiet: Do not print messages to stdout.
428 forceurl: Force printing final URL.
429 forcetitle: Force printing title.
430 forcethumbnail: Force printing thumbnail URL.
431 forcedescription: Force printing description.
432 forcefilename: Force printing final filename.
433 simulate: Do not download the video files.
434 format: Video format code.
435 format_limit: Highest quality format to try.
436 outtmpl: Template for output names.
437 ignoreerrors: Do not stop on download errors.
438 ratelimit: Download speed limit, in bytes/sec.
439 nooverwrites: Prevent overwriting files.
440 retries: Number of times to retry for HTTP error 5xx
441 continuedl: Try to continue downloads if possible.
442 noprogress: Do not print the progress bar.
443 playliststart: Playlist item to start at.
444 playlistend: Playlist item to end at.
445 matchtitle: Download only matching titles.
446 rejecttitle: Reject downloads for matching titles.
447 logtostderr: Log messages to stderr instead of stdout.
448 consoletitle: Display progress in console window's titlebar.
449 nopart: Do not use temporary .part files.
450 updatetime: Use the Last-modified header to set output file timestamps.
451 writedescription: Write the video description to a .description file
452 writeinfojson: Write the video description to a .info.json file
458 _download_retcode = None
459 _num_downloads = None
462 def __init__(self, params):
463 """Create a FileDownloader object with the given options."""
466 self._download_retcode = 0
467 self._num_downloads = 0
468 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
472 def format_bytes(bytes):
475 if type(bytes) is str:
480 exponent = long(math.log(bytes, 1024.0))
481 suffix = 'bkMGTPEZY'[exponent]
482 converted = float(bytes) / float(1024 ** exponent)
483 return '%.2f%s' % (converted, suffix)
486 def calc_percent(byte_counter, data_len):
489 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
492 def calc_eta(start, now, total, current):
496 if current == 0 or dif < 0.001: # One millisecond
498 rate = float(current) / dif
499 eta = long((float(total) - float(current)) / rate)
500 (eta_mins, eta_secs) = divmod(eta, 60)
503 return '%02d:%02d' % (eta_mins, eta_secs)
506 def calc_speed(start, now, bytes):
508 if bytes == 0 or dif < 0.001: # One millisecond
509 return '%10s' % '---b/s'
510 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
513 def best_block_size(elapsed_time, bytes):
514 new_min = max(bytes / 2.0, 1.0)
515 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
516 if elapsed_time < 0.001:
518 rate = bytes / elapsed_time
526 def parse_bytes(bytestr):
527 """Parse a string indicating a byte quantity into a long integer."""
528 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
531 number = float(matchobj.group(1))
532 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
533 return long(round(number * multiplier))
535 def add_info_extractor(self, ie):
536 """Add an InfoExtractor object to the end of the list."""
538 ie.set_downloader(self)
540 def add_post_processor(self, pp):
541 """Add a PostProcessor object to the end of the chain."""
543 pp.set_downloader(self)
545 def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
546 """Print message to stdout if not in quiet mode."""
548 if not self.params.get('quiet', False):
549 terminator = [u'\n', u''][skip_eol]
550 print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
551 self._screen_file.flush()
552 except (UnicodeEncodeError), err:
553 if not ignore_encoding_errors:
def to_stderr(self, message):
	"""Write *message*, encoded and newline-terminated, to stderr."""
	encoded = message.encode(preferredencoding())
	sys.stderr.write(encoded)
	sys.stderr.write('\n')
560 def to_cons_title(self, message):
561 """Set console/terminal window title to message."""
562 if not self.params.get('consoletitle', False):
564 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
565 # c_wchar_p() might not be necessary if `message` is
566 # already of type unicode()
567 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
568 elif 'TERM' in os.environ:
569 sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
571 def fixed_template(self):
572 """Checks if the output template is fixed."""
573 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
575 def trouble(self, message=None):
576 """Determine action to take when a download problem appears.
578 Depending on if the downloader has been configured to ignore
579 download errors or not, this method may throw an exception or
580 not when errors are found, after printing the message.
582 if message is not None:
583 self.to_stderr(message)
584 if not self.params.get('ignoreerrors', False):
585 raise DownloadError(message)
586 self._download_retcode = 1
588 def slow_down(self, start_time, byte_counter):
589 """Sleep if the download speed is over the rate limit."""
590 rate_limit = self.params.get('ratelimit', None)
591 if rate_limit is None or byte_counter == 0:
594 elapsed = now - start_time
597 speed = float(byte_counter) / elapsed
598 if speed > rate_limit:
599 time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
601 def temp_name(self, filename):
602 """Returns a temporary filename for the given filename."""
603 if self.params.get('nopart', False) or filename == u'-' or \
604 (os.path.exists(filename) and not os.path.isfile(filename)):
606 return filename + u'.part'
608 def undo_temp_name(self, filename):
609 if filename.endswith(u'.part'):
610 return filename[:-len(u'.part')]
613 def try_rename(self, old_filename, new_filename):
615 if old_filename == new_filename:
617 os.rename(old_filename, new_filename)
618 except (IOError, OSError), err:
619 self.trouble(u'ERROR: unable to rename file')
621 def try_utime(self, filename, last_modified_hdr):
622 """Try to set the last-modified time of the given file."""
623 if last_modified_hdr is None:
625 if not os.path.isfile(filename):
627 timestr = last_modified_hdr
630 filetime = timeconvert(timestr)
634 os.utime(filename, (time.time(), filetime))
def report_writedescription(self, descfn):
	"""Announce that the video description is being written to *descfn*."""
	msg = u'[info] Writing video description to: %s' % descfn
	self.to_screen(msg, ignore_encoding_errors=True)
def report_writeinfojson(self, infofn):
	"""Announce that the JSON metadata file *infofn* is being written."""
	msg = u'[info] Video description metadata as JSON to: %s' % infofn
	self.to_screen(msg, ignore_encoding_errors=True)
def report_destination(self, filename):
	"""Announce the target filename chosen for the download."""
	msg = u'[download] Destination: %s' % filename
	self.to_screen(msg, ignore_encoding_errors=True)
651 def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
652 """Report download progress."""
653 if self.params.get('noprogress', False):
655 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
656 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
657 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
658 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
def report_resuming_byte(self, resume_len):
	"""Announce that the download resumes at byte offset *resume_len*."""
	msg = u'[download] Resuming download at byte %s' % resume_len
	self.to_screen(msg)
def report_retry(self, count, retries):
	"""Announce retry attempt *count* of *retries* after an HTTP 5xx error."""
	msg = u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries)
	self.to_screen(msg)
668 def report_file_already_downloaded(self, file_name):
669 """Report file has already been fully downloaded."""
671 self.to_screen(u'[download] %s has already been downloaded' % file_name)
672 except (UnicodeEncodeError), err:
673 self.to_screen(u'[download] The file has already been downloaded')
def report_unable_to_resume(self):
	"""Announce that resuming the partial download was not possible."""
	self.to_screen(u'[download] Unable to resume')
679 def report_finish(self):
680 """Report download finished."""
681 if self.params.get('noprogress', False):
682 self.to_screen(u'[download] Download completed')
def increment_downloads(self):
	"""Advance the per-run ordinal used to number downloaded files
	(feeds the %(autonumber)s output-template field)."""
	self._num_downloads = self._num_downloads + 1
690 def prepare_filename(self, info_dict):
691 """Generate the output filename."""
693 template_dict = dict(info_dict)
694 template_dict['epoch'] = unicode(long(time.time()))
695 template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
696 filename = self.params['outtmpl'] % template_dict
698 except (ValueError, KeyError), err:
699 self.trouble(u'ERROR: invalid system charset or erroneous output template')
702 def process_info(self, info_dict):
703 """Process a single dictionary returned by an InfoExtractor."""
704 filename = self.prepare_filename(info_dict)
707 if self.params.get('forcetitle', False):
708 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
709 if self.params.get('forceurl', False):
710 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
711 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
712 print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
713 if self.params.get('forcedescription', False) and 'description' in info_dict:
714 print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
715 if self.params.get('forcefilename', False) and filename is not None:
716 print filename.encode(preferredencoding(), 'xmlcharrefreplace')
717 if self.params.get('forceformat', False):
718 print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')
720 # Do nothing else if in simulate mode
721 if self.params.get('simulate', False):
727 matchtitle=self.params.get('matchtitle',False)
728 rejecttitle=self.params.get('rejecttitle',False)
729 title=info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
730 if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
731 self.to_screen(u'[download] "%s" title did not match pattern "%s"' % (title, matchtitle))
733 if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
734 self.to_screen(u'[download] "%s" title matched reject pattern "%s"' % (title, rejecttitle))
737 if self.params.get('nooverwrites', False) and os.path.exists(filename):
738 self.to_stderr(u'WARNING: file exists and will be skipped')
742 dn = os.path.dirname(filename)
743 if dn != '' and not os.path.exists(dn):
745 except (OSError, IOError), err:
746 self.trouble(u'ERROR: unable to create directory ' + unicode(err))
749 if self.params.get('writedescription', False):
751 descfn = filename + '.description'
752 self.report_writedescription(descfn)
753 descfile = open(descfn, 'wb')
755 descfile.write(info_dict['description'].encode('utf-8'))
758 except (OSError, IOError):
759 self.trouble(u'ERROR: Cannot write description file ' + descfn)
762 if self.params.get('writeinfojson', False):
763 infofn = filename + '.info.json'
764 self.report_writeinfojson(infofn)
767 except (NameError,AttributeError):
768 self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
771 infof = open(infofn, 'wb')
773 json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
774 json.dump(json_info_dict, infof)
777 except (OSError, IOError):
778 self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
781 if not self.params.get('skip_download', False):
783 success = self._do_download(filename, info_dict)
784 except (OSError, IOError), err:
785 raise UnavailableVideoError
786 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
787 self.trouble(u'ERROR: unable to download video data: %s' % str(err))
789 except (ContentTooShortError, ), err:
790 self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
795 self.post_process(filename, info_dict)
796 except (PostProcessingError), err:
797 self.trouble(u'ERROR: postprocessing: %s' % str(err))
800 def download(self, url_list):
801 """Download a given list of URLs."""
802 if len(url_list) > 1 and self.fixed_template():
803 raise SameFileError(self.params['outtmpl'])
806 suitable_found = False
808 # Go to next InfoExtractor if not suitable
809 if not ie.suitable(url):
812 # Suitable InfoExtractor found
813 suitable_found = True
815 # Extract information from URL and process it
818 # Suitable InfoExtractor had been found; go to next URL
821 if not suitable_found:
822 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
824 return self._download_retcode
826 def post_process(self, filename, ie_info):
827 """Run the postprocessing chain on the given file."""
829 info['filepath'] = filename
835 def _download_with_rtmpdump(self, filename, url, player_url):
836 self.report_destination(filename)
837 tmpfilename = self.temp_name(filename)
839 # Check for rtmpdump first
841 subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
842 except (OSError, IOError):
843 self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
846 # Download using rtmpdump. rtmpdump returns exit code 2 when
847 # the connection was interrumpted and resuming appears to be
848 # possible. This is part of rtmpdump's normal usage, AFAIK.
849 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
850 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
851 while retval == 2 or retval == 1:
852 prevsize = os.path.getsize(tmpfilename)
853 self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
854 time.sleep(5.0) # This seems to be needed
855 retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
856 cursize = os.path.getsize(tmpfilename)
857 if prevsize == cursize and retval == 1:
859 # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
860 if prevsize == cursize and retval == 2 and cursize > 1024:
861 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
865 self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
866 self.try_rename(tmpfilename, filename)
869 self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
872 def _do_download(self, filename, info_dict):
873 url = info_dict['url']
874 player_url = info_dict.get('player_url', None)
876 # Check file already present
877 if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
878 self.report_file_already_downloaded(filename)
881 # Attempt to download using rtmpdump
882 if url.startswith('rtmp'):
883 return self._download_with_rtmpdump(filename, url, player_url)
885 tmpfilename = self.temp_name(filename)
888 # Do not include the Accept-Encoding header
889 headers = {'Youtubedl-no-compression': 'True'}
890 basic_request = urllib2.Request(url, None, headers)
891 request = urllib2.Request(url, None, headers)
893 # Establish possible resume length
894 if os.path.isfile(tmpfilename):
895 resume_len = os.path.getsize(tmpfilename)
901 if self.params.get('continuedl', False):
902 self.report_resuming_byte(resume_len)
903 request.add_header('Range','bytes=%d-' % resume_len)
909 retries = self.params.get('retries', 0)
910 while count <= retries:
911 # Establish connection
913 if count == 0 and 'urlhandle' in info_dict:
914 data = info_dict['urlhandle']
915 data = urllib2.urlopen(request)
917 except (urllib2.HTTPError, ), err:
918 if (err.code < 500 or err.code >= 600) and err.code != 416:
919 # Unexpected HTTP error
921 elif err.code == 416:
922 # Unable to resume (requested range not satisfiable)
924 # Open the connection again without the range header
925 data = urllib2.urlopen(basic_request)
926 content_length = data.info()['Content-Length']
927 except (urllib2.HTTPError, ), err:
928 if err.code < 500 or err.code >= 600:
931 # Examine the reported length
932 if (content_length is not None and
933 (resume_len - 100 < long(content_length) < resume_len + 100)):
934 # The file had already been fully downloaded.
935 # Explanation to the above condition: in issue #175 it was revealed that
936 # YouTube sometimes adds or removes a few bytes from the end of the file,
937 # changing the file size slightly and causing problems for some users. So
938 # I decided to implement a suggested change and consider the file
939 # completely downloaded if the file size differs less than 100 bytes from
940 # the one in the hard drive.
941 self.report_file_already_downloaded(filename)
942 self.try_rename(tmpfilename, filename)
945 # The length does not match, we start the download over
946 self.report_unable_to_resume()
952 self.report_retry(count, retries)
955 self.trouble(u'ERROR: giving up after %s retries' % retries)
958 data_len = data.info().get('Content-length', None)
959 if data_len is not None:
960 data_len = long(data_len) + resume_len
961 data_len_str = self.format_bytes(data_len)
962 byte_counter = 0 + resume_len
968 data_block = data.read(block_size)
970 if len(data_block) == 0:
972 byte_counter += len(data_block)
974 # Open file just in time
977 (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
978 assert stream is not None
979 filename = self.undo_temp_name(tmpfilename)
980 self.report_destination(filename)
981 except (OSError, IOError), err:
982 self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
985 stream.write(data_block)
986 except (IOError, OSError), err:
987 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
989 block_size = self.best_block_size(after - before, len(data_block))
992 speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
994 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
996 percent_str = self.calc_percent(byte_counter, data_len)
997 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
998 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
1001 self.slow_down(start, byte_counter - resume_len)
1004 self.trouble(u'\nERROR: Did not get any data blocks')
1007 self.report_finish()
1008 if data_len is not None and byte_counter != data_len:
1009 raise ContentTooShortError(byte_counter, long(data_len))
1010 self.try_rename(tmpfilename, filename)
1012 # Update file modification time
1013 if self.params.get('updatetime', True):
1014 info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
1019 class InfoExtractor(object):
1020 """Information Extractor class.
1022 Information extractors are the classes that, given a URL, extract
1023 information from the video (or videos) the URL refers to. This
1024 information includes the real video URL, the video title and simplified
1025 title, author and others. The information is stored in a dictionary
1026 which is then passed to the FileDownloader. The FileDownloader
1027 processes this information possibly downloading the video to the file
1028 system, among other possible outcomes. The dictionaries must include
1029 the following fields:
1031 id: Video identifier.
1032 url: Final video URL.
1033 uploader: Nickname of the video uploader.
1034 title: Literal title.
1035 stitle: Simplified title.
1036 ext: Video filename extension.
1037 format: Video format.
1038 player_url: SWF Player URL (may be None).
1040 The following fields are optional. Their primary purpose is to allow
1041 youtube-dl to serve as the backend for a video search function, such
1042 as the one in youtube2mp3. They are only used when their respective
1043 forced printing functions are called:
1045 thumbnail: Full URL to a video thumbnail image.
1046 description: One-line video description.
1048 Subclasses of this one should re-define the _real_initialize() and
1049 _real_extract() methods and define a _VALID_URL regexp.
1050 Probably, they should also be added to the list of extractors.
1056 def __init__(self, downloader=None):
1057 """Constructor. Receives an optional downloader."""
1059 self.set_downloader(downloader)
def suitable(self, url):
	"""Return True when this extractor's _VALID_URL pattern matches *url*."""
	match = re.match(self._VALID_URL, url)
	return match is not None
1065 def initialize(self):
1066 """Initializes an instance (authentication, etc)."""
1068 self._real_initialize()
1071 def extract(self, url):
1072 """Extracts URL information and returns it in list of dicts."""
1074 return self._real_extract(url)
def set_downloader(self, downloader):
	"""Attach *downloader* as the FileDownloader this extractor reports to."""
	self._downloader = downloader
1080 def _real_initialize(self):
1081 """Real initialization process. Redefine in subclasses."""
1084 def _real_extract(self, url):
1085 """Real extraction process. Redefine in subclasses."""
1089 class YoutubeIE(InfoExtractor):
1090 """Information extractor for youtube.com."""
1092 _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
1093 _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
1094 _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
1095 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
1096 _NETRC_MACHINE = 'youtube'
1097 # Listed in order of quality
1098 _available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
1099 _video_extensions = {
1105 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
1110 _video_dimensions = {
1125 IE_NAME = u'youtube'
def report_lang(self):
	"""Announce the attempt to set the site language."""
	self._downloader.to_screen(u'[youtube] Setting language')
def report_login(self):
	"""Announce the attempt to log in."""
	self._downloader.to_screen(u'[youtube] Logging in')
def report_age_confirmation(self):
	"""Announce the attempt to confirm the user's age."""
	self._downloader.to_screen(u'[youtube] Confirming age')
1139 def report_video_webpage_download(self, video_id):
1140 """Report attempt to download video webpage."""
1141 self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
1143 def report_video_info_webpage_download(self, video_id):
1144 """Report attempt to download video info webpage."""
1145 self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
1147 def report_information_extraction(self, video_id):
1148 """Report attempt to extract video information."""
1149 self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
1151 def report_unavailable_format(self, video_id, format):
1152 """Report extracted video URL."""
1153 self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
1155 def report_rtmp_download(self):
1156 """Indicate the download will use the RTMP protocol."""
1157 self._downloader.to_screen(u'[youtube] RTMP download detected')
1159 def _print_formats(self, formats):
1160 print 'Available formats:'
1162 print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))
1164 def _real_initialize(self):
1165 if self._downloader is None:
1170 downloader_params = self._downloader.params
1172 # Attempt to use provided username and password or .netrc data
1173 if downloader_params.get('username', None) is not None:
1174 username = downloader_params['username']
1175 password = downloader_params['password']
1176 elif downloader_params.get('usenetrc', False):
1178 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
1179 if info is not None:
1183 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
1184 except (IOError, netrc.NetrcParseError), err:
1185 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
1189 request = urllib2.Request(self._LANG_URL)
1192 urllib2.urlopen(request).read()
1193 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1194 self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
1197 # No authentication to be performed
1198 if username is None:
1203 'current_form': 'loginForm',
1205 'action_login': 'Log In',
1206 'username': username,
1207 'password': password,
1209 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
1212 login_results = urllib2.urlopen(request).read()
1213 if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
1214 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
1216 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1217 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
1223 'action_confirm': 'Confirm',
1225 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
1227 self.report_age_confirmation()
1228 age_results = urllib2.urlopen(request).read()
1229 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1230 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
1233 def _real_extract(self, url):
1234 # Extract video id from URL
1235 mobj = re.match(self._VALID_URL, url)
1237 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1239 video_id = mobj.group(2)
1242 self.report_video_webpage_download(video_id)
1243 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
1245 video_webpage = urllib2.urlopen(request).read()
1246 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1247 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
1250 # Attempt to extract SWF player URL
1251 mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1252 if mobj is not None:
1253 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1258 self.report_video_info_webpage_download(video_id)
1259 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1260 video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1261 % (video_id, el_type))
1262 request = urllib2.Request(video_info_url)
1264 video_info_webpage = urllib2.urlopen(request).read()
1265 video_info = parse_qs(video_info_webpage)
1266 if 'token' in video_info:
1268 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1269 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
1271 if 'token' not in video_info:
1272 if 'reason' in video_info:
1273 self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
1275 self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
1278 # Start extracting information
1279 self.report_information_extraction(video_id)
1282 if 'author' not in video_info:
1283 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1285 video_uploader = urllib.unquote_plus(video_info['author'][0])
1288 if 'title' not in video_info:
1289 self._downloader.trouble(u'ERROR: unable to extract video title')
1291 video_title = urllib.unquote_plus(video_info['title'][0])
1292 video_title = video_title.decode('utf-8')
1293 video_title = sanitize_title(video_title)
1296 simple_title = _simplify_title(video_title)
1299 if 'thumbnail_url' not in video_info:
1300 self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
1301 video_thumbnail = ''
1302 else: # don't panic if we can't find it
1303 video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
1307 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1308 if mobj is not None:
1309 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1310 format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
1311 for expression in format_expressions:
1313 upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
1321 video_description = u'No description available.'
1322 if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False):
1323 mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
1324 if mobj is not None:
1325 video_description = mobj.group(1).decode('utf-8')
1327 html_parser = lxml.etree.HTMLParser(encoding='utf-8')
1328 vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
1329 video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
1330 # TODO use another parser
1333 video_token = urllib.unquote_plus(video_info['token'][0])
1335 # Decide which formats to download
1336 req_format = self._downloader.params.get('format', None)
1338 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1339 self.report_rtmp_download()
1340 video_url_list = [(None, video_info['conn'][0])]
1341 elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1342 url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
1343 url_data = [parse_qs(uds) for uds in url_data_strs]
1344 url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
1345 url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data)
1347 format_limit = self._downloader.params.get('format_limit', None)
1348 if format_limit is not None and format_limit in self._available_formats:
1349 format_list = self._available_formats[self._available_formats.index(format_limit):]
1351 format_list = self._available_formats
1352 existing_formats = [x for x in format_list if x in url_map]
1353 if len(existing_formats) == 0:
1354 self._downloader.trouble(u'ERROR: no known formats available for video')
1356 if self._downloader.params.get('listformats', None):
1357 self._print_formats(existing_formats)
1359 if req_format is None or req_format == 'best':
1360 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1361 elif req_format == 'worst':
1362 video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
1363 elif req_format in ('-1', 'all'):
1364 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1366 # Specific formats. We pick the first in a slash-delimeted sequence.
1367 # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1368 req_formats = req_format.split('/')
1369 video_url_list = None
1370 for rf in req_formats:
1372 video_url_list = [(rf, url_map[rf])]
1374 if video_url_list is None:
1375 self._downloader.trouble(u'ERROR: requested format not available')
1378 self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')
1381 for format_param, video_real_url in video_url_list:
1382 # At this point we have a new video
1383 self._downloader.increment_downloads()
1386 video_extension = self._video_extensions.get(format_param, 'flv')
1389 # Process video information
1390 self._downloader.process_info({
1391 'id': video_id.decode('utf-8'),
1392 'url': video_real_url.decode('utf-8'),
1393 'uploader': video_uploader.decode('utf-8'),
1394 'upload_date': upload_date,
1395 'title': video_title,
1396 'stitle': simple_title,
1397 'ext': video_extension.decode('utf-8'),
1398 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
1399 'thumbnail': video_thumbnail.decode('utf-8'),
1400 'description': video_description,
1401 'player_url': player_url,
1403 except UnavailableVideoError, err:
1404 self._downloader.trouble(u'\nERROR: unable to download video')
class MetacafeIE(InfoExtractor):
    """Information Extractor for metacafe.com.

    NOTE(review): elided excerpt -- `try:` / `if mobj is None:` guard lines,
    `return` statements and some dict lines fell in the gaps; surviving
    statements are shown with `# [elided: ...]` markers.
    """

    _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
    _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
    _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
    IE_NAME = u'metacafe'

    def __init__(self, youtube_ie, downloader=None):
        # Keeps a YoutubeIE around: "yt-..." metacafe ids are delegated to it.
        InfoExtractor.__init__(self, downloader)
        self._youtube_ie = youtube_ie

    def report_disclaimer(self):
        """Report disclaimer retrieval."""
        self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self._downloader.to_screen(u'[metacafe] Confirming age')

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)

    def _real_initialize(self):
        """Fetch the disclaimer page, then POST the family-filter opt-out."""
        # Retrieve disclaimer
        request = urllib2.Request(self._DISCLAIMER)
        # [elided: `try:`]
        self.report_disclaimer()
        disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))

        # Confirm age ([elided: `disclaimer_form = {` opener and filter entry])
            'submit': "Continue - I'm over 18",
        request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
        # [elided: `try:`]
        self.report_age_confirmation()
        disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))

    def _real_extract(self, url):
        """Extract media URL, title and uploader; hand off to process_info."""
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        # [elided: `if mobj is None:` guard and return]
        self._downloader.trouble(u'ERROR: invalid URL: %s' % url)

        video_id = mobj.group(1)

        # Check if video comes from YouTube
        mobj2 = re.match(r'^yt-(.*)$', video_id)
        if mobj2 is not None:
            self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
            # [elided: return]

        # At this point we have a new video
        self._downloader.increment_downloads()

        simple_title = mobj.group(2).decode('utf-8')

        # Retrieve video webpage to extract further information
        request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
        # [elided: `try:`]
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
        if mobj is not None:
            mediaURL = urllib.unquote(mobj.group(1))
            video_extension = mediaURL[-3:]

            # Extract gdaKey if available
            mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
            # [elided: `if mobj is None:`]
            video_url = mediaURL
            # [elided: else]
            gdaKey = mobj.group(1)
            video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
        # [elided: else branch -- flashvars fallback follows]
        mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
        # [elided: `if mobj is None:` guard]
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        vardict = parse_qs(mobj.group(1))
        if 'mediaData' not in vardict:
            self._downloader.trouble(u'ERROR: unable to extract media URL')
            # [elided: return]
        mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
        # [elided: `if mobj is None:` guard]
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        # JSON-escaped URL: un-escape the slashes
        mediaURL = mobj.group(1).replace('\\/', '/')
        video_extension = mediaURL[-3:]
        video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))

        mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
        # [elided: `if mobj is None:` guard]
        self._downloader.trouble(u'ERROR: unable to extract title')
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)

        mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
        # [elided: `if mobj is None:` guard]
        self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
        video_uploader = mobj.group(1)

        # [elided: `try:`]
        # Process video information
        self._downloader.process_info({
            'id': video_id.decode('utf-8'),
            'url': video_url.decode('utf-8'),
            'uploader': video_uploader.decode('utf-8'),
            'upload_date': u'NA',
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
            # [elided: remaining entries and closing brace]
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
class DailymotionIE(InfoExtractor):
    """Information Extractor for Dailymotion.

    NOTE(review): elided excerpt -- guard lines and `return` statements fell
    in the gaps; surviving statements shown with `# [elided: ...]` markers.
    """

    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
    IE_NAME = u'dailymotion'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)

    def _real_extract(self, url):
        """Extract SD media URL, title and uploader; hand to process_info."""
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        # [elided: `if mobj is None:` guard and return]
        self._downloader.trouble(u'ERROR: invalid URL: %s' % url)

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(1)

        simple_title = mobj.group(2).decode('utf-8')
        video_extension = 'flv'

        # Retrieve video webpage to extract further information;
        # the cookie disables Dailymotion's family filter.
        request = urllib2.Request(url)
        request.add_header('Cookie', 'family_filter=off')
        # [elided: `try:`]
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage)
        # [elided: `if mobj is None:` guard]
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        sequence = urllib.unquote(mobj.group(1))
        mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence)
        # [elided: `if mobj is None:` guard]
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '')

        # if needed add http://www.dailymotion.com/ if relative URL

        video_url = mediaURL

        mobj = re.search(r'(?im)<title>Dailymotion\s*-\s*(.+)\s*-\s*[^<]+?</title>', webpage)
        # [elided: `if mobj is None:` guard]
        self._downloader.trouble(u'ERROR: unable to extract title')
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)

        mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
        # [elided: `if mobj is None:` guard]
        self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
        video_uploader = mobj.group(1)

        # [elided: `try:`]
        # Process video information
        self._downloader.process_info({
            'id': video_id.decode('utf-8'),
            'url': video_url.decode('utf-8'),
            'uploader': video_uploader.decode('utf-8'),
            'upload_date': u'NA',
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
            # [elided: remaining entries and closing brace]
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
class GoogleIE(InfoExtractor):
    """Information extractor for video.google.com.

    NOTE(review): elided excerpt -- guard lines and `return` statements fell
    in the gaps; surviving statements shown with `# [elided: ...]` markers.
    """

    _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
    IE_NAME = u'video.google'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)

    def _real_extract(self, url):
        """Extract media URL, title, description, optional thumbnail."""
        # Extract id from URL
        mobj = re.match(self._VALID_URL, url)
        # [elided: `if mobj is None:` guard and return]
        self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(1)

        video_extension = 'mp4'

        # Retrieve video webpage to extract further information
        request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
        # [elided: `try:`]
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))

        # Extract URL, uploader, and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r"download_url:'([^']+)'", webpage)
        # [elided: `if mobj is None:` -- fall back to the flv stream URL]
        video_extension = 'flv'
        mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
        # [elided: `if mobj is None:` guard]
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        mediaURL = urllib.unquote(mobj.group(1))
        # The page embeds the URL with literal backslash escapes
        mediaURL = mediaURL.replace('\\x3d', '\x3d')
        mediaURL = mediaURL.replace('\\x26', '\x26')

        video_url = mediaURL

        mobj = re.search(r'<title>(.*)</title>', webpage)
        # [elided: `if mobj is None:` guard]
        self._downloader.trouble(u'ERROR: unable to extract title')
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)
        simple_title = _simplify_title(video_title)

        # Extract video description
        mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
        # [elided: `if mobj is None:` guard]
        self._downloader.trouble(u'ERROR: unable to extract video description')
        video_description = mobj.group(1).decode('utf-8')
        if not video_description:
            video_description = 'No description available.'

        # Extract video thumbnail (only when the user asked for it)
        if self._downloader.params.get('forcethumbnail', False):
            request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
            # [elided: `try:`]
            webpage = urllib2.urlopen(request).read()
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
            mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
            # [elided: `if mobj is None:` guard]
            self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
            video_thumbnail = mobj.group(1)
        else: # we need something to pass to process_info
            video_thumbnail = ''

        # [elided: `try:`]
        # Process video information
        self._downloader.process_info({
            'id': video_id.decode('utf-8'),
            'url': video_url.decode('utf-8'),
            # [elided: 'uploader' entry]
            'upload_date': u'NA',
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
            # [elided: remaining entries and closing brace]
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
1741 class PhotobucketIE(InfoExtractor):
1742 """Information extractor for photobucket.com."""
1744 _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1745 IE_NAME = u'photobucket'
1747 def __init__(self, downloader=None):
1748 InfoExtractor.__init__(self, downloader)
1750 def report_download_webpage(self, video_id):
1751 """Report webpage download."""
1752 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1754 def report_extraction(self, video_id):
1755 """Report information extraction."""
1756 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1758 def _real_extract(self, url):
1759 # Extract id from URL
1760 mobj = re.match(self._VALID_URL, url)
1762 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1765 # At this point we have a new video
1766 self._downloader.increment_downloads()
1767 video_id = mobj.group(1)
1769 video_extension = 'flv'
1771 # Retrieve video webpage to extract further information
1772 request = urllib2.Request(url)
1774 self.report_download_webpage(video_id)
1775 webpage = urllib2.urlopen(request).read()
1776 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1777 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1780 # Extract URL, uploader, and title from webpage
1781 self.report_extraction(video_id)
1782 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1784 self._downloader.trouble(u'ERROR: unable to extract media URL')
1786 mediaURL = urllib.unquote(mobj.group(1))
1788 video_url = mediaURL
1790 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1792 self._downloader.trouble(u'ERROR: unable to extract title')
1794 video_title = mobj.group(1).decode('utf-8')
1795 video_title = sanitize_title(video_title)
1796 simple_title = _simplify_title(vide_title)
1798 video_uploader = mobj.group(2).decode('utf-8')
1801 # Process video information
1802 self._downloader.process_info({
1803 'id': video_id.decode('utf-8'),
1804 'url': video_url.decode('utf-8'),
1805 'uploader': video_uploader,
1806 'upload_date': u'NA',
1807 'title': video_title,
1808 'stitle': simple_title,
1809 'ext': video_extension.decode('utf-8'),
1813 except UnavailableVideoError:
1814 self._downloader.trouble(u'\nERROR: unable to download video')
1817 class YahooIE(InfoExtractor):
1818 """Information extractor for video.yahoo.com."""
1820 # _VALID_URL matches all Yahoo! Video URLs
1821 # _VPAGE_URL matches only the extractable '/watch/' URLs
1822 _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1823 _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1824 IE_NAME = u'video.yahoo'
1826 def __init__(self, downloader=None):
1827 InfoExtractor.__init__(self, downloader)
1829 def report_download_webpage(self, video_id):
1830 """Report webpage download."""
1831 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1833 def report_extraction(self, video_id):
1834 """Report information extraction."""
1835 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1837 def _real_extract(self, url, new_video=True):
1838 # Extract ID from URL
1839 mobj = re.match(self._VALID_URL, url)
1841 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1844 # At this point we have a new video
1845 self._downloader.increment_downloads()
1846 video_id = mobj.group(2)
1847 video_extension = 'flv'
1849 # Rewrite valid but non-extractable URLs as
1850 # extractable English language /watch/ URLs
1851 if re.match(self._VPAGE_URL, url) is None:
1852 request = urllib2.Request(url)
1854 webpage = urllib2.urlopen(request).read()
1855 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1856 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1859 mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1861 self._downloader.trouble(u'ERROR: Unable to extract id field')
1863 yahoo_id = mobj.group(1)
1865 mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1867 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1869 yahoo_vid = mobj.group(1)
1871 url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1872 return self._real_extract(url, new_video=False)
1874 # Retrieve video webpage to extract further information
1875 request = urllib2.Request(url)
1877 self.report_download_webpage(video_id)
1878 webpage = urllib2.urlopen(request).read()
1879 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1880 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1883 # Extract uploader and title from webpage
1884 self.report_extraction(video_id)
1885 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1887 self._downloader.trouble(u'ERROR: unable to extract video title')
1889 video_title = mobj.group(1).decode('utf-8')
1890 simple_title = _simplify_title(video_title)
1892 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1894 self._downloader.trouble(u'ERROR: unable to extract video uploader')
1896 video_uploader = mobj.group(1).decode('utf-8')
1898 # Extract video thumbnail
1899 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1901 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1903 video_thumbnail = mobj.group(1).decode('utf-8')
1905 # Extract video description
1906 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1908 self._downloader.trouble(u'ERROR: unable to extract video description')
1910 video_description = mobj.group(1).decode('utf-8')
1911 if not video_description:
1912 video_description = 'No description available.'
1914 # Extract video height and width
1915 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1917 self._downloader.trouble(u'ERROR: unable to extract video height')
1919 yv_video_height = mobj.group(1)
1921 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1923 self._downloader.trouble(u'ERROR: unable to extract video width')
1925 yv_video_width = mobj.group(1)
1927 # Retrieve video playlist to extract media URL
1928 # I'm not completely sure what all these options are, but we
1929 # seem to need most of them, otherwise the server sends a 401.
1930 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
1931 yv_bitrate = '700' # according to Wikipedia this is hard-coded
1932 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1933 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1934 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1936 self.report_download_webpage(video_id)
1937 webpage = urllib2.urlopen(request).read()
1938 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1939 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1942 # Extract media URL from playlist XML
1943 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1945 self._downloader.trouble(u'ERROR: Unable to extract media URL')
1947 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1948 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1951 # Process video information
1952 self._downloader.process_info({
1953 'id': video_id.decode('utf-8'),
1955 'uploader': video_uploader,
1956 'upload_date': u'NA',
1957 'title': video_title,
1958 'stitle': simple_title,
1959 'ext': video_extension.decode('utf-8'),
1960 'thumbnail': video_thumbnail.decode('utf-8'),
1961 'description': video_description,
1962 'thumbnail': video_thumbnail,
1965 except UnavailableVideoError:
1966 self._downloader.trouble(u'\nERROR: unable to download video')
1969 class VimeoIE(InfoExtractor):
1970 """Information extractor for vimeo.com."""
1972 # _VALID_URL matches Vimeo URLs
1973 _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
1976 def __init__(self, downloader=None):
1977 InfoExtractor.__init__(self, downloader)
1979 def report_download_webpage(self, video_id):
1980 """Report webpage download."""
1981 self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
1983 def report_extraction(self, video_id):
1984 """Report information extraction."""
1985 self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
1987 def _real_extract(self, url, new_video=True):
1988 # Extract ID from URL
1989 mobj = re.match(self._VALID_URL, url)
1991 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1994 # At this point we have a new video
1995 self._downloader.increment_downloads()
1996 video_id = mobj.group(1)
1998 # Retrieve video webpage to extract further information
1999 request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
2001 self.report_download_webpage(video_id)
# Tail of VimeoIE._real_extract: downloads the clip's XML config page and
# pulls title/uploader/thumbnail/signature out of it with regexes, then hands
# the assembled info dict to the FileDownloader.
# NOTE(review): this listing appears to have lines elided (guards such as
# `if mobj is None:` / `return` between a search and its error report) —
# confirm against the upstream file before editing.
2002 webpage = urllib2.urlopen(request).read()
2003 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2004 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
2007 # Now we begin extracting as much information as we can from what we
2008 # retrieved. First we extract the information common to all extractors,
2009 # and latter we extract those that are Vimeo specific.
2010 self.report_extraction(video_id)
# Video title comes from the <caption> element of the XML config.
2013 mobj = re.search(r'<caption>(.*?)</caption>', webpage)
2015 self._downloader.trouble(u'ERROR: unable to extract video title')
2017 video_title = mobj.group(1).decode('utf-8')
2018 simple_title = _simplify_title(video_title)
2021 mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
2023 self._downloader.trouble(u'ERROR: unable to extract video uploader')
2025 video_uploader = mobj.group(1).decode('utf-8')
2027 # Extract video thumbnail
2028 mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage)
2030 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
2032 video_thumbnail = mobj.group(1).decode('utf-8')
2034 # # Extract video description
2035 # mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage)
2037 # self._downloader.trouble(u'ERROR: unable to extract video description')
2039 # video_description = mobj.group(1).decode('utf-8')
2040 # if not video_description: video_description = 'No description available.'
# NOTE(review): the real description extraction above is commented out and a
# placeholder string is emitted instead — looks like leftover debug code.
2041 video_description = 'Foo.'
2043 # Vimeo specific: extract request signature
2044 mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
2046 self._downloader.trouble(u'ERROR: unable to extract request signature')
2048 sig = mobj.group(1).decode('utf-8')
2050 # Vimeo specific: extract video quality information
2051 mobj = re.search(r'<isHD>(\d+)</isHD>', webpage)
2053 self._downloader.trouble(u'ERROR: unable to extract video quality information')
2055 quality = mobj.group(1).decode('utf-8')
2057 if int(quality) == 1:
2062 # Vimeo specific: Extract request signature expiration
2063 mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
2065 self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
2067 sig_exp = mobj.group(1).decode('utf-8')
# Final playable URL is the moogaloop player endpoint, parameterized by
# clip id, signature, signature expiry and quality.
2069 video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s/?q=%s" % (video_id, sig, sig_exp, quality)
2072 # Process video information
2073 self._downloader.process_info({
2074 'id': video_id.decode('utf-8'),
2076 'uploader': video_uploader,
2077 'upload_date': u'NA',
2078 'title': video_title,
2079 'stitle': simple_title,
# NOTE(review): 'thumbnail' and 'description' each appear TWICE in this dict
# literal; the later entries win, so the utf-8-decoded thumbnail below is
# silently discarded in favor of the raw bytes. The duplicates should be
# removed (keeping one decoded value each).
2081 'thumbnail': video_thumbnail.decode('utf-8'),
2082 'description': video_description,
2083 'thumbnail': video_thumbnail,
2084 'description': video_description,
2087 except UnavailableVideoError:
2088 self._downloader.trouble(u'ERROR: unable to download video')
2091 class GenericIE(InfoExtractor):
2092 """Generic last-resort information extractor."""
# Used only when no site-specific extractor matched the URL: it downloads the
# page, looks for a JW-Player/SWFObject-style `file=` parameter, and treats
# whatever it finds as a direct media URL.
2095 IE_NAME = u'generic'
2097 def __init__(self, downloader=None):
2098 InfoExtractor.__init__(self, downloader)
2100 def report_download_webpage(self, video_id):
2101 """Report webpage download."""
# Deliberately loud: falling back to the generic extractor is best-effort,
# so the user is warned before the download message.
2102 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
2103 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
2105 def report_extraction(self, video_id):
2106 """Report information extraction."""
2107 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
2109 def _real_extract(self, url):
2110 # At this point we have a new video
2111 self._downloader.increment_downloads()
# Provisional id: last path component of the URL; replaced below once the
# real media URL is known.
2113 video_id = url.split('/')[-1]
2114 request = urllib2.Request(url)
2116 self.report_download_webpage(video_id)
2117 webpage = urllib2.urlopen(request).read()
2118 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2119 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
2121 except ValueError, err:
2122 # since this is the last-resort InfoExtractor, if
2123 # this error is thrown, it'll be thrown here
2124 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2127 self.report_extraction(video_id)
2128 # Start with something easy: JW Player in SWFObject
2129 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
2131 # Broaden the search a little bit
2132 mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
2134 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2137 # It's possible that one of the regexes
2138 # matched, but returned an empty group:
2139 if mobj.group(1) is None:
2140 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2143 video_url = urllib.unquote(mobj.group(1))
2144 video_id = os.path.basename(video_url)
2146 # here's a fun little line of code for you:
# Extension is taken from the media filename; the id is the filename minus
# that extension.
2147 video_extension = os.path.splitext(video_id)[1][1:]
2148 video_id = os.path.splitext(video_id)[0]
2150 # it's tempting to parse this further, but you would
2151 # have to take into account all the variations like
2152 # Video Title - Site Name
2153 # Site Name | Video Title
2154 # Video Title - Tagline | Site Name
2155 # and so on and so forth; it's just not practical
2156 mobj = re.search(r'<title>(.*)</title>', webpage)
2158 self._downloader.trouble(u'ERROR: unable to extract title')
2160 video_title = mobj.group(1).decode('utf-8')
2161 video_title = sanitize_title(video_title)
2162 simple_title = _simplify_title(video_title)
2164 # video uploader is domain name
2165 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
# NOTE(review): the error message says 'unable to extract title' but this
# branch actually failed to extract the uploader/domain — copy/paste slip.
2167 self._downloader.trouble(u'ERROR: unable to extract title')
2169 video_uploader = mobj.group(1).decode('utf-8')
2172 # Process video information
2173 self._downloader.process_info({
2174 'id': video_id.decode('utf-8'),
2175 'url': video_url.decode('utf-8'),
2176 'uploader': video_uploader,
2177 'upload_date': u'NA',
2178 'title': video_title,
2179 'stitle': simple_title,
2180 'ext': video_extension.decode('utf-8'),
2184 except UnavailableVideoError, err:
2185 self._downloader.trouble(u'\nERROR: unable to download video')
2188 class YoutubeSearchIE(InfoExtractor):
2189 """Information Extractor for YouTube search queries."""
# Handles pseudo-URLs of the form ytsearch:QUERY, ytsearchN:QUERY and
# ytsearchall:QUERY; scrapes result pages and delegates each found video id
# to the wrapped YoutubeIE.
2190 _VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+'
2191 _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
2192 _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
2193 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2195 _max_youtube_results = 1000
2196 IE_NAME = u'youtube:search'
2198 def __init__(self, youtube_ie, downloader=None):
2199 InfoExtractor.__init__(self, downloader)
2200 self._youtube_ie = youtube_ie
2202 def report_download_page(self, query, pagenum):
2203 """Report attempt to download playlist page with given number."""
2204 query = query.decode(preferredencoding())
2205 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
2207 def _real_initialize(self):
2208 self._youtube_ie.initialize()
2210 def _real_extract(self, query):
2211 mobj = re.match(self._VALID_URL, query)
2213 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
# prefix is '', 'all' or a decimal count; it selects how many results to pull.
2216 prefix, query = query.split(':')
2218 query = query.encode('utf-8')
2220 self._download_n_results(query, 1)
2222 elif prefix == 'all':
2223 self._download_n_results(query, self._max_youtube_results)
2229 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2231 elif n > self._max_youtube_results:
# Requests beyond the hard cap are clamped with a warning, not rejected.
2232 self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
2233 n = self._max_youtube_results
2234 self._download_n_results(query, n)
2236 except ValueError: # parsing prefix as integer fails
2237 self._download_n_results(query, 1)
2240 def _download_n_results(self, query, n):
2241 """Downloads a specified number of results for a query"""
# Walks result pages until n unique ids are collected or the "Next" link
# disappears, then feeds every id to the YouTube extractor.
2244 already_seen = set()
2248 self.report_download_page(query, pagenum)
2249 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2250 request = urllib2.Request(result_url)
2252 page = urllib2.urlopen(request).read()
2253 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2254 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2257 # Extract video identifiers
2258 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
# NOTE(review): brittle — slices the raw match by span and splits on '=' to
# recover the video id instead of using a capturing group in
# _VIDEO_INDICATOR; the sibling Google/Yahoo IEs use mobj.group(1).
2259 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
2260 if video_id not in already_seen:
2261 video_ids.append(video_id)
2262 already_seen.add(video_id)
2263 if len(video_ids) == n:
2264 # Specified n videos reached
2265 for id in video_ids:
2266 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2269 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2270 for id in video_ids:
2271 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2274 pagenum = pagenum + 1
2277 class GoogleSearchIE(InfoExtractor):
2278 """Information Extractor for Google Video search queries."""
# Same structure as YoutubeSearchIE, for gvsearch:/gvsearchN:/gvsearchall:
# pseudo-URLs; delegates each docid to the wrapped GoogleIE.
2279 _VALID_URL = r'gvsearch(\d+|all)?:[\s\S]+'
2280 _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
2281 _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
2282 _MORE_PAGES_INDICATOR = r'<span>Next</span>'
2284 _max_google_results = 1000
2285 IE_NAME = u'video.google:search'
2287 def __init__(self, google_ie, downloader=None):
2288 InfoExtractor.__init__(self, downloader)
2289 self._google_ie = google_ie
2291 def report_download_page(self, query, pagenum):
2292 """Report attempt to download playlist page with given number."""
2293 query = query.decode(preferredencoding())
2294 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
2296 def _real_initialize(self):
2297 self._google_ie.initialize()
2299 def _real_extract(self, query):
2300 mobj = re.match(self._VALID_URL, query)
2302 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2305 prefix, query = query.split(':')
2307 query = query.encode('utf-8')
2309 self._download_n_results(query, 1)
2311 elif prefix == 'all':
2312 self._download_n_results(query, self._max_google_results)
2318 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2320 elif n > self._max_google_results:
2321 self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
2322 n = self._max_google_results
2323 self._download_n_results(query, n)
2325 except ValueError: # parsing prefix as integer fails
2326 self._download_n_results(query, 1)
2329 def _download_n_results(self, query, n):
2330 """Downloads a specified number of results for a query"""
# Collects up to n unique docids across result pages, stopping early when the
# "Next" marker is absent, then hands them to the Google Video extractor.
2333 already_seen = set()
2337 self.report_download_page(query, pagenum)
2338 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2339 request = urllib2.Request(result_url)
2341 page = urllib2.urlopen(request).read()
2342 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2343 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2346 # Extract video identifiers
2347 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2348 video_id = mobj.group(1)
2349 if video_id not in already_seen:
2350 video_ids.append(video_id)
2351 already_seen.add(video_id)
2352 if len(video_ids) == n:
2353 # Specified n videos reached
2354 for id in video_ids:
2355 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2358 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2359 for id in video_ids:
2360 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2363 pagenum = pagenum + 1
2366 class YahooSearchIE(InfoExtractor):
2367 """Information Extractor for Yahoo! Video search queries."""
# Third copy of the search-IE pattern (yvsearch:/yvsearchN:/yvsearchall:);
# delegates to the wrapped YahooIE.
# NOTE(review): the Youtube/Google/Yahoo search IEs are near-identical —
# candidates for a shared base class in a later refactor.
2368 _VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+'
2369 _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
2370 _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
2371 _MORE_PAGES_INDICATOR = r'\s*Next'
2373 _max_yahoo_results = 1000
2374 IE_NAME = u'video.yahoo:search'
2376 def __init__(self, yahoo_ie, downloader=None):
2377 InfoExtractor.__init__(self, downloader)
2378 self._yahoo_ie = yahoo_ie
2380 def report_download_page(self, query, pagenum):
2381 """Report attempt to download playlist page with given number."""
2382 query = query.decode(preferredencoding())
2383 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
2385 def _real_initialize(self):
2386 self._yahoo_ie.initialize()
2388 def _real_extract(self, query):
2389 mobj = re.match(self._VALID_URL, query)
2391 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2394 prefix, query = query.split(':')
2396 query = query.encode('utf-8')
2398 self._download_n_results(query, 1)
2400 elif prefix == 'all':
2401 self._download_n_results(query, self._max_yahoo_results)
2407 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2409 elif n > self._max_yahoo_results:
2410 self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
2411 n = self._max_yahoo_results
2412 self._download_n_results(query, n)
2414 except ValueError: # parsing prefix as integer fails
2415 self._download_n_results(query, 1)
2418 def _download_n_results(self, query, n):
2419 """Downloads a specified number of results for a query"""
2422 already_seen = set()
2426 self.report_download_page(query, pagenum)
2427 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2428 request = urllib2.Request(result_url)
2430 page = urllib2.urlopen(request).read()
2431 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2432 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2435 # Extract video identifiers
2436 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2437 video_id = mobj.group(1)
2438 if video_id not in already_seen:
2439 video_ids.append(video_id)
2440 already_seen.add(video_id)
2441 if len(video_ids) == n:
2442 # Specified n videos reached
2443 for id in video_ids:
2444 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2447 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2448 for id in video_ids:
2449 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2452 pagenum = pagenum + 1
2455 class YoutubePlaylistIE(InfoExtractor):
2456 """Information Extractor for YouTube playlists."""
# Matches playlist/artist/course/user-grid URLs, walks every page of the
# playlist, collects the video ids and delegates each one to YoutubeIE.
2458 _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
2459 _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
2460 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2461 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2463 IE_NAME = u'youtube:playlist'
2465 def __init__(self, youtube_ie, downloader=None):
2466 InfoExtractor.__init__(self, downloader)
2467 self._youtube_ie = youtube_ie
2469 def report_download_page(self, playlist_id, pagenum):
2470 """Report attempt to download playlist page with given number."""
2471 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
2473 def _real_initialize(self):
2474 self._youtube_ie.initialize()
2476 def _real_extract(self, url):
2477 # Extract playlist id
2478 mobj = re.match(self._VALID_URL, url)
2480 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
# Group 3 is a single video id embedded in the playlist URL; when present the
# URL refers to one video, so it is extracted directly instead of the list.
2484 if mobj.group(3) is not None:
2485 self._youtube_ie.extract(mobj.group(3))
2488 # Download playlist pages
2489 # prefix is 'p' as default for playlists but there are other types that need extra care
2490 playlist_prefix = mobj.group(1)
2491 if playlist_prefix == 'a':
2492 playlist_access = 'artist'
2494 playlist_prefix = 'p'
2495 playlist_access = 'view_play_list'
2496 playlist_id = mobj.group(2)
2501 self.report_download_page(playlist_id, pagenum)
2502 url = self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)
2503 request = urllib2.Request(url)
2505 page = urllib2.urlopen(request).read()
2506 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2507 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2510 # Extract video identifiers
2512 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2513 if mobj.group(1) not in ids_in_page:
2514 ids_in_page.append(mobj.group(1))
2515 video_ids.extend(ids_in_page)
2517 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2519 pagenum = pagenum + 1
2521 playliststart = self._downloader.params.get('playliststart', 1) - 1
2522 playlistend = self._downloader.params.get('playlistend', -1)
# NOTE(review): with the default playlistend == -1 this slice excludes the
# LAST video of the playlist. YoutubeUserIE below special-cases -1
# (slicing to the end); this class should do the same — likely bug.
2523 video_ids = video_ids[playliststart:playlistend]
2525 for id in video_ids:
2526 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2530 class YoutubeUserIE(InfoExtractor):
2531 """Information Extractor for YouTube users."""
# Lists a user's uploads through the GData API (paged, 50 ids per request)
# and delegates each video id to YoutubeIE.
2533 _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
2534 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2535 _GDATA_PAGE_SIZE = 50
2536 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
2537 _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
2539 IE_NAME = u'youtube:user'
2541 def __init__(self, youtube_ie, downloader=None):
2542 InfoExtractor.__init__(self, downloader)
2543 self._youtube_ie = youtube_ie
2545 def report_download_page(self, username, start_index):
2546 """Report attempt to download user page."""
2547 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
2548 (username, start_index, start_index + self._GDATA_PAGE_SIZE))
2550 def _real_initialize(self):
2551 self._youtube_ie.initialize()
2553 def _real_extract(self, url):
2555 mobj = re.match(self._VALID_URL, url)
2557 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2560 username = mobj.group(1)
2562 # Download video ids using YouTube Data API. Result size per
2563 # query is limited (currently to 50 videos) so we need to query
2564 # page by page until there are no video ids - it means we got
# GData start-index is 1-based, hence the +1.
2571 start_index = pagenum * self._GDATA_PAGE_SIZE + 1
2572 self.report_download_page(username, start_index)
2574 request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
2577 page = urllib2.urlopen(request).read()
2578 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2579 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2582 # Extract video identifiers
2585 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2586 if mobj.group(1) not in ids_in_page:
2587 ids_in_page.append(mobj.group(1))
2589 video_ids.extend(ids_in_page)
2591 # A little optimization - if current page is not
2592 # "full", ie. does not contain PAGE_SIZE video ids then
2593 # we can assume that this page is the last one - there
2594 # are no more ids on further pages - no need to query
2597 if len(ids_in_page) < self._GDATA_PAGE_SIZE:
2602 all_ids_count = len(video_ids)
2603 playliststart = self._downloader.params.get('playliststart', 1) - 1
2604 playlistend = self._downloader.params.get('playlistend', -1)
# Unlike YoutubePlaylistIE, the sentinel -1 is handled explicitly here so the
# last video is not dropped by the slice.
2606 if playlistend == -1:
2607 video_ids = video_ids[playliststart:]
2609 video_ids = video_ids[playliststart:playlistend]
2611 self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
2612 (username, all_ids_count, len(video_ids)))
2614 for video_id in video_ids:
2615 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
2618 class DepositFilesIE(InfoExtractor):
2619 """Information extractor for depositfiles.com"""
2621 _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
2622 IE_NAME = u'DepositFiles'
2624 def __init__(self, downloader=None):
2625 InfoExtractor.__init__(self, downloader)
2627 def report_download_webpage(self, file_id):
2628 """Report webpage download."""
2629 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2631 def report_extraction(self, file_id):
2632 """Report information extraction."""
2633 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2635 def _real_extract(self, url):
2636 # At this point we have a new file
2637 self._downloader.increment_downloads()
2639 file_id = url.split('/')[-1]
2640 # Rebuild url in english locale
# Forces the /en/ locale so the regexes below match the English page layout.
2641 url = 'http://depositfiles.com/en/files/' + file_id
2643 # Retrieve file webpage with 'Free download' button pressed
# POSTing gateway_result=1 simulates clicking the free-download button, which
# reveals the real fileshare URL in the response.
2644 free_download_indication = { 'gateway_result' : '1' }
2645 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
2647 self.report_download_webpage(file_id)
2648 webpage = urllib2.urlopen(request).read()
2649 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2650 self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2653 # Search for the real file URL
2654 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2655 if (mobj is None) or (mobj.group(1) is None):
2656 # Try to figure out reason of the error.
# Site-side restrictions (parallel download limit, etc.) are surfaced as an
# <strong>Attention...</strong> banner; report it verbatim when present.
2657 mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2658 if (mobj is not None) and (mobj.group(1) is not None):
2659 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2660 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2662 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2665 file_url = mobj.group(1)
2666 file_extension = os.path.splitext(file_url)[1][1:]
2668 # Search for file title
2669 mobj = re.search(r'<b title="(.*?)">', webpage)
2671 self._downloader.trouble(u'ERROR: unable to extract title')
2673 file_title = mobj.group(1).decode('utf-8')
2676 # Process file information
2677 self._downloader.process_info({
2678 'id': file_id.decode('utf-8'),
2679 'url': file_url.decode('utf-8'),
2681 'upload_date': u'NA',
2682 'title': file_title,
2683 'stitle': file_title,
2684 'ext': file_extension.decode('utf-8'),
2688 except UnavailableVideoError, err:
2689 self._downloader.trouble(u'ERROR: unable to download file')
2692 class FacebookIE(InfoExtractor):
2693 """Information Extractor for Facebook"""
# Logs in with user-supplied or .netrc credentials (login is optional but
# required for non-public videos), then scrapes the video page's embedded
# JavaScript for title/owner/thumbnail and per-quality source URLs.
2695 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
2696 _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
2697 _NETRC_MACHINE = 'facebook'
# Ordered best-first; used both for scraping URLs and for quality selection.
2698 _available_formats = ['video', 'highqual', 'lowqual']
2699 _video_extensions = {
2704 IE_NAME = u'facebook'
2706 def __init__(self, downloader=None):
2707 InfoExtractor.__init__(self, downloader)
2709 def _reporter(self, message):
2710 """Add header and report message."""
2711 self._downloader.to_screen(u'[facebook] %s' % message)
2713 def report_login(self):
2714 """Report attempt to log in."""
2715 self._reporter(u'Logging in')
2717 def report_video_webpage_download(self, video_id):
2718 """Report attempt to download video webpage."""
2719 self._reporter(u'%s: Downloading video webpage' % video_id)
2721 def report_information_extraction(self, video_id):
2722 """Report attempt to extract video information."""
2723 self._reporter(u'%s: Extracting video information' % video_id)
2725 def _parse_page(self, video_webpage):
2726 """Extract video information from page"""
# Each metadata field is located by a dedicated regex over the page's inline
# JavaScript; missing fields are simply absent from the returned dict.
2728 data = {'title': r'\("video_title", "(.*?)"\)',
2729 'description': r'<div class="datawrap">(.*?)</div>',
2730 'owner': r'\("video_owner_name", "(.*?)"\)',
2731 'thumbnail': r'\("thumb_url", "(?P<THUMB>.*?)"\)',
2734 for piece in data.keys():
2735 mobj = re.search(data[piece], video_webpage)
2736 if mobj is not None:
2737 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2741 for fmt in self._available_formats:
2742 mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
2743 if mobj is not None:
2744 # URL is in a Javascript segment inside an escaped Unicode format within
2745 # the generally utf-8 page
2746 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2747 video_info['video_urls'] = video_urls
2751 def _real_initialize(self):
# Login is best-effort: failures are reported as warnings and extraction of
# public videos proceeds anonymously.
2752 if self._downloader is None:
2757 downloader_params = self._downloader.params
2759 # Attempt to use provided username and password or .netrc data
2760 if downloader_params.get('username', None) is not None:
2761 useremail = downloader_params['username']
2762 password = downloader_params['password']
2763 elif downloader_params.get('usenetrc', False):
2765 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
2766 if info is not None:
2770 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
2771 except (IOError, netrc.NetrcParseError), err:
2772 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
2775 if useremail is None:
2784 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
2787 login_results = urllib2.urlopen(request).read()
# A login form appearing in the response means authentication failed.
2788 if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
2789 self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
2791 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2792 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
2795 def _real_extract(self, url):
2796 mobj = re.match(self._VALID_URL, url)
2798 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2800 video_id = mobj.group('ID')
2803 self.report_video_webpage_download(video_id)
2804 request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
2806 page = urllib2.urlopen(request)
2807 video_webpage = page.read()
2808 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2809 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
2812 # Start extracting information
2813 self.report_information_extraction(video_id)
2815 # Extract information
2816 video_info = self._parse_page(video_webpage)
2819 if 'owner' not in video_info:
2820 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
2822 video_uploader = video_info['owner']
2825 if 'title' not in video_info:
2826 self._downloader.trouble(u'ERROR: unable to extract video title')
2828 video_title = video_info['title']
2829 video_title = video_title.decode('utf-8')
2830 video_title = sanitize_title(video_title)
2832 simple_title = _simplify_title(video_title)
# Thumbnail is optional: warn and fall back to an empty string.
2835 if 'thumbnail' not in video_info:
2836 self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
2837 video_thumbnail = ''
2839 video_thumbnail = video_info['thumbnail']
2843 if 'upload_date' in video_info:
2844 upload_time = video_info['upload_date']
2845 timetuple = email.utils.parsedate_tz(upload_time)
2846 if timetuple is not None:
2848 upload_date = time.strftime('%Y%m%d', timetuple[0:9])
2853 video_description = video_info.get('description', 'No description available.')
2855 url_map = video_info['video_urls']
2856 if len(url_map.keys()) > 0:
2857 # Decide which formats to download
# Same quality-selection logic as the YouTube extractor: apply the format
# ceiling, then pick best / worst / all / a specific requested format.
2858 req_format = self._downloader.params.get('format', None)
2859 format_limit = self._downloader.params.get('format_limit', None)
2861 if format_limit is not None and format_limit in self._available_formats:
2862 format_list = self._available_formats[self._available_formats.index(format_limit):]
2864 format_list = self._available_formats
2865 existing_formats = [x for x in format_list if x in url_map]
2866 if len(existing_formats) == 0:
2867 self._downloader.trouble(u'ERROR: no known formats available for video')
2869 if req_format is None:
2870 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
2871 elif req_format == 'worst':
# NOTE(review): existing_formats[len(existing_formats)-1] is just
# existing_formats[-1] — could be simplified.
2872 video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
2873 elif req_format == '-1':
2874 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
2877 if req_format not in url_map:
2878 self._downloader.trouble(u'ERROR: requested format not available')
2880 video_url_list = [(req_format, url_map[req_format])] # Specific format
2882 for format_param, video_real_url in video_url_list:
2884 # At this point we have a new video
2885 self._downloader.increment_downloads()
2888 video_extension = self._video_extensions.get(format_param, 'mp4')
2891 # Process video information
2892 self._downloader.process_info({
2893 'id': video_id.decode('utf-8'),
2894 'url': video_real_url.decode('utf-8'),
2895 'uploader': video_uploader.decode('utf-8'),
2896 'upload_date': upload_date,
2897 'title': video_title,
2898 'stitle': simple_title,
2899 'ext': video_extension.decode('utf-8'),
2900 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
2901 'thumbnail': video_thumbnail.decode('utf-8'),
2902 'description': video_description.decode('utf-8'),
2905 except UnavailableVideoError, err:
2906 self._downloader.trouble(u'\nERROR: unable to download video')
2908 class BlipTVIE(InfoExtractor):
2909 """Information extractor for blip.tv"""
# Asks blip.tv for the JSON description of the page (skin=json); if the URL
# instead serves the media file directly (video/* Content-Type), the file is
# downloaded as-is without JSON parsing.
2911 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
# Used to derive the container extension from the media URL's suffix.
2912 _URL_EXT = r'^.*\.([a-z0-9]+)$'
2913 IE_NAME = u'blip.tv'
2915 def report_extraction(self, file_id):
2916 """Report information extraction."""
2917 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))
2919 def report_direct_download(self, title):
2920 """Report information extraction."""
2921 self._downloader.to_screen(u'[%s] %s: Direct download detected' % (self.IE_NAME, title))
2923 def _real_extract(self, url):
2924 mobj = re.match(self._VALID_URL, url)
2926 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
# cchar is '?' or '&' depending on whether the URL already has a query string.
2933 json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
2934 request = urllib2.Request(json_url)
2935 self.report_extraction(mobj.group(1))
2938 urlh = urllib2.urlopen(request)
2939 if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
2940 basename = url.split('/')[-1]
2941 title,ext = os.path.splitext(basename)
2942 title = title.decode('UTF-8')
2943 ext = ext.replace('.', '')
2944 self.report_direct_download(title)
2949 'stitle': _simplify_title(title),
2953 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2954 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
2956 if info is None: # Regular URL
2958 json_code = urlh.read()
2959 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2960 self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % str(err))
2964 json_data = json.loads(json_code)
2965 if 'Post' in json_data:
2966 data = json_data['Post']
# blip.tv timestamps look like '11-28-11 03:14PM'; normalize to YYYYMMDD.
2970 upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
2971 video_url = data['media']['url']
2972 umobj = re.match(self._URL_EXT, video_url)
2974 raise ValueError('Can not determine filename extension')
2975 ext = umobj.group(1)
2978 'id': data['item_id'],
2980 'uploader': data['display_name'],
2981 'upload_date': upload_date,
2982 'title': data['title'],
2983 'stitle': _simplify_title(data['title']),
2985 'format': data['media']['mimeType'],
2986 'thumbnail': data['thumbnailUrl'],
2987 'description': data['description'],
2988 'player_url': data['embedUrl']
2990 except (ValueError,KeyError), err:
2991 self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
2994 self._downloader.increment_downloads()
2997 self._downloader.process_info(info)
2998 except UnavailableVideoError, err:
2999 self._downloader.trouble(u'\nERROR: unable to download video')
3002 class MyVideoIE(InfoExtractor):
3003 """Information Extractor for myvideo.de."""
# Derives the FLV URL from the thumbnail's image_src link (same host/path,
# with /<id>.flv appended) and takes the title from the page <title> tag.
3005 _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
3006 IE_NAME = u'myvideo'
3008 def __init__(self, downloader=None):
3009 InfoExtractor.__init__(self, downloader)
3011 def report_download_webpage(self, video_id):
3012 """Report webpage download."""
3013 self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
3015 def report_extraction(self, video_id):
3016 """Report information extraction."""
3017 self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id)
3019 def _real_extract(self,url):
3020 mobj = re.match(self._VALID_URL, url)
# NOTE(review): `self._download` is a typo for `self._downloader` — as
# written, reporting an invalid URL raises AttributeError instead of
# printing the error. Should be fixed upstream.
3022 self._download.trouble(u'ERROR: invalid URL: %s' % url)
3025 video_id = mobj.group(1)
3028 request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id)
3030 self.report_download_webpage(video_id)
3031 webpage = urllib2.urlopen(request).read()
3032 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3033 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
3036 self.report_extraction(video_id)
3037 mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />',
3040 self._downloader.trouble(u'ERROR: unable to extract media URL')
3042 video_url = mobj.group(1) + ('/%s.flv' % video_id)
3044 mobj = re.search('<title>([^<]+)</title>', webpage)
3046 self._downloader.trouble(u'ERROR: unable to extract title')
3049 video_title = mobj.group(1)
3050 video_title = sanitize_title(video_title)
3052 simple_title = _simplify_title(video_title)
3055 self._downloader.process_info({
3059 'upload_date': u'NA',
3060 'title': video_title,
3061 'stitle': simple_title,
3066 except UnavailableVideoError:
3067 self._downloader.trouble(u'\nERROR: Unable to download video')
# Extractor for The Daily Show / The Colbert Report full episodes.
# Accepts either a shortname pseudo-URL (":tds", ":colbert", ...) or a real
# thedailyshow.com / colbertnation.com /full-episodes/ URL, resolves the show
# index via the mtvnservices media feed, then downloads per-item config XML
# to pick the highest-bitrate rendition of each episode part.
3069 class ComedyCentralIE(InfoExtractor):
3070 	"""Information extractor for The Daily Show and Colbert Report """
3072 	_VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
3073 	IE_NAME = u'comedycentral'
3075 	def report_extraction(self, episode_id):
3076 		self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
3078 	def report_config_download(self, episode_id):
3079 		self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)
3081 	def report_index_download(self, episode_id):
3082 		self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
3084 	def report_player_url(self, episode_id):
3085 		self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
3087 	def _real_extract(self, url):
3088 		mobj = re.match(self._VALID_URL, url)
3090 		self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
# Shortnames are rewritten to the show's /full-episodes/ landing page and the
# URL is re-matched so the named groups below are populated.
3093 		if mobj.group('shortname'):
3094 			if mobj.group('shortname') in ('tds', 'thedailyshow'):
3095 				url = u'http://www.thedailyshow.com/full-episodes/'
3097 				url = u'http://www.colbertnation.com/full-episodes/'
3098 			mobj = re.match(self._VALID_URL, url)
3099 			assert mobj is not None
3101 		dlNewest = not mobj.group('episode')
3103 			epTitle = mobj.group('showname')
3105 			epTitle = mobj.group('episode')
3107 		req = urllib2.Request(url)
3108 		self.report_extraction(epTitle)
3110 			htmlHandle = urllib2.urlopen(req)
3111 			html = htmlHandle.read()
3112 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3113 			self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
# When downloading "newest", the landing page redirects to the concrete
# episode URL; re-match the post-redirect URL to get the episode slug.
3116 			url = htmlHandle.geturl()
3117 			mobj = re.match(self._VALID_URL, url)
3119 				self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url)
3121 			if mobj.group('episode') == '':
3122 				self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url)
3124 			epTitle = mobj.group('episode')
# NOTE(review): the dots in "media.mtvnservices.com" are unescaped, so they
# match any character — harmless in practice but worth confirming/escaping.
3126 		mMovieParams = re.findall('<param name="movie" value="(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"/>', html)
3127 		if len(mMovieParams) == 0:
3128 			self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
3131 		playerUrl_raw = mMovieParams[0][0]
3132 		self.report_player_url(epTitle)
# Resolve the player URL through its redirects; the final URL is stored per
# info dict so rtmpdump can supply the correct SWF for verification.
3134 			urlHandle = urllib2.urlopen(playerUrl_raw)
3135 			playerUrl = urlHandle.geturl()
3136 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3137 			self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err))
3140 		uri = mMovieParams[0][1]
3141 		indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri})
3142 		self.report_index_download(epTitle)
3144 			indexXml = urllib2.urlopen(indexUrl).read()
3145 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3146 			self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err))
# The MRSS index lists one <item> per episode part ("act"); each is fetched
# and queued as its own download below.
3149 		idoc = xml.etree.ElementTree.fromstring(indexXml)
3150 		itemEls = idoc.findall('.//item')
3151 		for itemEl in itemEls:
3152 			mediaId = itemEl.findall('./guid')[0].text
3153 			shortMediaId = mediaId.split(':')[-1]
3154 			showId = mediaId.split(':')[-2].replace('.com', '')
3155 			officialTitle = itemEl.findall('./title')[0].text
3156 			officialDate = itemEl.findall('./pubDate')[0].text
3158 			configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
3159 						urllib.urlencode({'uri': mediaId}))
3160 			configReq = urllib2.Request(configUrl)
3161 			self.report_config_download(epTitle)
3163 				configXml = urllib2.urlopen(configReq).read()
3164 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3165 				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
# Each <rendition> carries a bitrate attribute and an rtmp <src>; collect
# (bitrate, url) pairs, then pick the last (highest-bitrate) entry.
3168 			cdoc = xml.etree.ElementTree.fromstring(configXml)
3170 			for rendition in cdoc.findall('.//rendition'):
3171 				finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
3175 				self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
3178 			# For now, just pick the highest bitrate
3179 			format,video_url = turls[-1]
3181 			self._downloader.increment_downloads()
3183 			effTitle = showId + u'-' + epTitle
3188 				'upload_date': officialDate,
3190 				'stitle': _simplify_title(effTitle),
3194 				'description': officialTitle,
3195 				'player_url': playerUrl
3199 				self._downloader.process_info(info)
3200 			except UnavailableVideoError, err:
3201 				self._downloader.trouble(u'\nERROR: unable to download ' + mediaId)
# Extractor for escapistmagazine.com videos. Scrapes OpenGraph meta tags
# from the page, pulls the "config=" JSON(ish) URL out of the og:video
# player URL, and takes the media URL from the resulting playlist.
3205 class EscapistIE(InfoExtractor):
3206 	"""Information extractor for The Escapist """
3208 	_VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
3209 	IE_NAME = u'escapist'
3211 	def report_extraction(self, showName):
3212 		self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName)
3214 	def report_config_download(self, showName):
3215 		self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName)
3217 	def _real_extract(self, url):
# HTMLParser instance is used only for its unescape() helper below.
3218 		htmlParser = HTMLParser.HTMLParser()
3220 		mobj = re.match(self._VALID_URL, url)
3222 			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3224 		showName = mobj.group('showname')
3225 		videoId = mobj.group('episode')
3227 		self.report_extraction(showName)
3229 			webPage = urllib2.urlopen(url).read()
3230 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3231 			self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err))
# Description / thumbnail / player URL all come from <meta> tags; the
# config URL is embedded as a query parameter of the og:video player URL.
3234 		descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
3235 		description = htmlParser.unescape(descMatch.group(1))
3236 		imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
3237 		imgUrl = htmlParser.unescape(imgMatch.group(1))
3238 		playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
3239 		playerUrl = htmlParser.unescape(playerUrlMatch.group(1))
3240 		configUrlMatch = re.search('config=(.*)$', playerUrl)
3241 		configUrl = urllib2.unquote(configUrlMatch.group(1))
3243 		self.report_config_download(showName)
3245 			configJSON = urllib2.urlopen(configUrl).read()
3246 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3247 			self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err))
3250 		# Technically, it's JavaScript, not JSON
# Single-quoted JS object literal is coerced into parseable JSON by a
# blanket quote replacement — fragile if values ever contain quotes.
3251 		configJSON = configJSON.replace("'", '"')
3254 			config = json.loads(configJSON)
3255 		except (ValueError,), err:
3256 			self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err))
# Index 1 of the playlist holds the actual video entry (index 0 is
# presumably a preroll/intro — TODO confirm).
3259 		playlist = config['playlist']
3260 		videoUrl = playlist[1]['url']
3262 		self._downloader.increment_downloads()
3266 			'uploader': showName,
3267 			'upload_date': None,
3269 			'stitle': _simplify_title(showName),
3272 			'thumbnail': imgUrl,
3273 			'description': description,
3274 			'player_url': playerUrl,
3278 			self._downloader.process_info(info)
3279 		except UnavailableVideoError, err:
3280 			self._downloader.trouble(u'\nERROR: unable to download ' + videoId)
# Extractor for collegehumor.com. Scrapes the internal video id from the
# watch page, then reads title/description/file URL from the moogaloop
# metadata XML endpoint.
3283 class CollegeHumorIE(InfoExtractor):
3284 	"""Information extractor for collegehumor.com"""
3286 	_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
3287 	IE_NAME = u'collegehumor'
3289 	def report_webpage(self, video_id):
3290 		"""Report information extraction."""
3291 		self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3293 	def report_extraction(self, video_id):
3294 		"""Report information extraction."""
3295 		self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3297 	def _real_extract(self, url):
3298 		htmlParser = HTMLParser.HTMLParser()
3300 		mobj = re.match(self._VALID_URL, url)
3302 			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3304 		video_id = mobj.group('videoid')
3306 		self.report_webpage(video_id)
3307 		request = urllib2.Request(url)
3309 			webpage = urllib2.urlopen(request).read()
3310 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3311 			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
# The page embeds an element id of the form video:<internal-id>; that
# internal id keys the metadata XML request below.
3314 		m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage)
3316 			self._downloader.trouble(u'ERROR: Cannot extract internal video ID')
3318 		internal_video_id = m.group('internalvideoid')
3322 			'internal_id': internal_video_id,
3325 		self.report_extraction(video_id)
3326 		xmlUrl = 'http://www.collegehumor.com/moogaloop/video:' + internal_video_id
3328 			metaXml = urllib2.urlopen(xmlUrl).read()
3329 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3330 			self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err))
# Populate the info dict from the <video> node; the extension (and format)
# is taken from the file URL's suffix.
3333 		mdoc = xml.etree.ElementTree.fromstring(metaXml)
3335 			videoNode = mdoc.findall('./video')[0]
3336 			info['description'] = videoNode.findall('./description')[0].text
3337 			info['title'] = videoNode.findall('./caption')[0].text
3338 			info['stitle'] = _simplify_title(info['title'])
3339 			info['url'] = videoNode.findall('./file')[0].text
3340 			info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
3341 			info['ext'] = info['url'].rpartition('.')[2]
3342 			info['format'] = info['ext']
3344 			self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
3347 		self._downloader.increment_downloads()
3350 			self._downloader.process_info(info)
3351 		except UnavailableVideoError, err:
3352 			self._downloader.trouble(u'\nERROR: unable to download video')
# Extractor for xvideos.com. Fetches the watch page and scrapes the flv
# URL, <title>, and thumbnail URL with regexes.
3355 class XVideosIE(InfoExtractor):
3356 	"""Information extractor for xvideos.com"""
3358 	_VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
3359 	IE_NAME = u'xvideos'
3361 	def report_webpage(self, video_id):
3362 		"""Report information extraction."""
3363 		self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3365 	def report_extraction(self, video_id):
3366 		"""Report information extraction."""
3367 		self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3369 	def _real_extract(self, url):
# NOTE(review): htmlParser is created but not used in the visible lines.
3370 		htmlParser = HTMLParser.HTMLParser()
3372 		mobj = re.match(self._VALID_URL, url)
3374 			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3376 		video_id = mobj.group(1).decode('utf-8')
3378 		self.report_webpage(video_id)
# Re-fetch via the canonical /video<id> URL rather than the input URL.
3380 		request = urllib2.Request(r'http://www.xvideos.com/video' + video_id)
3382 			webpage = urllib2.urlopen(request).read()
3383 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3384 			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
3387 		self.report_extraction(video_id)
# The page stores the media URL percent-encoded in a "flv_url=" parameter.
3391 		mobj = re.search(r'flv_url=(.+?)&', webpage)
3393 			self._downloader.trouble(u'ERROR: unable to extract video url')
3395 		video_url = urllib2.unquote(mobj.group(1).decode('utf-8'))
3399 		mobj = re.search(r'<title>(.*?)\s+-\s+XVID', webpage)
3401 			self._downloader.trouble(u'ERROR: unable to extract video title')
3403 		video_title = mobj.group(1).decode('utf-8')
3406 		# Extract video thumbnail
3407 		mobj = re.search(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]/[a-fA-F0-9]/[a-fA-F0-9]/([a-fA-F0-9.]+jpg)', webpage)
3409 			self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
3411 		video_thumbnail = mobj.group(1).decode('utf-8')
3415 		self._downloader.increment_downloads()
3420 			'upload_date': None,
3421 			'title': video_title,
3422 			'stitle': _simplify_title(video_title),
3425 			'thumbnail': video_thumbnail,
3426 			'description': None,
3431 			self._downloader.process_info(info)
3432 		except UnavailableVideoError, err:
3433 			self._downloader.trouble(u'\nERROR: unable to download ' + video_id)
# Extractor for soundcloud.com tracks. Scrapes the track uid and stream
# token from the page source, composes the media.soundcloud.com stream URL
# from them, and best-effort-parses title/description/upload date.
3436 class SoundcloudIE(InfoExtractor):
3437 	"""Information extractor for soundcloud.com
3438 	   To access the media, the uid of the song and a stream token
3439 	   must be extracted from the page source and the script must make
3440 	   a request to media.soundcloud.com/crossdomain.xml. Then
3441 	   the media can be grabbed by requesting from an url composed
3442 	   of the stream token and uid
3445 	_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
3446 	IE_NAME = u'soundcloud'
3448 	def __init__(self, downloader=None):
3449 		InfoExtractor.__init__(self, downloader)
3451 	def report_webpage(self, video_id):
3452 		"""Report information extraction."""
3453 		self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3455 	def report_extraction(self, video_id):
3456 		"""Report information extraction."""
3457 		self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3459 	def _real_extract(self, url):
3460 		htmlParser = HTMLParser.HTMLParser()
3462 		mobj = re.match(self._VALID_URL, url)
3464 			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3467 		# extract uploader (which is in the url)
3468 		uploader = mobj.group(1).decode('utf-8')
3469 		# extract simple title (uploader + slug of song title)
3470 		slug_title =  mobj.group(2).decode('utf-8')
3471 		simple_title = uploader + '-' + slug_title
3473 		self.report_webpage('%s/%s' % (uploader, slug_title))
3475 		request = urllib2.Request('http://soundcloud.com/%s/%s' % (uploader, slug_title))
3477 			webpage = urllib2.urlopen(request).read()
3478 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3479 			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
3482 		self.report_extraction('%s/%s' % (uploader, slug_title))
3484 		# extract uid and stream token that soundcloud hands out for access
3485 		mobj = re.search('"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', webpage)
3487 			video_id = mobj.group(1)
3488 			stream_token = mobj.group(2)
3490 		# extract unsimplified title
3491 		mobj = re.search('"title":"(.*?)",', webpage)
3493 			title = mobj.group(1)
3495 		# construct media url (with uid/token)
3496 		mediaURL = "http://media.soundcloud.com/stream/%s?stream_token=%s"
3497 		mediaURL = mediaURL % (video_id, stream_token)
# Description is optional on the page; default is kept if the regex misses.
3500 		description = u'No description available'
3501 		mobj = re.search('track-description-value"><p>(.*?)</p>', webpage)
3503 			description = mobj.group(1)
# Upload date: parse the human-readable "pretty-date" into YYYYMMDD;
# failures are tolerated (see the except clause below).
3507 		mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage)
3510 				upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
3511 			except Exception, e:
3514 		# for soundcloud, a request to a cross domain is required for cookies
# NOTE(review): std_headers is passed as urllib2.Request's second positional
# argument, which is the POST *data*, not the headers — this likely turns the
# crossdomain.xml fetch into a POST with header text as body; confirm intent.
3515 		request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
3518 			self._downloader.process_info({
3519 				'id':		video_id.decode('utf-8'),
3521 				'uploader':	uploader.decode('utf-8'),
3522 				'upload_date':  upload_date,
3523 				'title':	simple_title.decode('utf-8'),
3524 				'stitle':	simple_title.decode('utf-8'),
3528 				'description': description.decode('utf-8')
3530 		except UnavailableVideoError:
3531 			self._downloader.trouble(u'\nERROR: unable to download video')
# Extractor for infoq.com presentations. The media path is stored
# base64-encoded in a "jsclassref" attribute and is served over rtmpe.
3534 class InfoQIE(InfoExtractor):
3535 	"""Information extractor for infoq.com"""
3537 	_VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
3540 	def report_webpage(self, video_id):
3541 		"""Report information extraction."""
3542 		self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3544 	def report_extraction(self, video_id):
3545 		"""Report information extraction."""
3546 		self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3548 	def _real_extract(self, url):
3549 		htmlParser = HTMLParser.HTMLParser()
3551 		mobj = re.match(self._VALID_URL, url)
3553 			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3556 		self.report_webpage(url)
3558 		request = urllib2.Request(url)
3560 			webpage = urllib2.urlopen(request).read()
3561 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3562 			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
3565 		self.report_extraction(url)
# jsclassref holds a base64-encoded, percent-encoded media path; decode it
# and prepend the rtmpe streaming host.
3569 		mobj = re.search(r"jsclassref='([^']*)'", webpage)
3571 			self._downloader.trouble(u'ERROR: unable to extract video url')
3573 		video_url = 'rtmpe://video.infoq.com/cfx/st/' + urllib2.unquote(mobj.group(1).decode('base64'))
3577 		mobj = re.search(r'contentTitle = "(.*?)";', webpage)
3579 			self._downloader.trouble(u'ERROR: unable to extract video title')
3581 		video_title = mobj.group(1).decode('utf-8')
3583 		# Extract description
3584 		video_description = u'No description available.'
3585 		mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', webpage)
3586 		if mobj is not None:
3587 			video_description = mobj.group(1).decode('utf-8')
# The video id and extension are derived from the media path's last
# component ("<id>.<ext>"); breaks if the filename has extra dots.
3589 		video_filename = video_url.split('/')[-1]
3590 		video_id, extension = video_filename.split('.')
3592 		self._downloader.increment_downloads()
3597 			'upload_date': None,
3598 			'title': video_title,
3599 			'stitle': _simplify_title(video_title),
3601 			'format': extension, # Extension is always(?) mp4, but seems to be flv
3603 			'description': video_description,
3608 			self._downloader.process_info(info)
3609 		except UnavailableVideoError, err:
3610 			self._downloader.trouble(u'\nERROR: unable to download ' + video_url)
# Extractor for mixcloud.com cloudcasts. Uses the public JSON API
# (/api/1/cloudcast/<user>/<slug>.json), picks a format/bitrate from the
# "audio_formats" section, and probes candidate URLs until one responds.
3612 class MixcloudIE(InfoExtractor):
3613 	"""Information extractor for www.mixcloud.com"""
3614 	_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
3615 	IE_NAME = u'mixcloud'
3617 	def __init__(self, downloader=None):
3618 		InfoExtractor.__init__(self, downloader)
3620 	def report_download_json(self, file_id):
3621 		"""Report JSON download."""
3622 		self._downloader.to_screen(u'[%s] Downloading json' % self.IE_NAME)
3624 	def report_extraction(self, file_id):
3625 		"""Report information extraction."""
3626 		self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))
# Return the URL list for a given format; entries may be keyed by bitrate
# (dict) or be a flat list — the TypeError fallback handles the latter.
3628 	def get_urls(self, jsonData, fmt, bitrate='best'):
3629 		"""Get urls from 'audio_formats' section in json"""
3632 			bitrate_list = jsonData[fmt]
3633 			if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
3634 				bitrate = max(bitrate_list) # select highest
3636 			url_list = jsonData[fmt][bitrate]
3637 		except TypeError: # we have no bitrate info.
3638 			url_list = jsonData[fmt]
# Probe each candidate URL with a GET and return the first that opens.
3642 	def check_urls(self, url_list):
3643 		"""Returns 1st active url from list"""
3644 		for url in url_list:
3646 				urllib2.urlopen(url)
3648 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
# Print a plain-text table of available formats/bitrates for --list-formats.
3653 	def _print_formats(self, formats):
3654 		print 'Available formats:'
3655 		for fmt in formats.keys():
3656 			for b in formats[fmt]:
3658 					ext = formats[fmt][b][0]
3659 					print '%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])
3660 				except TypeError: # we have no bitrate info
3661 					ext = formats[fmt][0]
3662 					print '%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])
3665 	def _real_extract(self, url):
3666 		mobj = re.match(self._VALID_URL, url)
3668 			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3670 		# extract uploader & filename from url
3671 		uploader = mobj.group(1).decode('utf-8')
3672 		file_id = uploader + "-" + mobj.group(2).decode('utf-8')
3674 		# construct API request
3675 		file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json'
3676 		# retrieve .json file with links to files
3677 		request = urllib2.Request(file_url)
3679 			self.report_download_json(file_url)
3680 			jsonData = urllib2.urlopen(request).read()
3681 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3682 			self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % str(err))
3686 		json_data = json.loads(jsonData)
3687 		player_url = json_data['player_swf_url']
3688 		formats = dict(json_data['audio_formats'])
3690 		req_format = self._downloader.params.get('format', None)
3693 		if self._downloader.params.get('listformats', None):
3694 			self._print_formats(formats)
# 'best' (or no preference): try every format until one yields a live URL;
# otherwise honor the explicitly requested format.
3697 		if req_format is None or req_format == 'best':
3698 			for format_param in formats.keys():
3699 				url_list = self.get_urls(formats, format_param)
3701 				file_url = self.check_urls(url_list)
3702 				if file_url is not None:
3705 			if req_format not in formats.keys():
3706 				self._downloader.trouble(u'ERROR: format is not available')
3709 			url_list = self.get_urls(formats, req_format)
3710 			file_url = self.check_urls(url_list)
3711 			format_param = req_format
3714 		self._downloader.increment_downloads()
3716 		# Process file information
3717 			self._downloader.process_info({
3718 				'id': file_id.decode('utf-8'),
3719 				'url': file_url.decode('utf-8'),
3720 				'uploader':	uploader.decode('utf-8'),
3721 				'upload_date': u'NA',
3722 				'title': json_data['name'],
3723 				'stitle': _simplify_title(json_data['name']),
3724 				'ext': file_url.split('.')[-1].decode('utf-8'),
3725 				'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
3726 				'thumbnail': json_data['thumbnail_url'],
3727 				'description': json_data['description'],
3728 				'player_url': player_url.decode('utf-8'),
3730 		except UnavailableVideoError, err:
3731 			self._downloader.trouble(u'ERROR: unable to download file')
# Base class for post-download processing steps. Concrete subclasses
# (e.g. FFmpegExtractAudioPP below) override run().
3735 class PostProcessor(object):
3736 	"""Post Processor class.
3738 	PostProcessor objects can be added to downloaders with their
3739 	add_post_processor() method. When the downloader has finished a
3740 	successful download, it will take its internal chain of PostProcessors
3741 	and start calling the run() method on each one of them, first with
3742 	an initial argument and then with the returned value of the previous
3745 	The chain will be stopped if one of them ever returns None or the end
3746 	of the chain is reached.
3748 	PostProcessor objects follow a "mutual registration" process similar
3749 	to InfoExtractor objects.
3754 	def __init__(self, downloader=None):
3755 		self._downloader = downloader
3757 	def set_downloader(self, downloader):
3758 		"""Sets the downloader for this PP."""
3759 		self._downloader = downloader
3761 	def run(self, information):
3762 		"""Run the PostProcessor.
3764 		The "information" argument is a dictionary like the ones
3765 		composed by InfoExtractors. The only difference is that this
3766 		one has an extra field called "filepath" that points to the
3769 		When this method returns None, the postprocessing chain is
3770 		stopped. However, this method may return an information
3771 		dictionary that will be passed to the next postprocessing
3772 		object in the chain. It can be the one it received after
3773 		changing some fields.
3775 		In addition, this method may raise a PostProcessingError
3776 		exception that will be taken into account by the downloader
3779 		return information # by default, do nothing
# Post-processor that extracts the audio track of a downloaded video using
# ffmpeg, keeping the stream as-is when the source codec already matches the
# preference (aac/mp3/vorbis) and transcoding otherwise. Uses ffprobe to
# detect the source audio codec.
3782 class FFmpegExtractAudioPP(PostProcessor):
3784 	def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
3785 		PostProcessor.__init__(self, downloader)
3786 		if preferredcodec is None:
3787 			preferredcodec = 'best'
3788 		self._preferredcodec = preferredcodec
3789 		self._preferredquality = preferredquality
3790 		self._keepvideo = keepvideo
# Run ffprobe and parse its stream dump for the audio stream's codec_name.
# Returns None when ffprobe is missing or exits non-zero.
3793 	def get_audio_codec(path):
3795 			cmd = ['ffprobe', '-show_streams', '--', path]
3796 			handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
3797 			output = handle.communicate()[0]
3798 			if handle.wait() != 0:
3800 		except (IOError, OSError):
# codec_name lines precede the codec_type line for the same stream, so the
# last codec_name seen before "codec_type=audio" is the audio codec.
3803 		for line in output.split('\n'):
3804 			if line.startswith('codec_name='):
3805 				audio_codec = line.split('=')[1].strip()
3806 			elif line.strip() == 'codec_type=audio' and audio_codec != None:
# Invoke ffmpeg to write out_path with the given audio codec and extra
# options, discarding the video stream (-vn).
3811 	def run_ffmpeg(path, out_path, codec, more_opts):
3813 			cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
3814 			ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
3816 		except (IOError, OSError):
3819 	def run(self, information):
3820 		path = information['filepath']
3822 		filecodec = self.get_audio_codec(path)
3823 		if filecodec == None:
3824 			self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
# Source codec already acceptable: copy/remux losslessly where possible.
3828 		if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
3829 			if filecodec in ['aac', 'mp3', 'vorbis']:
3830 				# Lossless if possible
3832 				extension = filecodec
3833 				if filecodec == 'aac':
3834 					more_opts = ['-f', 'adts']
3835 				if filecodec == 'vorbis':
3839 				acodec = 'libmp3lame'
3842 				if self._preferredquality is not None:
3843 					more_opts += ['-ab', self._preferredquality]
3845 			# We convert the audio (lossy)
3846 			acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'vorbis': 'libvorbis'}[self._preferredcodec]
3847 			extension = self._preferredcodec
3849 			if self._preferredquality is not None:
3850 				more_opts += ['-ab', self._preferredquality]
3851 			if self._preferredcodec == 'aac':
3852 				more_opts += ['-f', 'adts']
3853 			if self._preferredcodec == 'vorbis':
# Output path: same prefix as the video, new audio extension.
3856 		(prefix, ext) = os.path.splitext(path)
3857 		new_path = prefix + '.' + extension
3858 		self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
3859 		status = self.run_ffmpeg(path, new_path, acodec, more_opts)
3862 			self._downloader.to_stderr(u'WARNING: error running ffmpeg')
3865 		# Try to update the date time for extracted audio file.
3866 		if information.get('filetime') is not None:
3868 				os.utime(new_path, (time.time(), information['filetime']))
3870 				self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
# Delete the source video unless --keep-video was given; removal failures
# are only warned about.
3872 		if not self._keepvideo:
3875 			except (IOError, OSError):
3876 				self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
3879 		information['filepath'] = new_path
# Self-update: fetch UPDATE_URL, compare the embedded __version__ against
# the running one, and overwrite the program file in place if newer.
# NOTE(review): the download is plain urllib with no integrity check —
# acceptable in 2011, but worth confirming against current expectations.
3883 def updateSelf(downloader, filename):
3884 	''' Update the program file with the latest version from the repository '''
3885 	# Note: downloader only used for options
3886 	if not os.access(filename, os.W_OK):
3887 		sys.exit('ERROR: no write permissions on %s' % filename)
3889 	downloader.to_screen('Updating to latest version...')
3893 		urlh = urllib.urlopen(UPDATE_URL)
3894 		newcontent = urlh.read()
# Short-circuit when the fetched script already declares our version.
3896 		vmatch = re.search("__version__ = '([^']+)'", newcontent)
3897 		if vmatch is not None and vmatch.group(1) == __version__:
3898 			downloader.to_screen('youtube-dl is up-to-date (' + __version__ + ')')
3902 	except (IOError, OSError), err:
3903 		sys.exit('ERROR: unable to download latest version')
3906 		outf = open(filename, 'wb')
3908 			outf.write(newcontent)
3911 	except (IOError, OSError), err:
3912 		sys.exit('ERROR: unable to overwrite current version')
3914 	downloader.to_screen('Updated youtube-dl. Restart youtube-dl to use the new version.')
# optparse help formatter hook (nested in parseOpts, whose def is above this
# view): renders an option's flags as "-o, --option METAVAR".
3921 	def _format_option_string(option):
3922 		''' ('-o', '--option') -> -o, --format METAVAR'''
3926 		if option._short_opts: opts.append(option._short_opts[0])
3927 		if option._long_opts: opts.append(option._long_opts[0])
3928 		if len(opts) > 1: opts.insert(1, ', ')
3930 		if option.takes_value(): opts.append(' %s' % option.metavar)
3932 		return "".join(opts)
# Best-effort terminal width detection (nested in parseOpts): prefers the
# COLUMNS environment variable, falling back to parsing `stty size` output.
3934 	def _find_term_columns():
3935 		columns = os.environ.get('COLUMNS', None)
3940 			sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
3941 			out,err = sp.communicate()
3942 			return int(out.split()[1])
3948 max_help_position = 80
3950 # No need to wrap help messages if we're on a wide console
3951 columns = _find_term_columns()
3952 if columns: max_width = columns
3954 fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
3955 fmt.format_option_strings = _format_option_string
3958 'version' : __version__,
3960 'usage' : '%prog [options] url [url...]',
3961 'conflict_handler' : 'resolve',
3964 parser = optparse.OptionParser(**kw)
3967 general = optparse.OptionGroup(parser, 'General Options')
3968 selection = optparse.OptionGroup(parser, 'Video Selection')
3969 authentication = optparse.OptionGroup(parser, 'Authentication Options')
3970 video_format = optparse.OptionGroup(parser, 'Video Format Options')
3971 postproc = optparse.OptionGroup(parser, 'Post-processing Options')
3972 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
3973 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
3975 general.add_option('-h', '--help',
3976 action='help', help='print this help text and exit')
3977 general.add_option('-v', '--version',
3978 action='version', help='print program version and exit')
3979 general.add_option('-U', '--update',
3980 action='store_true', dest='update_self', help='update this program to latest version')
3981 general.add_option('-i', '--ignore-errors',
3982 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
3983 general.add_option('-r', '--rate-limit',
3984 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
3985 general.add_option('-R', '--retries',
3986 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
3987 general.add_option('--dump-user-agent',
3988 action='store_true', dest='dump_user_agent',
3989 help='display the current browser identification', default=False)
3990 general.add_option('--list-extractors',
3991 action='store_true', dest='list_extractors',
3992 help='List all supported extractors and the URLs they would handle', default=False)
3994 selection.add_option('--playlist-start',
3995 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
3996 selection.add_option('--playlist-end',
3997 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
3998 selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
3999 selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
4001 authentication.add_option('-u', '--username',
4002 dest='username', metavar='USERNAME', help='account username')
4003 authentication.add_option('-p', '--password',
4004 dest='password', metavar='PASSWORD', help='account password')
4005 authentication.add_option('-n', '--netrc',
4006 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
4009 video_format.add_option('-f', '--format',
4010 action='store', dest='format', metavar='FORMAT', help='video format code')
4011 video_format.add_option('--all-formats',
4012 action='store_const', dest='format', help='download all available video formats', const='all')
4013 video_format.add_option('--max-quality',
4014 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
4015 video_format.add_option('-F', '--list-formats',
# Continuation of parseOpts(): registers command-line flags on optparse
# OptionGroup objects and assembles the parser.  The enclosing `def`, the
# earlier groups (general, selection, video_format, authentication) and a
# number of lines are outside this excerpt -- the leading numeric tokens
# are the original file's line numbers and the numbering is non-contiguous,
# so this fragment is NOT runnable as-is.
4016 action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
# Verbosity group: quiet/simulate switches plus the --get-* family, each of
# which (per its help text) implies "simulate, quiet, but print one field".
4019 verbosity.add_option('-q', '--quiet',
4020 action='store_true', dest='quiet', help='activates quiet mode', default=False)
4021 verbosity.add_option('-s', '--simulate',
4022 action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
4023 verbosity.add_option('--skip-download',
4024 action='store_true', dest='skip_download', help='do not download the video', default=False)
4025 verbosity.add_option('-g', '--get-url',
4026 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
4027 verbosity.add_option('-e', '--get-title',
4028 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
4029 verbosity.add_option('--get-thumbnail',
4030 action='store_true', dest='getthumbnail',
4031 help='simulate, quiet but print thumbnail URL', default=False)
4032 verbosity.add_option('--get-description',
4033 action='store_true', dest='getdescription',
4034 help='simulate, quiet but print video description', default=False)
4035 verbosity.add_option('--get-filename',
4036 action='store_true', dest='getfilename',
4037 help='simulate, quiet but print output filename', default=False)
4038 verbosity.add_option('--get-format',
4039 action='store_true', dest='getformat',
4040 help='simulate, quiet but print output format', default=False)
4041 verbosity.add_option('--no-progress',
4042 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
4043 verbosity.add_option('--console-title',
4044 action='store_true', dest='consoletitle',
4045 help='display progress in console titlebar', default=False)
# Filesystem group: output naming (-t/-l/-A/-o), batch input, overwrite and
# resume behaviour, cookie file, .part files, mtime, and metadata side files.
# Note -c (store_true) and --no-continue (store_false) share dest='continue_dl'.
4048 filesystem.add_option('-t', '--title',
4049 action='store_true', dest='usetitle', help='use title in file name', default=False)
4050 filesystem.add_option('-l', '--literal',
4051 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
4052 filesystem.add_option('-A', '--auto-number',
4053 action='store_true', dest='autonumber',
4054 help='number downloaded files starting from 00000', default=False)
4055 filesystem.add_option('-o', '--output',
4056 dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, and %% for a literal percent')
4057 filesystem.add_option('-a', '--batch-file',
4058 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
4059 filesystem.add_option('-w', '--no-overwrites',
4060 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
4061 filesystem.add_option('-c', '--continue',
4062 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
4063 filesystem.add_option('--no-continue',
4064 action='store_false', dest='continue_dl',
4065 help='do not resume partially downloaded files (restart from beginning)')
4066 filesystem.add_option('--cookies',
4067 dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
4068 filesystem.add_option('--no-part',
4069 action='store_true', dest='nopart', help='do not use .part files', default=False)
4070 filesystem.add_option('--no-mtime',
4071 action='store_false', dest='updatetime',
4072 help='do not use the Last-modified header to set the file modification time', default=True)
4073 filesystem.add_option('--write-description',
4074 action='store_true', dest='writedescription',
4075 help='write video description to a .description file', default=False)
4076 filesystem.add_option('--write-info-json',
4077 action='store_true', dest='writeinfojson',
4078 help='write video metadata to a .info.json file', default=False)
# Post-processing group: ffmpeg-based audio extraction and its knobs.
4081 postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
4082 help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
4083 postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
4084 help='"best", "aac", "vorbis" or "mp3"; best by default')
4085 postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K',
4086 help='ffmpeg audio bitrate specification, 128k by default')
4087 postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
4088 help='keeps the video file on disk after the post-processing; the video is erased by default')
# Attach every group to the parser; group order here controls --help layout.
4091 parser.add_option_group(general)
4092 parser.add_option_group(selection)
4093 parser.add_option_group(filesystem)
4094 parser.add_option_group(verbosity)
4095 parser.add_option_group(video_format)
4096 parser.add_option_group(authentication)
4097 parser.add_option_group(postproc)
4099 opts, args = parser.parse_args()
# Returns the parser too so callers can invoke parser.error() on bad combos.
4101 return parser, opts, args
# Factory for the ordered list of info-extractor instances.  Order matters:
# the first extractor whose .suitable() matches handles the URL.  The three
# base extractors are built first because the playlist/user/search variants
# wrap them.  NOTE(review): the opening of the returned list literal and
# several entries are missing from this excerpt (the leading numeric tokens
# are original line numbers and are non-contiguous).
4103 def gen_extractors():
4104 """ Return a list of an instance of every supported extractor.
4105 The order does matter; the first extractor matched is the one handling the URL.
4107 youtube_ie = YoutubeIE()
4108 google_ie = GoogleIE()
4109 yahoo_ie = YahooIE()
# Playlist/user/search extractors delegate single-video handling to youtube_ie.
4111 YoutubePlaylistIE(youtube_ie),
4112 YoutubeUserIE(youtube_ie),
4113 YoutubeSearchIE(youtube_ie),
# Metacafe may redirect to YouTube, hence the shared youtube_ie instance.
# TODO confirm -- the delegation reason is not visible in this excerpt.
4115 MetacafeIE(youtube_ie),
4118 GoogleSearchIE(google_ie),
4121 YahooSearchIE(yahoo_ie),
# Body of the top-level driver (its `def` is above this excerpt): parse
# options, configure cookies/network, validate option combinations, build
# the FileDownloader, run the download, and persist cookies.
# NOTE(review): several lines are missing (numbering is non-contiguous);
# in particular the `try:` lines matching the except clauses at original
# lines 4149, 4208, 4293 and 4301-4305 are not visible here.
4139 parser, opts, args = parseOpts()
4141 # Open appropriate CookieJar
4142 if opts.cookiefile is None:
4143 jar = cookielib.CookieJar()
# With --cookies, use a Mozilla-format jar; only load it if the file
# already exists and is readable (the jar.load() call sits in a gap here).
4146 jar = cookielib.MozillaCookieJar(opts.cookiefile)
4147 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
4149 except (IOError, OSError), err:
4150 sys.exit(u'ERROR: unable to open cookie file')
# --dump-user-agent just prints the UA string (presumably followed by an
# exit in a line not shown here -- TODO confirm).
4153 if opts.dump_user_agent:
4154 print std_headers['User-Agent']
4157 # Batch file verification
4159 if opts.batchfile is not None:
4161 if opts.batchfile == '-':
4164 batchfd = open(opts.batchfile, 'r')
4165 batchurls = batchfd.readlines()
4166 batchurls = [x.strip() for x in batchurls]
# Drop blank lines and comment lines beginning with '#', '/' or ';'.
4167 batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
4169 sys.exit(u'ERROR: batch file could not be read')
# Batch-file URLs come before the positional command-line URLs.
4170 all_urls = batchurls + args
4172 # General configuration
# Install a global urllib2 opener: proxy support, the cookie jar above,
# and the project's YoutubeDLHandler.
4173 cookie_processor = urllib2.HTTPCookieProcessor(jar)
4174 opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())
4175 urllib2.install_opener(opener)
4176 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
4178 extractors = gen_extractors()
# --list-extractors: for each extractor, list which of the given URLs it
# would handle; each URL is consumed by the first extractor that matches.
4180 if opts.list_extractors:
4181 for ie in extractors:
4183 matchedUrls = filter(lambda url: ie.suitable(url), all_urls)
4184 all_urls = filter(lambda url: url not in matchedUrls, all_urls)
4185 for mu in matchedUrls:
4189 # Conflicting, missing and erroneous options
4190 if opts.usenetrc and (opts.username is not None or opts.password is not None):
4191 parser.error(u'using .netrc conflicts with giving username/password')
4192 if opts.password is not None and opts.username is None:
4193 parser.error(u'account username missing')
4194 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
4195 parser.error(u'using output template conflicts with using title, literal title or auto number')
4196 if opts.usetitle and opts.useliteral:
4197 parser.error(u'using title conflicts with using literal title')
# Username without password: prompt interactively instead of erroring.
4198 if opts.username is not None and opts.password is None:
4199 opts.password = getpass.getpass(u'Type account password and press return:')
# Normalize string-valued numeric options in place on the opts object.
4200 if opts.ratelimit is not None:
4201 numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
4202 if numeric_limit is None:
4203 parser.error(u'invalid rate limit specified')
4204 opts.ratelimit = numeric_limit
4205 if opts.retries is not None:
4207 opts.retries = long(opts.retries)
4208 except (TypeError, ValueError), err:
4209 parser.error(u'invalid retry count specified')
4211 opts.playliststart = int(opts.playliststart)
4212 if opts.playliststart <= 0:
4213 raise ValueError(u'Playlist start must be positive')
4214 except (TypeError, ValueError), err:
4215 parser.error(u'invalid playlist start number specified')
# playlistend == -1 is the sentinel for "no end limit".
4217 opts.playlistend = int(opts.playlistend)
4218 if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
4219 raise ValueError(u'Playlist end must be greater than playlist start')
4220 except (TypeError, ValueError), err:
4221 parser.error(u'invalid playlist end number specified')
4222 if opts.extractaudio:
4223 if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis']:
4224 parser.error(u'invalid audio format specified')
# Build the FileDownloader from the validated options.  Any --get-* flag
# forces both quiet output and skipping the actual download.
4227 fd = FileDownloader({
4228 'usenetrc': opts.usenetrc,
4229 'username': opts.username,
4230 'password': opts.password,
4231 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
4232 'forceurl': opts.geturl,
4233 'forcetitle': opts.gettitle,
4234 'forcethumbnail': opts.getthumbnail,
4235 'forcedescription': opts.getdescription,
4236 'forcefilename': opts.getfilename,
4237 'forceformat': opts.getformat,
4238 'simulate': opts.simulate,
4239 'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
4240 'format': opts.format,
4241 'format_limit': opts.format_limit,
4242 'listformats': opts.listformats,
# Output template resolution: explicit -o wins; otherwise a default is
# picked from the -t/-l/-A/format flags via an or-chain (first truthy
# expression is the template); the final fallback is '%(id)s.%(ext)s'.
4243 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
4244 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
4245 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
4246 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
4247 or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
4248 or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
4249 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
4250 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
4251 or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
4252 or u'%(id)s.%(ext)s'),
4253 'ignoreerrors': opts.ignoreerrors,
4254 'ratelimit': opts.ratelimit,
4255 'nooverwrites': opts.nooverwrites,
4256 'retries': opts.retries,
4257 'continuedl': opts.continue_dl,
4258 'noprogress': opts.noprogress,
4259 'playliststart': opts.playliststart,
4260 'playlistend': opts.playlistend,
# Template '-' means "write media to stdout", so logging moves to stderr.
4261 'logtostderr': opts.outtmpl == '-',
4262 'consoletitle': opts.consoletitle,
4263 'nopart': opts.nopart,
4264 'updatetime': opts.updatetime,
4265 'writedescription': opts.writedescription,
4266 'writeinfojson': opts.writeinfojson,
4267 'matchtitle': opts.matchtitle,
4268 'rejecttitle': opts.rejecttitle,
# Register every extractor, then the optional audio post-processor.
4270 for extractor in extractors:
4271 fd.add_info_extractor(extractor)
4274 if opts.extractaudio:
4275 fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))
4278 if opts.update_self:
4279 updateSelf(fd, sys.argv[0])
# Zero URLs is only acceptable together with --update (self-update run).
4282 if len(all_urls) < 1:
4283 if not opts.update_self:
4284 parser.error(u'you must provide at least one URL')
4287 retcode = fd.download(all_urls)
4289 # Dump cookie jar if requested
4290 if opts.cookiefile is not None:
4293 except (IOError, OSError), err:
4294 sys.exit(u'ERROR: unable to save cookie jar')
# Except clauses of an enclosing try (its try line is not in this excerpt):
# translate known failure modes into clean exit messages.
4301 except DownloadError:
4303 except SameFileError:
4304 sys.exit(u'ERROR: fixed output name but more than one file to download')
4305 except KeyboardInterrupt:
4306 sys.exit(u'\nERROR: Interrupted by user')
# Script entry-point guard; the guarded call itself (original lines
# 4309-4310) is missing from this excerpt.
4308 if __name__ == '__main__':
4311 # vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: