2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # License: Public domain code
# parse_qs was moved from the cgi module to the urlparse module recently.
try:
	from urlparse import parse_qs
except ImportError:
	from cgi import parse_qs
# Default HTTP headers sent with every request; a desktop Firefox UA is
# used because some sites serve different (or no) content to unknown agents.
std_headers = {
	'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
	'Accept-Language': 'en-us,en;q=0.5',
}
36 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
def preferredencoding():
	"""Get preferred encoding.

	Returns the best encoding scheme for the system, based on
	locale.getpreferredencoding() and some further tweaks.
	"""
	def yield_preferredencoding():
		try:
			pref = locale.getpreferredencoding()
			# Verify the encoding is usable; some systems report bogus values.
			u'TEST'.encode(pref)
		except:
			# Fall back to a safe default when the locale encoding is unusable.
			pref = 'UTF-8'
		while True:
			yield pref
	# The generator caches the computed value; .next() is Python 2 syntax.
	return yield_preferredencoding().next()
54 def htmlentity_transform(matchobj):
55 """Transforms an HTML entity to a Unicode character.
57 This function receives a match object and is intended to be used with
58 the re.sub() function.
60 entity = matchobj.group(1)
62 # Known non-numeric HTML entity
63 if entity in htmlentitydefs.name2codepoint:
64 return unichr(htmlentitydefs.name2codepoint[entity])
67 mobj = re.match(ur'(?u)#(x?\d+)', entity)
69 numstr = mobj.group(1)
70 if numstr.startswith(u'x'):
72 numstr = u'0%s' % numstr
75 return unichr(long(numstr, base))
77 # Unknown entity in name, return its literal representation
78 return (u'&%s;' % entity)
80 def sanitize_title(utitle):
81 """Sanitizes a video title so it could be used as part of a filename."""
82 utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
83 return utitle.replace(unicode(os.sep), u'%')
85 def sanitize_open(filename, open_mode):
86 """Try to open the given filename, and slightly tweak it if this fails.
88 Attempts to open the given filename. If this fails, it tries to change
89 the filename slightly, step by step, until it's either able to open it
90 or it fails and raises a final exception, like the standard open()
93 It returns the tuple (stream, definitive_file_name).
97 return (sys.stdout, filename)
98 stream = open(filename, open_mode)
99 return (stream, filename)
100 except (IOError, OSError), err:
101 # In case of error, try to remove win32 forbidden chars
102 filename = re.sub(ur'[<>:"\|\?\*]', u'#', filename)
104 # An exception here should be caught in the caller
105 stream = open(filename, open_mode)
106 return (stream, filename)
class DownloadError(Exception):
	"""Download Error exception.

	This exception may be thrown by FileDownloader objects if they are not
	configured to continue on errors. They will contain the appropriate
	error message.
	"""
	pass
class SameFileError(Exception):
	"""Same File exception.

	This exception will be thrown by FileDownloader objects if they detect
	multiple files would have to be downloaded to the same file on disk.
	"""
	pass
class PostProcessingError(Exception):
	"""Post Processing exception.

	This exception may be raised by PostProcessor's .run() method to
	indicate an error in the postprocessing task.
	"""
	pass
class UnavailableFormatError(Exception):
	"""Unavailable Format exception.

	This exception will be thrown when a video is requested
	in a format that is not available for that video.
	"""
	pass
class ContentTooShortError(Exception):
	"""Content Too Short exception.

	This exception may be raised by FileDownloader objects when a file they
	download is too small for what the server announced first, indicating
	the connection was probably interrupted.
	"""
	# Both values are in bytes.
	downloaded = None
	expected = None

	def __init__(self, downloaded, expected):
		self.downloaded = downloaded
		self.expected = expected
157 class FileDownloader(object):
158 """File Downloader class.
160 File downloader objects are the ones responsible of downloading the
161 actual video file and writing it to disk if the user has requested
162 it, among some other tasks. In most cases there should be one per
163 program. As, given a video URL, the downloader doesn't know how to
164 extract all the needed information, task that InfoExtractors do, it
165 has to pass the URL to one of them.
167 For this, file downloader objects have a method that allows
168 InfoExtractors to be registered in a given order. When it is passed
169 a URL, the file downloader handles it to the first InfoExtractor it
170 finds that reports being able to handle it. The InfoExtractor extracts
171 all the information about the video or videos the URL refers to, and
172 asks the FileDownloader to process the video information, possibly
173 downloading the video.
175 File downloaders accept a lot of parameters. In order not to saturate
176 the object constructor with arguments, it receives a dictionary of
177 options instead. These options are available through the params
178 attribute for the InfoExtractors to use. The FileDownloader also
179 registers itself as the downloader in charge for the InfoExtractors
180 that are added to it, so this is a "mutual registration".
184 username: Username for authentication purposes.
185 password: Password for authentication purposes.
186 usenetrc: Use netrc for authentication instead.
187 quiet: Do not print messages to stdout.
188 forceurl: Force printing final URL.
189 forcetitle: Force printing title.
190 simulate: Do not download the video files.
191 format: Video format code.
192 outtmpl: Template for output names.
193 ignoreerrors: Do not stop on download errors.
194 ratelimit: Download speed limit, in bytes/sec.
195 nooverwrites: Prevent overwriting files.
196 continuedl: Try to continue downloads if possible.
197 noprogress: Do not print the progress bar.
203 _download_retcode = None
204 _num_downloads = None
206 def __init__(self, params):
207 """Create a FileDownloader object with the given options."""
210 self._download_retcode = 0
211 self._num_downloads = 0
215 def pmkdir(filename):
216 """Create directory components in filename. Similar to Unix "mkdir -p"."""
217 components = filename.split(os.sep)
218 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
219 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
220 for dir in aggregate:
221 if not os.path.exists(dir):
225 def format_bytes(bytes):
228 if type(bytes) is str:
233 exponent = long(math.log(bytes, 1024.0))
234 suffix = 'bkMGTPEZY'[exponent]
235 converted = float(bytes) / float(1024**exponent)
236 return '%.2f%s' % (converted, suffix)
239 def calc_percent(byte_counter, data_len):
242 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
245 def calc_eta(start, now, total, current):
249 if current == 0 or dif < 0.001: # One millisecond
251 rate = float(current) / dif
252 eta = long((float(total) - float(current)) / rate)
253 (eta_mins, eta_secs) = divmod(eta, 60)
256 return '%02d:%02d' % (eta_mins, eta_secs)
259 def calc_speed(start, now, bytes):
261 if bytes == 0 or dif < 0.001: # One millisecond
262 return '%10s' % '---b/s'
263 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
266 def best_block_size(elapsed_time, bytes):
267 new_min = max(bytes / 2.0, 1.0)
268 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
269 if elapsed_time < 0.001:
271 rate = bytes / elapsed_time
279 def parse_bytes(bytestr):
280 """Parse a string indicating a byte quantity into a long integer."""
281 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
284 number = float(matchobj.group(1))
285 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
286 return long(round(number * multiplier))
290 """Verify a URL is valid and data could be downloaded. Return real data URL."""
291 request = urllib2.Request(url, None, std_headers)
292 data = urllib2.urlopen(request)
298 def add_info_extractor(self, ie):
299 """Add an InfoExtractor object to the end of the list."""
301 ie.set_downloader(self)
303 def add_post_processor(self, pp):
304 """Add a PostProcessor object to the end of the chain."""
306 pp.set_downloader(self)
308 def to_stdout(self, message, skip_eol=False, ignore_encoding_errors=False):
309 """Print message to stdout if not in quiet mode."""
311 if not self.params.get('quiet', False):
312 print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
314 except (UnicodeEncodeError), err:
315 if not ignore_encoding_errors:
318 def to_stderr(self, message):
319 """Print message to stderr."""
320 print >>sys.stderr, message.encode(preferredencoding())
322 def fixed_template(self):
323 """Checks if the output template is fixed."""
324 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
326 def trouble(self, message=None):
327 """Determine action to take when a download problem appears.
329 Depending on if the downloader has been configured to ignore
330 download errors or not, this method may throw an exception or
331 not when errors are found, after printing the message.
333 if message is not None:
334 self.to_stderr(message)
335 if not self.params.get('ignoreerrors', False):
336 raise DownloadError(message)
337 self._download_retcode = 1
339 def slow_down(self, start_time, byte_counter):
340 """Sleep if the download speed is over the rate limit."""
341 rate_limit = self.params.get('ratelimit', None)
342 if rate_limit is None or byte_counter == 0:
345 elapsed = now - start_time
348 speed = float(byte_counter) / elapsed
349 if speed > rate_limit:
350 time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
352 def report_destination(self, filename):
353 """Report destination filename."""
354 self.to_stdout(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
356 def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
357 """Report download progress."""
358 if self.params.get('noprogress', False):
360 self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
361 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
363 def report_resuming_byte(self, resume_len):
364 """Report attemtp to resume at given byte."""
365 self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
367 def report_file_already_downloaded(self, file_name):
368 """Report file has already been fully downloaded."""
370 self.to_stdout(u'[download] %s has already been downloaded' % file_name)
371 except (UnicodeEncodeError), err:
372 self.to_stdout(u'[download] The file has already been downloaded')
374 def report_unable_to_resume(self):
375 """Report it was impossible to resume download."""
376 self.to_stdout(u'[download] Unable to resume')
378 def report_finish(self):
379 """Report download finished."""
380 if self.params.get('noprogress', False):
381 self.to_stdout(u'[download] Download completed')
385 def process_info(self, info_dict):
386 """Process a single dictionary returned by an InfoExtractor."""
387 # Do nothing else if in simulate mode
388 if self.params.get('simulate', False):
389 # Verify URL if it's an HTTP one
390 if info_dict['url'].startswith('http'):
392 self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
393 except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
394 raise UnavailableFormatError
397 if self.params.get('forcetitle', False):
398 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
399 if self.params.get('forceurl', False):
400 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
401 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
402 print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
403 if self.params.get('forcedescription', False) and 'description' in info_dict:
404 print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
409 template_dict = dict(info_dict)
410 template_dict['epoch'] = unicode(long(time.time()))
411 template_dict['ord'] = unicode('%05d' % self._num_downloads)
412 filename = self.params['outtmpl'] % template_dict
413 except (ValueError, KeyError), err:
414 self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
415 if self.params.get('nooverwrites', False) and os.path.exists(filename):
416 self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
420 self.pmkdir(filename)
421 except (OSError, IOError), err:
422 self.trouble('ERROR: unable to create directories: %s' % str(err))
426 success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
427 except (OSError, IOError), err:
428 raise UnavailableFormatError
429 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
430 self.trouble('ERROR: unable to download video data: %s' % str(err))
432 except (ContentTooShortError, ), err:
433 self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
438 self.post_process(filename, info_dict)
439 except (PostProcessingError), err:
440 self.trouble('ERROR: postprocessing: %s' % str(err))
443 def download(self, url_list):
444 """Download a given list of URLs."""
445 if len(url_list) > 1 and self.fixed_template():
446 raise SameFileError(self.params['outtmpl'])
449 suitable_found = False
451 # Go to next InfoExtractor if not suitable
452 if not ie.suitable(url):
455 # Suitable InfoExtractor found
456 suitable_found = True
458 # Extract information from URL and process it
461 # Suitable InfoExtractor had been found; go to next URL
464 if not suitable_found:
465 self.trouble('ERROR: no suitable InfoExtractor: %s' % url)
467 return self._download_retcode
469 def post_process(self, filename, ie_info):
470 """Run the postprocessing chain on the given file."""
472 info['filepath'] = filename
478 def _download_with_rtmpdump(self, filename, url, player_url):
479 self.report_destination(filename)
481 # Check for rtmpdump first
483 subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
484 except (OSError, IOError):
485 self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
488 # Download using rtmpdump. rtmpdump returns exit code 2 when
489 # the connection was interrumpted and resuming appears to be
490 # possible. This is part of rtmpdump's normal usage, AFAIK.
491 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', filename]
492 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
493 while retval == 2 or retval == 1:
494 prevsize = os.path.getsize(filename)
495 self.to_stdout(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
496 time.sleep(5.0) # This seems to be needed
497 retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
498 cursize = os.path.getsize(filename)
499 if prevsize == cursize and retval == 1:
502 self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
505 self.trouble('\nERROR: rtmpdump exited with code %d' % retval)
508 def _do_download(self, filename, url, player_url):
509 # Attempt to download using rtmpdump
510 if url.startswith('rtmp'):
511 return self._download_with_rtmpdump(filename, url, player_url)
515 basic_request = urllib2.Request(url, None, std_headers)
516 request = urllib2.Request(url, None, std_headers)
518 # Establish possible resume length
519 if os.path.isfile(filename):
520 resume_len = os.path.getsize(filename)
524 # Request parameters in case of being able to resume
525 if self.params.get('continuedl', False) and resume_len != 0:
526 self.report_resuming_byte(resume_len)
527 request.add_header('Range','bytes=%d-' % resume_len)
530 # Establish connection
532 data = urllib2.urlopen(request)
533 except (urllib2.HTTPError, ), err:
534 if err.code != 416: # 416 is 'Requested range not satisfiable'
537 data = urllib2.urlopen(basic_request)
538 content_length = data.info()['Content-Length']
540 if content_length is not None and long(content_length) == resume_len:
541 # Because the file had already been fully downloaded
542 self.report_file_already_downloaded(filename)
543 self._num_downloads += 1
546 # Because the server didn't let us
547 self.report_unable_to_resume()
550 data_len = data.info().get('Content-length', None)
551 data_len_str = self.format_bytes(data_len)
558 data_block = data.read(block_size)
560 data_block_len = len(data_block)
561 if data_block_len == 0:
563 byte_counter += data_block_len
565 # Open file just in time
568 (stream, filename) = sanitize_open(filename, open_mode)
569 self.report_destination(filename)
570 self._num_downloads += 1
571 except (OSError, IOError), err:
572 self.trouble('ERROR: unable to open for writing: %s' % str(err))
575 stream.write(data_block)
576 except (IOError, OSError), err:
577 self.trouble('\nERROR: unable to write data: %s' % str(err))
578 block_size = self.best_block_size(after - before, data_block_len)
581 percent_str = self.calc_percent(byte_counter, data_len)
582 eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
583 speed_str = self.calc_speed(start, time.time(), byte_counter)
584 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
587 self.slow_down(start, byte_counter)
590 if data_len is not None and str(byte_counter) != data_len:
591 raise ContentTooShortError(byte_counter, long(data_len))
class InfoExtractor(object):
	"""Information Extractor class.

	Information extractors are the classes that, given a URL, extract
	information from the video (or videos) the URL refers to. This
	information includes the real video URL, the video title and simplified
	title, author and others. The information is stored in a dictionary
	which is then passed to the FileDownloader. The FileDownloader
	processes this information possibly downloading the video to the file
	system, among other possible outcomes. The dictionaries must include
	the following fields:

	id:		Video identifier.
	url:		Final video URL.
	uploader:	Nickname of the video uploader.
	title:		Literal title.
	stitle:		Simplified title.
	ext:		Video filename extension.
	format:		Video format.
	player_url:	SWF Player URL (may be None).

	The following fields are optional. Their primary purpose is to allow
	youtube-dl to serve as the backend for a video search function, such
	as the one in youtube2mp3. They are only used when their respective
	forced printing functions are called:

	thumbnail:	Full URL to a video thumbnail image.
	description:	One-line video description.

	Subclasses of this one should re-define the _real_initialize() and
	_real_extract() methods, as well as the suitable() static method.
	Probably, they should also be instantiated and added to the main
	downloader.
	"""

	# True once _real_initialize() has been run.
	_ready = False
	# FileDownloader in charge of this extractor (set via set_downloader).
	_downloader = None

	def __init__(self, downloader=None):
		"""Constructor. Receives an optional downloader."""
		self._ready = False
		self.set_downloader(downloader)

	@staticmethod
	def suitable(url):
		"""Receives a URL and returns True if suitable for this IE."""
		return False

	def initialize(self):
		"""Initializes an instance (authentication, etc)."""
		if not self._ready:
			self._real_initialize()
			self._ready = True

	def extract(self, url):
		"""Extracts URL information and returns it in list of dicts."""
		self.initialize()
		return self._real_extract(url)

	def set_downloader(self, downloader):
		"""Sets the downloader for this IE."""
		self._downloader = downloader

	def _real_initialize(self):
		"""Real initialization process. Redefine in subclasses."""
		pass

	def _real_extract(self, url):
		"""Real extraction process. Redefine in subclasses."""
		pass
665 class YoutubeIE(InfoExtractor):
666 """Information extractor for youtube.com."""
668 _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?[\?#](?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
669 _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
670 _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
671 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
672 _NETRC_MACHINE = 'youtube'
673 _available_formats = ['37', '22', '35', '18', '34', '5', '17', '13', None] # listed in order of priority for -b flag
674 _video_extensions = {
684 return (re.match(YoutubeIE._VALID_URL, url) is not None)
686 def report_lang(self):
687 """Report attempt to set language."""
688 self._downloader.to_stdout(u'[youtube] Setting language')
690 def report_login(self):
691 """Report attempt to log in."""
692 self._downloader.to_stdout(u'[youtube] Logging in')
694 def report_age_confirmation(self):
695 """Report attempt to confirm age."""
696 self._downloader.to_stdout(u'[youtube] Confirming age')
698 def report_video_webpage_download(self, video_id):
699 """Report attempt to download video webpage."""
700 self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id)
702 def report_video_info_webpage_download(self, video_id):
703 """Report attempt to download video info webpage."""
704 self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
706 def report_information_extraction(self, video_id):
707 """Report attempt to extract video information."""
708 self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
710 def report_unavailable_format(self, video_id, format):
711 """Report extracted video URL."""
712 self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
714 def report_rtmp_download(self):
715 """Indicate the download will use the RTMP protocol."""
716 self._downloader.to_stdout(u'[youtube] RTMP download detected')
718 def _real_initialize(self):
719 if self._downloader is None:
724 downloader_params = self._downloader.params
726 # Attempt to use provided username and password or .netrc data
727 if downloader_params.get('username', None) is not None:
728 username = downloader_params['username']
729 password = downloader_params['password']
730 elif downloader_params.get('usenetrc', False):
732 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
737 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
738 except (IOError, netrc.NetrcParseError), err:
739 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
743 request = urllib2.Request(self._LANG_URL, None, std_headers)
746 urllib2.urlopen(request).read()
747 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
748 self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
751 # No authentication to be performed
757 'current_form': 'loginForm',
759 'action_login': 'Log In',
760 'username': username,
761 'password': password,
763 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
766 login_results = urllib2.urlopen(request).read()
767 if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
768 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
770 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
771 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
777 'action_confirm': 'Confirm',
779 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
781 self.report_age_confirmation()
782 age_results = urllib2.urlopen(request).read()
783 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
784 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
787 def _real_extract(self, url):
788 # Extract video id from URL
789 mobj = re.match(self._VALID_URL, url)
791 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
793 video_id = mobj.group(2)
795 # Downloader parameters
800 if self._downloader is not None:
801 params = self._downloader.params
802 format_param = params.get('format', None)
803 if format_param == '0':
804 format_param = self._available_formats[quality_index]
806 elif format_param == '-1':
807 format_param = self._available_formats[quality_index]
812 video_extension = self._video_extensions.get(format_param, 'flv')
815 self.report_video_webpage_download(video_id)
816 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers)
818 video_webpage = urllib2.urlopen(request).read()
819 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
820 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
823 # Attempt to extract SWF player URL
824 mobj = re.search(r'swfConfig.*"(http://.*?watch-.*?\.swf)"', video_webpage)
826 player_url = mobj.group(1)
831 self.report_video_info_webpage_download(video_id)
832 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
833 video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
834 % (video_id, el_type))
835 request = urllib2.Request(video_info_url, None, std_headers)
837 video_info_webpage = urllib2.urlopen(request).read()
838 video_info = parse_qs(video_info_webpage)
839 if 'token' in video_info:
841 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
842 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
844 self.report_information_extraction(video_id)
847 if 'token' not in video_info:
848 # Attempt to see if YouTube has issued an error message
849 if 'reason' not in video_info:
850 self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason')
851 stream = open('reportme-ydl-%s.dat' % time.time(), 'wb')
852 stream.write(video_info_webpage)
855 reason = urllib.unquote_plus(video_info['reason'][0])
856 self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8'))
858 token = urllib.unquote_plus(video_info['token'][0])
859 video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
860 if format_param is not None:
861 video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
863 # Check possible RTMP download
864 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
865 self.report_rtmp_download()
866 video_real_url = video_info['conn'][0]
869 if 'author' not in video_info:
870 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
872 video_uploader = urllib.unquote_plus(video_info['author'][0])
875 if 'title' not in video_info:
876 self._downloader.trouble(u'ERROR: unable to extract video title')
878 video_title = urllib.unquote_plus(video_info['title'][0])
879 video_title = video_title.decode('utf-8')
880 video_title = sanitize_title(video_title)
883 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
884 simple_title = simple_title.strip(ur'_')
887 if 'thumbnail_url' not in video_info:
888 self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
890 else: # don't panic if we can't find it
891 video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
894 video_description = 'No description available.'
895 if self._downloader.params.get('forcedescription', False):
896 mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
898 video_description = mobj.group(1)
901 # Process video information
902 self._downloader.process_info({
903 'id': video_id.decode('utf-8'),
904 'url': video_real_url.decode('utf-8'),
905 'uploader': video_uploader.decode('utf-8'),
906 'title': video_title,
907 'stitle': simple_title,
908 'ext': video_extension.decode('utf-8'),
909 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
910 'thumbnail': video_thumbnail.decode('utf-8'),
911 'description': video_description.decode('utf-8'),
912 'player_url': player_url,
916 if quality_index == len(self._available_formats):
921 format_param = self._available_formats[quality_index]
925 except UnavailableFormatError, err:
926 if best_quality or all_formats:
927 if quality_index == len(self._available_formats):
928 # I don't ever expect this to happen
930 self._downloader.trouble(u'ERROR: no known formats available for video')
933 self.report_unavailable_format(video_id, format_param)
935 format_param = self._available_formats[quality_index]
938 self._downloader.trouble('ERROR: format not available for video')
942 class MetacafeIE(InfoExtractor):
943 """Information Extractor for metacafe.com."""
945 _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
946 _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
947 _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
950 def __init__(self, youtube_ie, downloader=None):
951 InfoExtractor.__init__(self, downloader)
952 self._youtube_ie = youtube_ie
956 return (re.match(MetacafeIE._VALID_URL, url) is not None)
958 def report_disclaimer(self):
959 """Report disclaimer retrieval."""
960 self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
962 def report_age_confirmation(self):
963 """Report attempt to confirm age."""
964 self._downloader.to_stdout(u'[metacafe] Confirming age')
966 def report_download_webpage(self, video_id):
967 """Report webpage download."""
968 self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
970 def report_extraction(self, video_id):
971 """Report information extraction."""
972 self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
974 def _real_initialize(self):
975 # Retrieve disclaimer
976 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
978 self.report_disclaimer()
979 disclaimer = urllib2.urlopen(request).read()
980 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
981 self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
987 'submit': "Continue - I'm over 18",
989 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
991 self.report_age_confirmation()
992 disclaimer = urllib2.urlopen(request).read()
993 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
994 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
# NOTE(review): this is an elided numbered listing — gaps in the embedded line
# numbers (e.g. 999 -> 1001) mean guard lines such as `if mobj is None:`,
# `return`, and `try:` are not shown here. Only the visible tokens are kept.
# Extracts a Metacafe video: resolves the media URL, title and uploader from
# the watch page, then hands the info dict to the downloader.
997 def _real_extract(self, url):
998 # Extract id and simplified title from URL
999 mobj = re.match(self._VALID_URL, url)
1001 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1004 video_id = mobj.group(1)
1006 # Check if video comes from YouTube
1007 mobj2 = re.match(r'^yt-(.*)$', video_id)
1008 if mobj2 is not None:
# Delegate "yt-XXXX" ids to the YouTube extractor instead of handling them here.
1009 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1012 simple_title = mobj.group(2).decode('utf-8')
1013 video_extension = 'flv'
1015 # Retrieve video webpage to extract further information
1016 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1018 self.report_download_webpage(video_id)
1019 webpage = urllib2.urlopen(request).read()
1020 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1021 self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1024 # Extract URL, uploader and title from webpage
1025 self.report_extraction(video_id)
1026 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
1028 self._downloader.trouble(u'ERROR: unable to extract media URL')
1030 mediaURL = urllib.unquote(mobj.group(1))
# The gdaKey-based URL construction below is deliberately disabled; the plain
# mediaURL is used directly instead.
1032 #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1034 # self._downloader.trouble(u'ERROR: unable to extract gdaKey')
1036 #gdaKey = mobj.group(1)
1038 #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
1040 video_url = mediaURL
1042 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
1044 self._downloader.trouble(u'ERROR: unable to extract title')
1046 video_title = mobj.group(1).decode('utf-8')
1047 video_title = sanitize_title(video_title)
1049 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
1051 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1053 video_uploader = mobj.group(1)
1056 # Process video information
1057 self._downloader.process_info({
1058 'id': video_id.decode('utf-8'),
1059 'url': video_url.decode('utf-8'),
1060 'uploader': video_uploader.decode('utf-8'),
1061 'title': video_title,
1062 'stitle': simple_title,
1063 'ext': video_extension.decode('utf-8'),
1067 except UnavailableFormatError:
1068 self._downloader.trouble(u'ERROR: format not available for video')
# NOTE(review): elided numbered listing — missing guard/`try:`/`return` lines
# are indicated by gaps in the embedded numbering; visible tokens unchanged.
1071 class GoogleIE(InfoExtractor):
1072 """Information extractor for video.google.com."""
# Matches video.google.* country domains; group(1) captures the docid.
1074 _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1076 def __init__(self, downloader=None):
1077 InfoExtractor.__init__(self, downloader)
1081 return (re.match(GoogleIE._VALID_URL, url) is not None)
1083 def report_download_webpage(self, video_id):
1084 """Report webpage download."""
1085 self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id)
1087 def report_extraction(self, video_id):
1088 """Report information extraction."""
1089 self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id)
1091 def _real_initialize(self):
# Extracts docid, media URL, title, description and (optionally) thumbnail
# from a video.google.com videoplay page, then calls process_info().
1094 def _real_extract(self, url):
1095 # Extract id from URL
1096 mobj = re.match(self._VALID_URL, url)
1098 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1101 video_id = mobj.group(1)
1103 video_extension = 'mp4'
1105 # Retrieve video webpage to extract further information
1106 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1108 self.report_download_webpage(video_id)
1109 webpage = urllib2.urlopen(request).read()
1110 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1111 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1114 # Extract URL, uploader, and title from webpage
1115 self.report_extraction(video_id)
1116 mobj = re.search(r"download_url:'([^']+)'", webpage)
# Fallback path: no direct download_url, so look for the escaped flash
# videoUrl instead and switch the expected container to flv.
1118 video_extension = 'flv'
1119 mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1121 self._downloader.trouble(u'ERROR: unable to extract media URL')
1123 mediaURL = urllib.unquote(mobj.group(1))
# Undo the JavaScript hex escaping: '\x3d' is '=' and '\x26' is '&'.
1124 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1125 mediaURL = mediaURL.replace('\\x26', '\x26')
1127 video_url = mediaURL
1129 mobj = re.search(r'<title>(.*)</title>', webpage)
1131 self._downloader.trouble(u'ERROR: unable to extract title')
1133 video_title = mobj.group(1).decode('utf-8')
1134 video_title = sanitize_title(video_title)
# Collapse every run of non-filename-safe characters into a single '_'.
1135 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1137 # Extract video description
1138 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1140 self._downloader.trouble(u'ERROR: unable to extract video description')
1142 video_description = mobj.group(1).decode('utf-8')
1143 if not video_description:
1144 video_description = 'No description available.'
1146 # Extract video thumbnail
# Thumbnail requires a second request (the search page), so only fetch it
# when the user explicitly asked for it via 'forcethumbnail'.
1147 if self._downloader.params.get('forcethumbnail', False):
1148 request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1150 webpage = urllib2.urlopen(request).read()
1151 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1152 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1154 mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1156 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1158 video_thumbnail = mobj.group(1)
1159 else: # we need something to pass to process_info
1160 video_thumbnail = ''
1164 # Process video information
1165 self._downloader.process_info({
1166 'id': video_id.decode('utf-8'),
1167 'url': video_url.decode('utf-8'),
1169 'title': video_title,
1170 'stitle': simple_title,
1171 'ext': video_extension.decode('utf-8'),
1175 except UnavailableFormatError:
1176 self._downloader.trouble(u'ERROR: format not available for video')
# NOTE(review): elided numbered listing — missing guard/`try:`/`return` lines
# are indicated by gaps in the embedded numbering; visible tokens unchanged.
1179 class PhotobucketIE(InfoExtractor):
1180 """Information extractor for photobucket.com."""
# group(1) captures the .flv filename from the 'current=' query parameter.
1182 _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1184 def __init__(self, downloader=None):
1185 InfoExtractor.__init__(self, downloader)
1189 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1191 def report_download_webpage(self, video_id):
1192 """Report webpage download."""
1193 self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id)
1195 def report_extraction(self, video_id):
1196 """Report information extraction."""
1197 self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id)
1199 def _real_initialize(self):
# Extracts the flv media URL, title and uploader from a Photobucket page
# (the original URL itself is fetched) and hands them to process_info().
1202 def _real_extract(self, url):
1203 # Extract id from URL
1204 mobj = re.match(self._VALID_URL, url)
1206 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1209 video_id = mobj.group(1)
1211 video_extension = 'flv'
1213 # Retrieve video webpage to extract further information
1214 request = urllib2.Request(url)
1216 self.report_download_webpage(video_id)
1217 webpage = urllib2.urlopen(request).read()
1218 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1219 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1222 # Extract URL, uploader, and title from webpage
1223 self.report_extraction(video_id)
1224 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1226 self._downloader.trouble(u'ERROR: unable to extract media URL')
1228 mediaURL = urllib.unquote(mobj.group(1))
1230 video_url = mediaURL
# One regex yields both title (group 1) and uploader (group 2) from <title>.
1232 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1234 self._downloader.trouble(u'ERROR: unable to extract title')
1236 video_title = mobj.group(1).decode('utf-8')
1237 video_title = sanitize_title(video_title)
# Collapse every run of non-filename-safe characters into a single '_'.
1238 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1240 video_uploader = mobj.group(2).decode('utf-8')
1243 # Process video information
1244 self._downloader.process_info({
1245 'id': video_id.decode('utf-8'),
1246 'url': video_url.decode('utf-8'),
1247 'uploader': video_uploader,
1248 'title': video_title,
1249 'stitle': simple_title,
1250 'ext': video_extension.decode('utf-8'),
1254 except UnavailableFormatError:
1255 self._downloader.trouble(u'ERROR: format not available for video')
# NOTE(review): elided numbered listing — missing guard/`try:`/`return` lines
# are indicated by gaps in the embedded numbering; visible tokens unchanged.
1258 class YahooIE(InfoExtractor):
1259 """Information extractor for video.yahoo.com."""
1261 # _VALID_URL matches all Yahoo! Video URLs
1262 # _VPAGE_URL matches only the extractable '/watch/' URLs
1263 _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1264 _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1266 def __init__(self, downloader=None):
1267 InfoExtractor.__init__(self, downloader)
1271 return (re.match(YahooIE._VALID_URL, url) is not None)
1273 def report_download_webpage(self, video_id):
1274 """Report webpage download."""
1275 self._downloader.to_stdout(u'[video.yahoo] %s: Downloading webpage' % video_id)
1277 def report_extraction(self, video_id):
1278 """Report information extraction."""
1279 self._downloader.to_stdout(u'[video.yahoo] %s: Extracting information' % video_id)
1281 def _real_initialize(self):
# Extracts a Yahoo! Video clip: normalizes non-/watch/ URLs by recursing on a
# rewritten URL, scrapes metadata from the watch page, then fetches the
# playlist XML to obtain the real media URL before calling process_info().
1284 def _real_extract(self, url):
1285 # Extract ID from URL
1286 mobj = re.match(self._VALID_URL, url)
1288 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1291 video_id = mobj.group(2)
1292 video_extension = 'flv'
1294 # Rewrite valid but non-extractable URLs as
1295 # extractable English language /watch/ URLs
1296 if re.match(self._VPAGE_URL, url) is None:
1297 request = urllib2.Request(url)
1299 webpage = urllib2.urlopen(request).read()
1300 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1301 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1304 mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1306 self._downloader.trouble(u'ERROR: Unable to extract id field')
1308 yahoo_id = mobj.group(1)
1310 mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1312 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1314 yahoo_vid = mobj.group(1)
# Recurse once with the canonical /watch/ URL built from the scraped ids.
1316 url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1317 return self._real_extract(url)
1319 # Retrieve video webpage to extract further information
1320 request = urllib2.Request(url)
1322 self.report_download_webpage(video_id)
1323 webpage = urllib2.urlopen(request).read()
1324 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1325 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1328 # Extract uploader and title from webpage
1329 self.report_extraction(video_id)
1330 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1332 self._downloader.trouble(u'ERROR: unable to extract video title')
1334 video_title = mobj.group(1).decode('utf-8')
1335 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1337 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1339 self._downloader.trouble(u'ERROR: unable to extract video uploader')
# NOTE(review): group(1) here is the 'people|profile' alternative, not the
# uploader name captured by group(2) — looks off; confirm against live pages.
1341 video_uploader = mobj.group(1).decode('utf-8')
1343 # Extract video thumbnail
1344 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1346 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1348 video_thumbnail = mobj.group(1).decode('utf-8')
1350 # Extract video description
1351 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1353 self._downloader.trouble(u'ERROR: unable to extract video description')
1355 video_description = mobj.group(1).decode('utf-8')
1356 if not video_description: video_description = 'No description available.'
1358 # Extract video height and width
1359 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1361 self._downloader.trouble(u'ERROR: unable to extract video height')
1363 yv_video_height = mobj.group(1)
1365 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1367 self._downloader.trouble(u'ERROR: unable to extract video width')
1369 yv_video_width = mobj.group(1)
1371 # Retrieve video playlist to extract media URL
1372 # I'm not completely sure what all these options are, but we
1373 # seem to need most of them, otherwise the server sends a 401.
1374 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
1375 yv_bitrate = '700' # according to Wikipedia this is hard-coded
1376 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1377 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1378 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1380 self.report_download_webpage(video_id)
1381 webpage = urllib2.urlopen(request).read()
1382 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1383 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1386 # Extract media URL from playlist XML
1387 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1389 self._downloader.trouble(u'ERROR: Unable to extract media URL')
1391 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
# Resolve HTML entities (e.g. &amp;) that survive in the playlist URL.
1392 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1395 # Process video information
1396 self._downloader.process_info({
1397 'id': video_id.decode('utf-8'),
1399 'uploader': video_uploader,
1400 'title': video_title,
1401 'stitle': simple_title,
1402 'ext': video_extension.decode('utf-8'),
# BUG(review): 'thumbnail' and 'description' appear twice in this dict
# literal; the later, un-decoded entries (lines 1405-1406) silently win
# over the decoded ones (1403-1404). The duplicates should be removed.
1403 'thumbnail': video_thumbnail.decode('utf-8'),
1404 'description': video_description,
1405 'thumbnail': video_thumbnail,
1406 'description': video_description,
1409 except UnavailableFormatError:
1410 self._downloader.trouble(u'ERROR: format not available for video')
# NOTE(review): elided numbered listing — missing guard/`try:`/`return` lines
# are indicated by gaps in the embedded numbering; visible tokens unchanged.
1413 class GenericIE(InfoExtractor):
1414 """Generic last-resort information extractor."""
1416 def __init__(self, downloader=None):
1417 InfoExtractor.__init__(self, downloader)
1423 def report_download_webpage(self, video_id):
1424 """Report webpage download."""
# Warn loudly: reaching this extractor means every specific one declined.
1425 self._downloader.to_stdout(u'WARNING: Falling back on generic information extractor.')
1426 self._downloader.to_stdout(u'[generic] %s: Downloading webpage' % video_id)
1428 def report_extraction(self, video_id):
1429 """Report information extraction."""
1430 self._downloader.to_stdout(u'[generic] %s: Extracting information' % video_id)
1432 def _real_initialize(self):
# Best-effort extraction for arbitrary pages: look for an embedded player's
# file= URL, derive id/extension from the media URL's basename, use the page
# <title> as title and the host name as uploader.
1435 def _real_extract(self, url):
1436 video_id = url.split('/')[-1]
1437 request = urllib2.Request(url)
1439 self.report_download_webpage(video_id)
1440 webpage = urllib2.urlopen(request).read()
1441 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1442 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1444 except ValueError, err:
1445 # since this is the last-resort InfoExtractor, if
1446 # this error is thrown, it'll be thrown here
1447 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1450 # Start with something easy: JW Player in SWFObject
1451 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1453 # Broaden the search a little bit
1454 mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1456 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1459 # It's possible that one of the regexes
1460 # matched, but returned an empty group:
1461 if mobj.group(1) is None:
1462 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1465 video_url = urllib.unquote(mobj.group(1))
1466 video_id = os.path.basename(video_url)
1468 # here's a fun little line of code for you:
# Split basename into (id, extension): first strip the dot from the suffix,
# then drop the suffix from the id.
1469 video_extension = os.path.splitext(video_id)[1][1:]
1470 video_id = os.path.splitext(video_id)[0]
1472 # it's tempting to parse this further, but you would
1473 # have to take into account all the variations like
1474 # Video Title - Site Name
1475 # Site Name | Video Title
1476 # Video Title - Tagline | Site Name
1477 # and so on and so forth; it's just not practical
1478 mobj = re.search(r'<title>(.*)</title>', webpage)
1480 self._downloader.trouble(u'ERROR: unable to extract title')
1482 video_title = mobj.group(1).decode('utf-8')
1483 video_title = sanitize_title(video_title)
1484 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1486 # video uploader is domain name
1487 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1489 self._downloader.trouble(u'ERROR: unable to extract title')
1491 video_uploader = mobj.group(1).decode('utf-8')
1494 # Process video information
1495 self._downloader.process_info({
1496 'id': video_id.decode('utf-8'),
1497 'url': video_url.decode('utf-8'),
1498 'uploader': video_uploader,
1499 'title': video_title,
1500 'stitle': simple_title,
1501 'ext': video_extension.decode('utf-8'),
1505 except UnavailableFormatError:
1506 self._downloader.trouble(u'ERROR: format not available for video')
# NOTE(review): elided numbered listing — missing guard/`try:`/`return` lines
# are indicated by gaps in the embedded numbering; visible tokens unchanged.
1509 class YoutubeSearchIE(InfoExtractor):
1510 """Information Extractor for YouTube search queries."""
# Query syntax: "ytsearch:Q" (1 result), "ytsearchN:Q", or "ytsearchall:Q".
1511 _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1512 _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1513 _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1514 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1516 _max_youtube_results = 1000
1518 def __init__(self, youtube_ie, downloader=None):
1519 InfoExtractor.__init__(self, downloader)
# Delegate per-video extraction to the real YouTube extractor.
1520 self._youtube_ie = youtube_ie
1524 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1526 def report_download_page(self, query, pagenum):
1527 """Report attempt to download playlist page with given number."""
1528 query = query.decode(preferredencoding())
1529 self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1531 def _real_initialize(self):
1532 self._youtube_ie.initialize()
# Parse the "ytsearch[N|all]:" prefix to decide how many results to fetch.
1534 def _real_extract(self, query):
1535 mobj = re.match(self._VALID_QUERY, query)
1537 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1540 prefix, query = query.split(':')
1542 query = query.encode('utf-8')
1544 self._download_n_results(query, 1)
1546 elif prefix == 'all':
1547 self._download_n_results(query, self._max_youtube_results)
1553 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1555 elif n > self._max_youtube_results:
# Clamp oversized requests to the service maximum rather than failing.
1556 self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
1557 n = self._max_youtube_results
1558 self._download_n_results(query, n)
1560 except ValueError: # parsing prefix as integer fails
1561 self._download_n_results(query, 1)
# Walk the result pages, collecting unique video ids until n are found or
# there is no "Next" link, then delegate each id to the YouTube extractor.
1564 def _download_n_results(self, query, n):
1565 """Downloads a specified number of results for a query"""
1568 already_seen = set()
1572 self.report_download_page(query, pagenum)
1573 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1574 request = urllib2.Request(result_url, None, std_headers)
1576 page = urllib2.urlopen(request).read()
1577 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1578 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1581 # Extract video identifiers
1582 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
# Slice the matched href and pull the id out of 'href="/watch?v=ID"'.
1583 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1584 if video_id not in already_seen:
1585 video_ids.append(video_id)
1586 already_seen.add(video_id)
1587 if len(video_ids) == n:
1588 # Specified n videos reached
1589 for id in video_ids:
1590 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1593 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
# Last page: extract whatever was collected and stop.
1594 for id in video_ids:
1595 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1598 pagenum = pagenum + 1
# NOTE(review): elided numbered listing — missing guard/`try:`/`return` lines
# are indicated by gaps in the embedded numbering; visible tokens unchanged.
# Mirrors YoutubeSearchIE's structure, but drives GoogleIE.
1600 class GoogleSearchIE(InfoExtractor):
1601 """Information Extractor for Google Video search queries."""
# Query syntax: "gvsearch:Q" (1 result), "gvsearchN:Q", or "gvsearchall:Q".
1602 _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1603 _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1604 _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1605 _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1607 _max_google_results = 1000
1609 def __init__(self, google_ie, downloader=None):
1610 InfoExtractor.__init__(self, downloader)
# Delegate per-video extraction to the Google Video extractor.
1611 self._google_ie = google_ie
1615 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1617 def report_download_page(self, query, pagenum):
1618 """Report attempt to download playlist page with given number."""
1619 query = query.decode(preferredencoding())
1620 self._downloader.to_stdout(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1622 def _real_initialize(self):
1623 self._google_ie.initialize()
# Parse the "gvsearch[N|all]:" prefix to decide how many results to fetch.
1625 def _real_extract(self, query):
1626 mobj = re.match(self._VALID_QUERY, query)
1628 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1631 prefix, query = query.split(':')
1633 query = query.encode('utf-8')
1635 self._download_n_results(query, 1)
1637 elif prefix == 'all':
1638 self._download_n_results(query, self._max_google_results)
1644 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1646 elif n > self._max_google_results:
# Clamp oversized requests to the service maximum rather than failing.
1647 self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
1648 n = self._max_google_results
1649 self._download_n_results(query, n)
1651 except ValueError: # parsing prefix as integer fails
1652 self._download_n_results(query, 1)
# Walk the result pages, collecting unique docids until n are found or there
# is no "Next" link, then delegate each docid to the Google Video extractor.
1655 def _download_n_results(self, query, n):
1656 """Downloads a specified number of results for a query"""
1659 already_seen = set()
1663 self.report_download_page(query, pagenum)
1664 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1665 request = urllib2.Request(result_url, None, std_headers)
1667 page = urllib2.urlopen(request).read()
1668 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1669 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1672 # Extract video identifiers
1673 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1674 video_id = mobj.group(1)
1675 if video_id not in already_seen:
1676 video_ids.append(video_id)
1677 already_seen.add(video_id)
1678 if len(video_ids) == n:
1679 # Specified n videos reached
1680 for id in video_ids:
1681 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1684 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
# Last page: extract whatever was collected and stop.
1685 for id in video_ids:
1686 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1689 pagenum = pagenum + 1
# NOTE(review): elided numbered listing — missing guard/`try:`/`return` lines
# are indicated by gaps in the embedded numbering; visible tokens unchanged.
# Mirrors YoutubeSearchIE's structure, but drives YahooIE.
1691 class YahooSearchIE(InfoExtractor):
1692 """Information Extractor for Yahoo! Video search queries."""
# Query syntax: "yvsearch:Q" (1 result), "yvsearchN:Q", or "yvsearchall:Q".
1693 _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
1694 _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
1695 _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
1696 _MORE_PAGES_INDICATOR = r'\s*Next'
1698 _max_yahoo_results = 1000
1700 def __init__(self, yahoo_ie, downloader=None):
1701 InfoExtractor.__init__(self, downloader)
# Delegate per-video extraction to the Yahoo! Video extractor.
1702 self._yahoo_ie = yahoo_ie
1706 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
1708 def report_download_page(self, query, pagenum):
1709 """Report attempt to download playlist page with given number."""
1710 query = query.decode(preferredencoding())
1711 self._downloader.to_stdout(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
1713 def _real_initialize(self):
1714 self._yahoo_ie.initialize()
# Parse the "yvsearch[N|all]:" prefix to decide how many results to fetch.
1716 def _real_extract(self, query):
1717 mobj = re.match(self._VALID_QUERY, query)
1719 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1722 prefix, query = query.split(':')
1724 query = query.encode('utf-8')
1726 self._download_n_results(query, 1)
1728 elif prefix == 'all':
1729 self._download_n_results(query, self._max_yahoo_results)
1735 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1737 elif n > self._max_yahoo_results:
# Clamp oversized requests to the service maximum rather than failing.
1738 self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
1739 n = self._max_yahoo_results
1740 self._download_n_results(query, n)
1742 except ValueError: # parsing prefix as integer fails
1743 self._download_n_results(query, 1)
# Walk the result pages, collecting unique "id/vid" pairs until n are found
# or there is no "Next" link, then delegate each to the Yahoo! extractor.
1746 def _download_n_results(self, query, n):
1747 """Downloads a specified number of results for a query"""
1750 already_seen = set()
1754 self.report_download_page(query, pagenum)
1755 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1756 request = urllib2.Request(result_url, None, std_headers)
1758 page = urllib2.urlopen(request).read()
1759 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1760 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1763 # Extract video identifiers
1764 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1765 video_id = mobj.group(1)
1766 if video_id not in already_seen:
1767 video_ids.append(video_id)
1768 already_seen.add(video_id)
1769 if len(video_ids) == n:
1770 # Specified n videos reached
1771 for id in video_ids:
1772 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1775 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
# Last page: extract whatever was collected and stop.
1776 for id in video_ids:
1777 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1780 pagenum = pagenum + 1
# NOTE(review): elided numbered listing — missing guard/`try:`/`return` lines
# are indicated by gaps in the embedded numbering; visible tokens unchanged.
1782 class YoutubePlaylistIE(InfoExtractor):
1783 """Information Extractor for YouTube playlists."""
# Matches view_play_list/my_playlists '?p=' URLs and user/.../user/ URLs;
# group(1) is the playlist id.
1785 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
1786 _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1787 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1788 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1791 def __init__(self, youtube_ie, downloader=None):
1792 InfoExtractor.__init__(self, downloader)
# Delegate per-video extraction to the real YouTube extractor.
1793 self._youtube_ie = youtube_ie
1797 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1799 def report_download_page(self, playlist_id, pagenum):
1800 """Report attempt to download playlist page with given number."""
1801 self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1803 def _real_initialize(self):
1804 self._youtube_ie.initialize()
# Page through the playlist, accumulate unique video ids, and hand each one
# to the YouTube extractor once there is no "Next" link.
1806 def _real_extract(self, url):
1807 # Extract playlist id
1808 mobj = re.match(self._VALID_URL, url)
1810 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1813 # Download playlist pages
1814 playlist_id = mobj.group(1)
1819 self.report_download_page(playlist_id, pagenum)
1820 request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1822 page = urllib2.urlopen(request).read()
1823 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1824 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1827 # Extract video identifiers
# ids_in_page de-duplicates within a page before extending the overall list.
1829 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1830 if mobj.group(1) not in ids_in_page:
1831 ids_in_page.append(mobj.group(1))
1832 video_ids.extend(ids_in_page)
1834 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1836 pagenum = pagenum + 1
1838 for id in video_ids:
1839 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
# NOTE(review): elided numbered listing — missing guard/`try:`/`return` lines
# are indicated by gaps in the embedded numbering; visible tokens unchanged.
1842 class YoutubeUserIE(InfoExtractor):
1843 """Information Extractor for YouTube users."""
1845 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
# Single-request GData feed; unlike the playlist IE there is no paging here.
1846 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1847 _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
1850 def __init__(self, youtube_ie, downloader=None):
1851 InfoExtractor.__init__(self, downloader)
# Delegate per-video extraction to the real YouTube extractor.
1852 self._youtube_ie = youtube_ie
1856 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
1858 def report_download_page(self, username):
1859 """Report attempt to download user page."""
1860 self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))
1862 def _real_initialize(self):
1863 self._youtube_ie.initialize()
# Fetch the user's GData feed, collect unique video ids, and hand each one
# to the YouTube extractor.
1865 def _real_extract(self, url):
1867 mobj = re.match(self._VALID_URL, url)
1869 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1872 # Download user page
1873 username = mobj.group(1)
1877 self.report_download_page(username)
1878 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
1880 page = urllib2.urlopen(request).read()
1881 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1882 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1885 # Extract video identifiers
1888 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1889 if mobj.group(1) not in ids_in_page:
1890 ids_in_page.append(mobj.group(1))
1891 video_ids.extend(ids_in_page)
1893 for id in video_ids:
1894 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
# NOTE(review): elided numbered listing — gaps in the embedded numbering mean
# some docstring/body lines are not shown; visible tokens unchanged. No
# comments are inserted inside the multi-line docstrings below, since that
# would alter the docstring text itself.
1897 class PostProcessor(object):
1898 """Post Processor class.
1900 PostProcessor objects can be added to downloaders with their
1901 add_post_processor() method. When the downloader has finished a
1902 successful download, it will take its internal chain of PostProcessors
1903 and start calling the run() method on each one of them, first with
1904 an initial argument and then with the returned value of the previous
1907 The chain will be stopped if one of them ever returns None or the end
1908 of the chain is reached.
1910 PostProcessor objects follow a "mutual registration" process similar
1911 to InfoExtractor objects.
1916 def __init__(self, downloader=None):
1917 self._downloader = downloader
1919 def set_downloader(self, downloader):
1920 """Sets the downloader for this PP."""
1921 self._downloader = downloader
# Base implementation is the identity: subclasses override run() to do work.
1923 def run(self, information):
1924 """Run the PostProcessor.
1926 The "information" argument is a dictionary like the ones
1927 composed by InfoExtractors. The only difference is that this
1928 one has an extra field called "filepath" that points to the
1931 When this method returns None, the postprocessing chain is
1932 stopped. However, this method may return an information
1933 dictionary that will be passed to the next postprocessing
1934 object in the chain. It can be the one it received after
1935 changing some fields.
1937 In addition, this method may raise a PostProcessingError
1938 exception that will be taken into account by the downloader
1941 return information # by default, do nothing
1943 ### MAIN PROGRAM ###
1944 if __name__ == '__main__':
1946 # Modules needed only when running the main program
1950 # Function to update the program file with the latest version from bitbucket.org
1951 def update_self(downloader, filename):
1952 # Note: downloader only used for options
1953 if not os.access (filename, os.W_OK):
1954 sys.exit('ERROR: no write permissions on %s' % filename)
1956 downloader.to_stdout('Updating to latest stable version...')
1957 latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
1958 latest_version = urllib.urlopen(latest_url).read().strip()
1959 prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
1960 newcontent = urllib.urlopen(prog_url).read()
1961 stream = open(filename, 'w')
1962 stream.write(newcontent)
1964 downloader.to_stdout('Updated to version %s' % latest_version)
1966 # General configuration
1967 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
1968 urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
1969 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
1971 # Parse command line
1972 parser = optparse.OptionParser(
1973 usage='Usage: %prog [options] url...',
1974 version='2010.04.04',
1975 conflict_handler='resolve',
# NOTE(review): this excerpt is extraction-mangled — each line carries its
# original file line number, indentation was stripped, and gaps in those
# numbers (e.g. 2050-2051, 2114, 2138-2139) indicate elided lines (try/except
# halves, a closing '})'). Code tokens below are left byte-identical.
#
# --- Command-line interface: general options -------------------------------
1978 parser.add_option('-h', '--help',
1979 action='help', help='print this help text and exit')
1980 parser.add_option('-v', '--version',
1981 action='version', help='print program version and exit')
# -U triggers the self-update path handled further down (opts.update_self).
1982 parser.add_option('-U', '--update',
1983 action='store_true', dest='update_self', help='update this program to latest stable version')
1984 parser.add_option('-i', '--ignore-errors',
1985 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
# Rate limit is taken as a human-readable string here and converted to a
# numeric byte count below via FileDownloader.parse_bytes().
1986 parser.add_option('-r', '--rate-limit',
1987 dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
# --- Authentication options: explicit username/password or ~/.netrc --------
1989 authentication = optparse.OptionGroup(parser, 'Authentication Options')
1990 authentication.add_option('-u', '--username',
1991 dest='username', metavar='UN', help='account username')
1992 authentication.add_option('-p', '--password',
1993 dest='password', metavar='PW', help='account password')
1994 authentication.add_option('-n', '--netrc',
1995 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
1996 parser.add_option_group(authentication)
# --- Video format options: -b/-m/-d/--all-formats are store_const aliases
# that all write into the same dest='format' slot as -f would.
1998 video_format = optparse.OptionGroup(parser, 'Video Format Options')
1999 video_format.add_option('-f', '--format',
2000 action='store', dest='format', metavar='FMT', help='video format code')
2001 video_format.add_option('-b', '--best-quality',
2002 action='store_const', dest='format', help='download the best quality video possible', const='0')
2003 video_format.add_option('-m', '--mobile-version',
2004 action='store_const', dest='format', help='alias for -f 17', const='17')
2005 video_format.add_option('-d', '--high-def',
2006 action='store_const', dest='format', help='alias for -f 22', const='22')
# const='-1' is the "all formats" sentinel checked in the outtmpl chain below.
2007 video_format.add_option('--all-formats',
2008 action='store_const', dest='format', help='download all available video formats', const='-1')
2009 parser.add_option_group(video_format)
# --- Verbosity / simulation options. Any of the --get-* flags implies both
# 'quiet' and 'simulate' when the FileDownloader params are built below.
2011 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
2012 verbosity.add_option('-q', '--quiet',
2013 action='store_true', dest='quiet', help='activates quiet mode', default=False)
2014 verbosity.add_option('-s', '--simulate',
2015 action='store_true', dest='simulate', help='do not download video', default=False)
2016 verbosity.add_option('-g', '--get-url',
2017 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
2018 verbosity.add_option('-e', '--get-title',
2019 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
2020 verbosity.add_option('--get-thumbnail',
2021 action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
2022 verbosity.add_option('--get-description',
2023 action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
2024 verbosity.add_option('--no-progress',
2025 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
2026 parser.add_option_group(verbosity)
# --- Filesystem options: output naming, batch input, overwrite/resume ------
2028 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
2029 filesystem.add_option('-t', '--title',
2030 action='store_true', dest='usetitle', help='use title in file name', default=False)
2031 filesystem.add_option('-l', '--literal',
2032 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
2033 filesystem.add_option('-o', '--output',
2034 dest='outtmpl', metavar='TPL', help='output filename template')
2035 filesystem.add_option('-a', '--batch-file',
2036 dest='batchfile', metavar='F', help='file containing URLs to download (\'-\' for stdin)')
2037 filesystem.add_option('-w', '--no-overwrites',
2038 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
2039 filesystem.add_option('-c', '--continue',
2040 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
2041 parser.add_option_group(filesystem)
2043 (opts, args) = parser.parse_args()
2045 # Batch file verification
# Read one URL per line from the batch file; blank lines are dropped.
# NOTE(review): lines 2050-2051 are elided here — presumably the stdin branch
# for batchfile == '-' and the surrounding try: whose failure handler is the
# sys.exit() at 2057 below. Confirm against the full file.
2047 if opts.batchfile is not None:
2049 if opts.batchfile == '-':
2052 batchfd = open(opts.batchfile, 'r')
2053 batchurls = batchfd.readlines()
2054 batchurls = [x.strip() for x in batchurls]
2055 batchurls = [x for x in batchurls if len(x) > 0]
2057 sys.exit(u'ERROR: batch file could not be read')
# Batch-file URLs are processed before the positional command-line URLs.
2058 all_urls = batchurls + args
2060 # Conflicting, missing and erroneous options
# -n is exclusive with -u/-p: .netrc supplies the credentials itself.
2061 if opts.usenetrc and (opts.username is not None or opts.password is not None):
2062 parser.error(u'using .netrc conflicts with giving username/password')
2063 if opts.password is not None and opts.username is None:
2064 parser.error(u'account username missing')
# -o provides the full template, so the -t/-l naming shortcuts are redundant.
2065 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
2066 parser.error(u'using output template conflicts with using title or literal title')
2067 if opts.usetitle and opts.useliteral:
2068 parser.error(u'using title conflicts with using literal title')
# Username without password: prompt interactively instead of failing.
2069 if opts.username is not None and opts.password is None:
2070 opts.password = getpass.getpass(u'Type account password and press return:')
# Replace the human-readable limit string ('50k', '44.6m') with a byte count.
2071 if opts.ratelimit is not None:
2072 numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
2073 if numeric_limit is None:
2074 parser.error(u'invalid rate limit specified')
2075 opts.ratelimit = numeric_limit
2077 # Information extractors
# The playlist/user/search and Metacafe extractors delegate individual video
# downloads to the plain YouTube extractor passed into their constructors;
# likewise GoogleSearchIE wraps google_ie and YahooSearchIE wraps yahoo_ie.
2078 youtube_ie = YoutubeIE()
2079 metacafe_ie = MetacafeIE(youtube_ie)
2080 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
2081 youtube_user_ie = YoutubeUserIE(youtube_ie)
2082 youtube_search_ie = YoutubeSearchIE(youtube_ie)
2083 google_ie = GoogleIE()
2084 google_search_ie = GoogleSearchIE(google_ie)
2085 photobucket_ie = PhotobucketIE()
2086 yahoo_ie = YahooIE()
2087 yahoo_search_ie = YahooSearchIE(yahoo_ie)
2088 generic_ie = GenericIE()
# Build the downloader. 'quiet' and 'simulate' are forced on by any --get-*
# flag so those modes only print the requested field.
2091 fd = FileDownloader({
2092 'usenetrc': opts.usenetrc,
2093 'username': opts.username,
2094 'password': opts.password,
2095 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
2096 'forceurl': opts.geturl,
2097 'forcetitle': opts.gettitle,
2098 'forcethumbnail': opts.getthumbnail,
2099 'forcedescription': opts.getdescription,
2100 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
2101 'format': opts.format,
# First-truthy-wins chain: an explicit -o template (decoded from the locale's
# preferred encoding) beats everything; otherwise --all-formats ('-1') adds
# %(format)s to the name; -t uses the sanitized title, -l the literal title;
# the fallback is plain '%(id)s.%(ext)s'.
2102 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
2103 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
2104 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
2105 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
2106 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
2107 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
2108 or u'%(id)s.%(ext)s'),
2109 'ignoreerrors': opts.ignoreerrors,
2110 'ratelimit': opts.ratelimit,
2111 'nooverwrites': opts.nooverwrites,
2112 'continuedl': opts.continue_dl,
2113 'noprogress': opts.noprogress,
# NOTE(review): line 2114 (the '})' closing this dict/call) is elided here.
# Registration order defines matching priority: the more specific extractors
# (search, playlist, user) are tried before the plain site extractors.
2115 fd.add_info_extractor(youtube_search_ie)
2116 fd.add_info_extractor(youtube_pl_ie)
2117 fd.add_info_extractor(youtube_user_ie)
2118 fd.add_info_extractor(metacafe_ie)
2119 fd.add_info_extractor(youtube_ie)
2120 fd.add_info_extractor(google_ie)
2121 fd.add_info_extractor(google_search_ie)
2122 fd.add_info_extractor(photobucket_ie)
2123 fd.add_info_extractor(yahoo_ie)
2124 fd.add_info_extractor(yahoo_search_ie)
2126 # This must come last since it's the
2127 # fallback if none of the others work
2128 fd.add_info_extractor(generic_ie)
# Self-update mode; with -U and no URLs the missing-URL error below is skipped.
2131 if opts.update_self:
2132 update_self(fd, sys.argv[0])
2135 if len(all_urls) < 1:
2136 if not opts.update_self:
2137 parser.error(u'you must provide at least one URL')
# NOTE(review): lines 2138-2139 and 2141-2142 are elided — presumably an
# else branch / sys.exit() and the try: matched by the excepts below, plus a
# final sys.exit(retcode) after line 2148. Confirm against the full file.
2140 retcode = fd.download(all_urls)
# DownloadError handler body (line 2144) is elided; per -i semantics it
# presumably falls through rather than aborting — TODO confirm.
2143 except DownloadError:
2145 except SameFileError:
2146 sys.exit(u'ERROR: fixed output name but more than one file to download')
2147 except KeyboardInterrupt:
2148 sys.exit(u'\nERROR: Interrupted by user')