From 56781d3d2e476e2e109d0907d89548fd4da05058 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 27 Nov 2012 12:46:09 +0100 Subject: [PATCH] Switch back to underline for invalid characters, and make restricted ASCII-only --- README.md | 4 ++-- test/test_utils.py | 11 +++++++---- youtube-dl.1 | 4 ++-- youtube_dl/__init__.py | 2 +- youtube_dl/utils.py | 13 +++++++++---- 5 files changed, 21 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 14acddbd0..5cf082a7c 100644 --- a/README.md +++ b/README.md @@ -47,8 +47,8 @@ which means you can modify it, redistribute it or use it however you like. %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout. - --restrict-filenames Avoid some characters such as "&" and spaces in - filenames + --restrict-filenames Restrict filenames to only ASCII characters, and + avoid "&" and spaces in filenames -a, --batch-file FILE file containing URLs to download ('-' for stdin) -w, --no-overwrites do not overwrite files -c, --continue resume partially downloaded files diff --git a/test/test_utils.py b/test/test_utils.py index 0a435ddc5..0b57e016c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -22,10 +22,10 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_filename(u'123'), u'123') - self.assertEqual(u'abc-de', sanitize_filename(u'abc/de')) + self.assertEqual(u'abc_de', sanitize_filename(u'abc/de')) self.assertFalse(u'/' in sanitize_filename(u'abc/de///')) - self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de')) + self.assertEqual(u'abc_de', sanitize_filename(u'abc/<>\\*|de')) self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|')) self.assertEqual(u'yes no', sanitize_filename(u'yes? no')) self.assertEqual(u'this - that', sanitize_filename(u'this: that')) @@ -45,14 +45,17 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_filename(u'123', restricted=True), u'123') - self.assertEqual(u'abc-de', sanitize_filename(u'abc/de', restricted=True)) + self.assertEqual(u'abc_de', sanitize_filename(u'abc/de', restricted=True)) self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True)) - self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de', restricted=True)) + self.assertEqual(u'abc_de', sanitize_filename(u'abc/<>\\*|de', restricted=True)) self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True)) self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True)) self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True)) + self.assertEqual(sanitize_filename(u'aäb', restricted=True), u'a_b') + self.assertTrue(sanitize_filename(u'ö', restricted=True) != u'') # No empty filename + forbidden = u'"\0\\/&: \'\t\n' for fc in forbidden: print('input: ' + fc + ', result: ' + repr(sanitize_filename(fc, restricted=True))) diff --git a/youtube-dl.1 b/youtube-dl.1 index 64120a8d2..ae303b672 100644 --- a/youtube-dl.1 +++ b/youtube-dl.1 @@ -59,8 +59,8 @@ redistribute it or use it however you like. \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe, \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout. ---restrict-filenames\ \ \ \ \ Avoid\ some\ characters\ such\ as\ "&"\ and\ spaces\ in -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames +--restrict-filenames\ \ \ \ \ Restrict\ filenames\ to\ only\ ASCII\ characters,\ and +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ avoid\ "&"\ and\ spaces\ in\ filenames -a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin) -w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files -c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index cbf1dd1a7..c3e0f78e5 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -274,7 +274,7 @@ def parseOpts(): dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.') filesystem.add_option('--restrict-filenames', action='store_true', dest='restrictfilenames', - help='Avoid some characters such as "&" and spaces in filenames', default=False) + help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False) filesystem.add_option('-a', '--batch-file', dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') filesystem.add_option('-w', '--no-overwrites', diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1f60d34ae..3339f56ec 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -207,15 +207,20 @@ def sanitize_filename(s, restricted=False): elif char == ':': return '_-' if restricted else ' -' elif char in '\\/|*<>': - return '-' + return '_' if restricted and (char in '&\'' or char.isspace()): return '_' + if restricted and ord(char) > 127: + return '_' return char result = u''.join(map(replace_insane, s)) - while '--' in result: - result = result.replace('--', '-') - return result.strip('-') + while '__' in result: + result = result.replace('__', '_') + result = result.strip('_') + if not result: + result = '_' + return result def orderedSet(iterable): """ Remove all duplicates from the input iterable """ -- 2.40.0