diff --git a/lib/filename-generator/by-site-structure.js b/lib/filename-generator/by-site-structure.js index d03e4ee6..4142a0d2 100644 --- a/lib/filename-generator/by-site-structure.js +++ b/lib/filename-generator/by-site-structure.js @@ -1,7 +1,6 @@ import path from 'path'; import url from 'url'; -import sanitizeFilename from 'sanitize-filename'; -import {getHostFromUrl, getFilepathFromUrl, getFilenameExtension, shortenFilename} from '../utils/index.js'; +import {getHostFromUrl, getFilepathFromUrl, getFilenameExtension, sanitizeFilename} from '../utils/index.js'; import resourceTypes from '../config/resource-types.js'; import resourceTypeExtensions from '../config/resource-ext-by-type.js'; @@ -49,7 +48,6 @@ export default function generateFilename (resource, {defaultFilename}) { function sanitizeFilepath (filePath) { filePath = path.normalize(filePath); - const pathParts = filePath.split(path.sep).map(pathPart => sanitizeFilename(pathPart, {replacement: '_'})); - pathParts[pathParts.length - 1] = shortenFilename(pathParts[pathParts.length - 1]); + const pathParts = filePath.split(path.sep).map(pathPart => sanitizeFilename(pathPart)); return pathParts.join(path.sep); } diff --git a/lib/filename-generator/by-type.js b/lib/filename-generator/by-type.js index 03554512..ec60073a 100644 --- a/lib/filename-generator/by-type.js +++ b/lib/filename-generator/by-type.js @@ -1,27 +1,26 @@ import path from 'path'; -import sanitizeFilename from 'sanitize-filename'; -import {shortenFilename, getFilenameExtension, getFilenameFromUrl} from '../utils/index.js'; +import {sanitizeFilename, getFilenameExtension, getFilenameFromUrl} from '../utils/index.js'; import typeExtensions from '../config/resource-ext-by-type.js'; export default function generateFilename (resource, {subdirectories, defaultFilename}, occupiedFileNames) { const occupiedNames = getSubDirectoryNames({subdirectories}).concat(occupiedFileNames); let filename = getFilenameForResource(resource, {subdirectories, defaultFilename}); - filename = shortenFilename(sanitizeFilename(filename, {replacement: '_'})); + filename = sanitizeFilename(filename); const extension = getFilenameExtension(filename); const directory = getDirectoryByExtension(extension, {subdirectories, defaultFilename}); - let currentFilename = path.join(directory, filename); + let currentFilepath = path.join(directory, filename); const basename = path.basename(filename, extension); let index = 1; - while (occupiedNames.includes(currentFilename)) { - currentFilename = path.join(directory, `${basename}_${index}${extension}`); + while (occupiedNames.includes(currentFilepath)) { + currentFilepath = path.join(directory, `${basename}_${index}${extension}`); index++; } - return currentFilename; + return currentFilepath; } function getFilenameForResource (resource, {defaultFilename}) { diff --git a/lib/utils/index.js b/lib/utils/index.js index 293ef486..cecf63b4 100644 --- a/lib/utils/index.js +++ b/lib/utils/index.js @@ -1,12 +1,14 @@ import url from 'url'; import path from 'path'; import normalize from 'normalize-url'; +import { default as filenamifySanitizeFilename } from 'filenamify'; import typeByMime from '../config/resource-type-by-mime.js'; import typeByExt from '../config/resource-type-by-ext.js'; import logger from '../logger.js'; -const MAX_FILENAME_LENGTH = 255; + +const MAX_FILENAME_LENGTH = 230; const IS_URL = /^((http[s]?:)?\/\/)/; function isUrl (path) { @@ -97,13 +99,12 @@ function getFilenameExtension (filepath) { return (typeof filepath === 'string') ? path.extname(filepath).toLowerCase() : null; } -function shortenFilename (filename) { - if (filename.length >= MAX_FILENAME_LENGTH) { - const shortFilename = filename.substring(0, 20) + getFilenameExtension(filename); - logger.debug(`[utils] shorten filename: ${filename} -> ${shortFilename}`); - return shortFilename; +function sanitizeFilename (filename) { + const sanitized = filenamifySanitizeFilename(filename, {replacement: '_', maxLength: MAX_FILENAME_LENGTH}); + if (sanitized !== filename) { + logger.debug(`[utils] sanitize filename: ${filename} -> ${sanitized}`); } - return filename; + return sanitized; } function normalizeUrl (u, opts) { @@ -183,7 +184,7 @@ function updateResourceEncoding (resource, encoding) { const updatedText = Buffer.from(resourceText, resource.getEncoding()).toString(encoding); resource.setText(updatedText); } - + resource.setEncoding(encoding); } @@ -197,7 +198,7 @@ export { getFilenameExtension, getHashFromUrl, getHostFromUrl, - shortenFilename, + sanitizeFilename, prettifyFilename, normalizeUrl, urlsEqual, diff --git a/package.json b/package.json index 2c58b324..2112c3f8 100644 --- a/package.json +++ b/package.json @@ -40,11 +40,11 @@ "cheerio": "^1.1.2", "css-url-parser": "^1.0.0", "debug": "^4.3.1", + "filenamify": "^7.0.0", "fs-extra": "^11.1.0", "got": "^14.4.7", "normalize-url": "^8.0.0", "p-queue": "^9.0.0", - "sanitize-filename": "^1.6.3", "srcset": "^5.0.0" }, "devDependencies": { diff --git a/test/unit/filename-generator/by-site-structure-test.js b/test/unit/filename-generator/by-site-structure-test.js index 208a9eae..43386149 100644 --- a/test/unit/filename-generator/by-site-structure-test.js +++ b/test/unit/filename-generator/by-site-structure-test.js @@ -34,7 +34,7 @@ describe('FilenameGenerator: bySiteStructure', () => { it('should replace not allowed characters from filename', () => { const r1 = new Resource('http://example.com/some/path/<*a*>.png'); - bySiteStructureFilenameGenerator(r1, options).should.equalFileSystemPath('example.com/some/path/__a__.png'); + bySiteStructureFilenameGenerator(r1, options).should.equalFileSystemPath('example.com/some/path/_a_.png'); }); it('should replace not allowed characters from path', () => { @@ -74,8 +74,10 @@ describe('FilenameGenerator: bySiteStructure', () => { it('should shorten filename', () => { const resourceFilename = new Array(1000).fill('a').join('') + '.png'; const r = new Resource('http://example.com/' + resourceFilename); - const filename = bySiteStructureFilenameGenerator(r, options); - should(filename.length).be.lessThan(255); + const filepath = bySiteStructureFilenameGenerator(r, options); + const filenameParts = filepath.split('/'); + const filename = filenameParts[filenameParts.length - 1]; + should(filename.length).be.lessThanOrEqual(255); }); it('should shorten filename if resource is html without ext and default name is too long', () => { @@ -85,17 +87,17 @@ describe('FilenameGenerator: bySiteStructure', () => { const filepath = bySiteStructureFilenameGenerator(r, { defaultFilename: defaultFilename }); const filenameParts = filepath.split('/'); const filename = filenameParts[filenameParts.length - 1]; - should(filename.length).be.lessThan(255); + should(filename.length).be.lessThanOrEqual(255); }); it('should return decoded filepath', () => { const r = new Resource('https://developer.mozilla.org/ru/docs/JavaScript_%D1%88%D0%B5%D0%BB%D0%BB%D1%8B'); - const filename = bySiteStructureFilenameGenerator(r, options); - filename.should.equalFileSystemPath('developer.mozilla.org/ru/docs/JavaScript_шеллы'); + const filepath = bySiteStructureFilenameGenerator(r, options); + filepath.should.equalFileSystemPath('developer.mozilla.org/ru/docs/JavaScript_шеллы'); const r2 = new Resource('https://developer.mozilla.org/Hello%20G%C3%BCnter.png'); - const filename2 = bySiteStructureFilenameGenerator(r2, options); - filename2.should.equalFileSystemPath('developer.mozilla.org/Hello Günter.png'); + const filepath2 = bySiteStructureFilenameGenerator(r2, options); + filepath2.should.equalFileSystemPath('developer.mozilla.org/Hello Günter.png'); }); it('should keep query strings', () => { diff --git a/test/unit/filename-generator/by-type-test.js b/test/unit/filename-generator/by-type-test.js index 92b11f85..03b12ee1 100644 --- a/test/unit/filename-generator/by-type-test.js +++ b/test/unit/filename-generator/by-type-test.js @@ -8,27 +8,27 @@ import byTypeFilenameGenerator from '../../../lib/filename-generator/by-type.js' describe('FilenameGenerator: byType', () => { it('should return resource filename', () => { const r = new Resource('http://example.com/a.png', 'b.png'); - const filename = byTypeFilenameGenerator(r, {}, []); - filename.should.equalFileSystemPath('b.png'); + const filepath = byTypeFilenameGenerator(r, {}, []); + filepath.should.equalFileSystemPath('b.png'); }); it('should return url-based filename if resource has no filename', () => { const r = new Resource('http://example.com/a.png'); - const filename = byTypeFilenameGenerator(r, {}, []); - filename.should.equalFileSystemPath('a.png'); + const filepath = byTypeFilenameGenerator(r, {}, []); + filepath.should.equalFileSystemPath('a.png'); }); it('should return url-based filename if resource has empty filename', () => { const r = new Resource('http://example.com/a.png', ''); - const filename = byTypeFilenameGenerator(r, {}, []); - filename.should.equalFileSystemPath('a.png'); + const filepath = byTypeFilenameGenerator(r, {}, []); + filepath.should.equalFileSystemPath('a.png'); }); it('should add missed extensions for html resources', () => { const r = new Resource('http://example.com/about', ''); r.getType = sinon.stub().returns('html'); - const filename = byTypeFilenameGenerator(r, {}, []); - filename.should.equalFileSystemPath('about.html'); + const filepath = byTypeFilenameGenerator(r, {}, []); + filepath.should.equalFileSystemPath('about.html'); }); it('should add missed extensions for css resources', () => { @@ -41,15 +41,15 @@ describe('FilenameGenerator: byType', () => { it('should add missed extensions for js resources', () => { const r = new Resource('http://example.com/js', ''); r.getType = sinon.stub().returns('js'); - const filename = byTypeFilenameGenerator(r, {}, []); - filename.should.equalFileSystemPath('js.js'); + const filepath = byTypeFilenameGenerator(r, {}, []); + filepath.should.equalFileSystemPath('js.js'); }); it('should not add missed extensions for other resources', () => { const r = new Resource('http://1.gravatar.com/avatar/4d63e4a045c7ff22accc33dc08442f86?s=140&d=%2Fwp-content%2Fuploads%2F2015%2F05%2FGood-JOb-150x150.jpg&r=g', ''); r.getType = sinon.stub().returns('home'); - const filename = byTypeFilenameGenerator(r, {}, []); - filename.should.equalFileSystemPath('4d63e4a045c7ff22accc33dc08442f86'); + const filepath = byTypeFilenameGenerator(r, {}, []); + filepath.should.equalFileSystemPath('4d63e4a045c7ff22accc33dc08442f86'); }); it('should return filename with correct subdirectory', () => { @@ -60,8 +60,8 @@ describe('FilenameGenerator: byType', () => { }; const r = new Resource('http://example.com/a.png'); - const filename = byTypeFilenameGenerator(r, options, []); - filename.should.equalFileSystemPath('img/a.png'); + const filepath = byTypeFilenameGenerator(r, options, []); + filepath.should.equalFileSystemPath('img/a.png'); }); it('should return filename with correct subdirectory when string cases are different', () => { @@ -72,14 +72,14 @@ describe('FilenameGenerator: byType', () => { }; const r = new Resource('http://example.com/a.PNG'); - const f = byTypeFilenameGenerator(r, options, []); - f.should.equalFileSystemPath('img/a.PNG'); + const filepath = byTypeFilenameGenerator(r, options, []); + filepath.should.equalFileSystemPath('img/a.PNG'); }); it('should return different filename if desired filename is occupied', () => { const r = new Resource('http://second-example.com/a.png'); - const filename = byTypeFilenameGenerator(r, {}, [ 'a.png' ]); - filename.should.not.equalFileSystemPath('a.png'); + const filepath = byTypeFilenameGenerator(r, {}, [ 'a.png' ]); + filepath.should.not.equalFileSystemPath('a.png'); }); it('should return different filename if desired filename is occupied N times', () => { @@ -89,30 +89,32 @@ describe('FilenameGenerator: byType', () => { const r3 = new Resource('http://third-example.com/a.png'); const r4 = new Resource('http://fourth-example.com/a.png'); - const f1 = byTypeFilenameGenerator(r1, {}, occupiedFilenames); - f1.should.equalFileSystemPath('a.png'); - occupiedFilenames.push(f1); + const fp1 = byTypeFilenameGenerator(r1, {}, occupiedFilenames); + fp1.should.equalFileSystemPath('a.png'); + occupiedFilenames.push(fp1); - const f2 = byTypeFilenameGenerator(r2, {}, occupiedFilenames); - f2.should.not.equal(f1); - occupiedFilenames.push(f2); + const fp2 = byTypeFilenameGenerator(r2, {}, occupiedFilenames); + fp2.should.not.equal(fp1); + occupiedFilenames.push(fp2); - const f3 = byTypeFilenameGenerator(r3, {}, occupiedFilenames); - f3.should.not.equal(f1); - f3.should.not.equal(f2); - occupiedFilenames.push(f3); + const fp3 = byTypeFilenameGenerator(r3, {}, occupiedFilenames); + fp3.should.not.equal(fp1); + fp3.should.not.equal(fp2); + occupiedFilenames.push(fp3); - const f4 = byTypeFilenameGenerator(r4, {}, occupiedFilenames); - f4.should.not.equal(f1); - f4.should.not.equal(f2); - f4.should.not.equal(f3); + const fp4 = byTypeFilenameGenerator(r4, {}, occupiedFilenames); + fp4.should.not.equal(fp1); + fp4.should.not.equal(fp2); + fp4.should.not.equal(fp3); }); it('should shorten filename', () => { const resourceFilename = new Array(1000).fill('a').join('') + '.png'; const r = new Resource('http://example.com/a.png', resourceFilename); - const filename = byTypeFilenameGenerator(r, {}, []); - should(filename.length).be.lessThan(255); + const filepath = byTypeFilenameGenerator(r, {}, []); + const filenameParts = filepath.split('/'); + const filename = filenameParts[filenameParts.length - 1]; + should(filename.length).be.lessThanOrEqual(255); }); it('should return different short filename if first short filename is occupied', () => { @@ -121,28 +123,30 @@ describe('FilenameGenerator: byType', () => { const r1 = new Resource('http://first-example.com/a.png', resourceFilename); const r2 = new Resource('http://second-example.com/a.png', resourceFilename); - const f1 = byTypeFilenameGenerator(r1, {}, []); - should(f1.length).be.lessThan(255); + const fp1 = byTypeFilenameGenerator(r1, {}, []); + const filenameParts1 = fp1.split('/'); + const filename1 = filenameParts1[filenameParts1.length - 1]; + should(filename1.length).be.lessThanOrEqual(255); - const f2 = byTypeFilenameGenerator(r2, {}, [ f1 ]); - should(f2.length).be.lessThan(255); - should(f2).not.be.eql(f1); - - should(f2).not.be.eql(f1); + const fp2 = byTypeFilenameGenerator(r2, {}, [ fp1 ]); + const filenameParts2 = fp2.split('/'); + const filename2 = filenameParts2[filenameParts2.length - 1]; + should(filename2.length).be.lessThanOrEqual(255); + should(filename2).not.be.eql(filename1); }); it('should return decoded url-based filename', () => { const r = new Resource('https://developer.mozilla.org/ru/docs/JavaScript_%D1%88%D0%B5%D0%BB%D0%BB%D1%8B'); - const filename = byTypeFilenameGenerator(r, {}, []); - filename.should.equalFileSystemPath('JavaScript_шеллы'); + const filepath = byTypeFilenameGenerator(r, {}, []); + filepath.should.equalFileSystemPath('JavaScript_шеллы'); const r2 = new Resource('https://developer.mozilla.org/Hello%20G%C3%BCnter.png'); - const filename2 = byTypeFilenameGenerator(r2, {}, []); - filename2.should.equalFileSystemPath('Hello Günter.png'); + const filepath2 = byTypeFilenameGenerator(r2, {}, []); + filepath2.should.equalFileSystemPath('Hello Günter.png'); }); it('should remove not allowed characters from filename', () => { const r1 = new Resource('http://example.com/some/path/<*a*>.png'); - byTypeFilenameGenerator(r1, {}, []).should.equalFileSystemPath('__a__.png'); + byTypeFilenameGenerator(r1, {}, []).should.equalFileSystemPath('_a_.png'); }); }); diff --git a/test/unit/utils/utils-test.js b/test/unit/utils/utils-test.js index 5a9a6ad3..1a1e2cca 100644 --- a/test/unit/utils/utils-test.js +++ b/test/unit/utils/utils-test.js @@ -2,9 +2,9 @@ import should from 'should'; import { isUrl, getUrl, getUnixPath, getFilenameFromUrl, getFilepathFromUrl, getHashFromUrl, getRelativePath, - shortenFilename, prettifyFilename, + prettifyFilename, isUriSchemaSupported, urlsEqual, - normalizeUrl, getCharsetFromCss + normalizeUrl, getCharsetFromCss, sanitizeFilename } from '../../../lib/utils/index.js'; describe('Utils', function () { @@ -130,51 +130,41 @@ describe('Utils', function () { }); }); - describe('#shortenFilename', function() { + describe('#sanitizeFilename', function() { it('should leave file with length < 255 as is', function() { var f1 = new Array(25).fill('a').join(''); should(f1.length).be.eql(25); - should(shortenFilename(f1)).be.eql(f1); + should(sanitizeFilename(f1)).be.eql(f1); var f2 = new Array(25).fill('a').join('') + '.txt'; should(f2.length).be.eql(29); - should(shortenFilename(f2)).be.eql(f2); + should(sanitizeFilename(f2)).be.eql(f2); }); it('should shorten file with length = 255', function() { var f1 = new Array(255).fill('a').join(''); should(f1.length).be.eql(255); - should(shortenFilename(f1).length).be.lessThan(255); + should(sanitizeFilename(f1).length).be.lessThanOrEqual(255); }); it('should shorten file with length > 255', function() { var f1 = new Array(1255).fill('a').join(''); should(f1.length).be.eql(1255); - should(shortenFilename(f1).length).be.lessThan(255); + should(sanitizeFilename(f1).length).be.lessThanOrEqual(255); }); it('should shorten file with length = 255 and keep extension', function() { var f1 = new Array(251).fill('a').join('') + '.txt'; should(f1.length).be.eql(255); - should(shortenFilename(f1).length).be.lessThan(255); - should(shortenFilename(f1).split('.')[1]).be.eql('txt'); + should(sanitizeFilename(f1).length).be.lessThanOrEqual(255); + should(sanitizeFilename(f1).split('.')[1]).be.eql('txt'); }); it('should shorten file with length > 255 and keep extension', function() { var f1 = new Array(1251).fill('a').join('') + '.txt'; should(f1.length).be.eql(1255); - should(shortenFilename(f1).length).be.lessThan(255); - should(shortenFilename(f1).split('.')[1]).be.eql('txt'); - }); - - it('should shorten file with length > 255 to have basename length 20 chars', function() { - var f1 = new Array(500).fill('a').join(''); - should(f1.length).be.eql(500); - should(shortenFilename(f1).split('.')[0].length).be.eql(20); - - var f2 = new Array(500).fill('a').join('') + '.txt'; - should(f2.length).be.eql(504); - should(shortenFilename(f2).split('.')[0].length).be.eql(20); + should(sanitizeFilename(f1).length).be.lessThanOrEqual(255); + should(sanitizeFilename(f1).split('.')[1]).be.eql('txt'); }); });