Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions helper/unicode.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
const _ = require('lodash');
const regenerate = require('regenerate');
const unicodeToArray = require('lodash/_unicodeToArray');

// non-printable control characters
// ref: https://en.wikipedia.org/wiki/List_of_Unicode_characters
Expand Down Expand Up @@ -94,3 +95,31 @@ function normalize(str) {
}

module.exports.normalize = normalize;

// unicode aware replacement for the native String 'length' property
// note: ported from 'npm stringz' using 'lodash' internals in place of 'char-regex'
//
// the input is split with lodash's internal 'unicodeToArray' helper and the
// number of resulting elements is returned, rather than the number of
// UTF-16 code units which 'str.length' would report.
//
// throws an Error('invalid string') when 'str' is not a string.
module.exports.length = (str) => {
  // guard: anything other than a string is a programming error
  const valid = _.isString(str);
  if (!valid) {
    throw new Error('invalid string');
  }

  // one array element per character, so the array size is the answer
  const chars = unicodeToArray(str);
  return chars.length;
};

// unicode aware substring function
// note: ported from 'npm stringz' using 'lodash' internals in place of 'char-regex'
//
// indices refer to the elements produced by lodash's internal 'unicodeToArray'
// helper, not to UTF-16 code units.
//
// str:   the text to extract from; anything other than a string throws
//        Error('invalid string').
// begin: index of the first character to include. non-numbers, NaN and
//        negative values are treated as 0. values past the end of the
//        string (including +Infinity) yield an empty string.
// end:   index at which to stop (exclusive). finite negative values are
//        treated as 0; when undefined the rest of the string is returned.
//
// returns the selected characters joined back into a string.
module.exports.substring = (str, begin, end) => {
  // sanity checking
  if (!_.isString(str)) { throw new Error('invalid string'); }

  // even though negative numbers work with Array#slice, they're not in the spec.
  // note: '+Infinity' is deliberately NOT reset to 0 here; it is a valid (if
  // oversized) start index and must select nothing rather than everything.
  // 'NaN > 0' is false, so NaN still falls back to 0.
  const start = (typeof begin === 'number' && begin > 0) ? begin : 0;
  const stop = (_.isFinite(end) && end < 0) ? 0 : end;

  // an empty character array slices and joins to '' without special casing
  return unicodeToArray(str).slice(start, stop).join('');
};
4 changes: 2 additions & 2 deletions sanitizer/_text.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ function _sanitize( raw, clean ){
if( !_.isString(text) || _.isEmpty(text) ){
messages.errors.push(`invalid param 'text': text length, must be >0`);
} else {
if( text.length > MAX_TEXT_LENGTH ){
if( unicode.length(text) > MAX_TEXT_LENGTH ){
messages.warnings.push(`param 'text' truncated to ${MAX_TEXT_LENGTH} characters`);
text = text.substring(0, MAX_TEXT_LENGTH);
text = unicode.substring(text, 0, MAX_TEXT_LENGTH);
}
clean.text = text;
}
Expand Down
14 changes: 14 additions & 0 deletions test/unit/sanitizer/_text.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
const sanitizer = require('../../../sanitizer/_text')();
const unicode = require('../../../helper/unicode');

module.exports.tests = {};

Expand Down Expand Up @@ -154,6 +155,19 @@ it again and again until we reach our destination.` };
t.deepEquals(messages.warnings, [`param 'text' truncated to 140 characters`]);
t.end();
});

// https://github.com/pelias/api/issues/1574
test('truncate should be unicode aware', (t) => {
const raw = { text: 'a' + '👩‍❤️‍👩'.repeat(200) };
const clean = {};
const messages = sanitizer.sanitize(raw, clean);

t.equals(unicode.length(clean.text), 140);
t.equals(clean.text, 'a' + '👩‍❤️‍👩'.repeat(139));
t.deepEquals(messages.errors, [], 'no errors');
t.deepEquals(messages.warnings, [`param 'text' truncated to 140 characters`]);
t.end();
});
};

module.exports.all = (tape, common) => {
Expand Down