From 0d5f4d150758813b253b891f9fec92af4ca66caa Mon Sep 17 00:00:00 2001 From: Koki Takahashi Date: Thu, 12 Nov 2015 18:20:25 +0900 Subject: [PATCH] Fix fatal error of surrogate pair The current surrogate pair handling doesn't take code points at or above U+20000 into consideration. This is invalid, since UTF-16 is specified to encode code points up to U+10FFFF. --- index.js | 2 +- test/from-string.js | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/index.js b/index.js index 8645ca4..4857054 100644 --- a/index.js +++ b/index.js @@ -1465,7 +1465,7 @@ function utf8ToBytes (string, units) { } // valid surrogate pair - codePoint = leadSurrogate - 0xD800 << 10 | codePoint - 0xDC00 | 0x10000 + codePoint = (leadSurrogate - 0xD800 << 10 | codePoint - 0xDC00) + 0x10000 } else if (leadSurrogate) { // valid bmp char, but last char was a lead if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD) diff --git a/test/from-string.js b/test/from-string.js index 349d261..e25db26 100644 --- a/test/from-string.js +++ b/test/from-string.js @@ -9,6 +9,13 @@ test('detect utf16 surrogate pairs', function (t) { t.end() }) +test('detect utf16 surrogate pairs over U+20000 until U+10FFFF', function (t) { + var text = '\uD842\uDFB7' + '\uD93D\uDCAD' + '\uDBFF\uDFFF' + var buf = new B(text) + t.equal(text, buf.toString()) + t.end() +}) + test('replace orphaned utf16 surrogate lead code point', function (t) { var text = '\uD83D\uDE38' + '\uD83D' + '\uD83D\uDC4D' var buf = new B(text) -- 2.34.1