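The current surrogate-pair decoding does not correctly handle code points
from U+20000 upward: it combines the pair with `| 0x10000` instead of
`+ 0x10000`, so the offset is lost whenever bit 16 of the combined value is
already set (e.g. U+20000 decodes as U+10000). This is incorrect, because
UTF-16 surrogate pairs encode every code point up to U+10FFFF.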
}
// valid surrogate pair
- codePoint = leadSurrogate - 0xD800 << 10 | codePoint - 0xDC00 | 0x10000
+ codePoint = (leadSurrogate - 0xD800 << 10 | codePoint - 0xDC00) + 0x10000
} else if (leadSurrogate) {
// valid bmp char, but last char was a lead
if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD)
t.end()
})
+test('detect utf16 surrogate pairs over U+20000 until U+10FFFF', function (t) {
+ var text = '\uD842\uDFB7' + '\uD93D\uDCAD' + '\uDBFF\uDFFF'
+ var buf = new B(text)
+ t.equal(text, buf.toString())
+ t.end()
+})
+
test('replace orphaned utf16 surrogate lead code point', function (t) {
var text = '\uD83D\uDE38' + '\uD83D' + '\uD83D\uDC4D'
var buf = new B(text)