}
function utf8Slice (buf, start, end) {
- var res = ''
- var tmp = ''
end = Math.min(buf.length, end)
-
- for (var i = start; i < end; i++) {
- if (buf[i] <= 0x7F) {
- res += decodeUtf8Char(tmp) + String.fromCharCode(buf[i])
- tmp = ''
+ var firstByte, secondByte, thirdByte, fourthByte, bytesPerSequence, tempCodePoint, codePoint, res = []
+ var i = start
+
+ for (; i < end; i += bytesPerSequence) {
+ firstByte = buf[i]
+ codePoint = 0xFFFD
+
+ if (firstByte > 0xEF) {
+ bytesPerSequence = 4
+ } else if (firstByte > 0xDF) {
+ bytesPerSequence = 3
+ } else if (firstByte > 0xBF) {
+ bytesPerSequence = 2
} else {
- tmp += '%' + buf[i].toString(16)
+ bytesPerSequence = 1
}
+
+ if (i + bytesPerSequence <= end) {
+ switch (bytesPerSequence) {
+ case 1:
+ if (firstByte < 0x80) {
+ codePoint = firstByte
+ }
+ break
+ case 2:
+ secondByte = buf[i + 1]
+ if ((secondByte & 0xC0) === 0x80) {
+ tempCodePoint = (firstByte & 0x1F) << 0x6 | (secondByte & 0x3F)
+ if (tempCodePoint > 0x7F) {
+ codePoint = tempCodePoint
+ }
+ }
+ break
+ case 3:
+ secondByte = buf[i + 1]
+ thirdByte = buf[i + 2]
+ if ((secondByte & 0xC0) === 0x80 && (thirdByte & 0xC0) === 0x80) {
+ tempCodePoint = (firstByte & 0xF) << 0xC | (secondByte & 0x3F) << 0x6 | (thirdByte & 0x3F)
+ if (tempCodePoint > 0x7FF) {
+ codePoint = tempCodePoint
+ }
+ }
+ break
+ case 4:
+ secondByte = buf[i + 1]
+ thirdByte = buf[i + 2]
+ fourthByte = buf[i + 3]
+ if ((secondByte & 0xC0) === 0x80 && (thirdByte & 0xC0) === 0x80 && (fourthByte & 0xC0) === 0x80) {
+ tempCodePoint = (firstByte & 0xF) << 0x12 | (secondByte & 0x3F) << 0xC | (thirdByte & 0x3F) << 0x6 | (fourthByte & 0x3F)
+ if (tempCodePoint > 0xFFFF && tempCodePoint < 0x110000) {
+ codePoint = tempCodePoint
+ }
+ }
+ }
+ }
+
+ if (codePoint === 0xFFFD) {
+ // we generated an invalid codePoint so make sure to only advance by 1 byte
+ bytesPerSequence = 1
+ } else if (codePoint > 0xFFFF) {
+ // encode to utf16 (surrogate pair dance)
+ codePoint -= 0x10000
+ res.push(codePoint >>> 10 & 0x3FF | 0xD800)
+ codePoint = 0xDC00 | codePoint & 0x3FF
+ }
+
+ res.push(codePoint)
}
- return res + decodeUtf8Char(tmp)
+ return String.fromCharCode.apply(String, res)
}
function asciiSlice (buf, start, end) {
}
return i
}
-
-function decodeUtf8Char (str) {
- try {
- return decodeURIComponent(str)
- } catch (err) {
- return String.fromCharCode(0xFFFD) // UTF 8 invalid char
- }
-}