}
function utf8Write (buf, string, offset, length) {
- var charsWritten = blitBuffer(utf8ToBytes(string), buf, offset, length)
+ var charsWritten = blitBuffer(utf8ToBytes(string, buf.length - offset), buf, offset, length)
return charsWritten
}
return n.toString(16)
}
-function utf8ToBytes (str) {
- var byteArray = []
- for (var i = 0; i < str.length; i++) {
- var b = str.charCodeAt(i)
- if (b <= 0x7F) {
- byteArray.push(b)
- } else {
- var start = i
- if (b >= 0xD800 && b <= 0xDFFF) i++
- var h = encodeURIComponent(str.slice(start, i+1)).substr(1).split('%')
- for (var j = 0; j < h.length; j++) {
- byteArray.push(parseInt(h[j], 16))
+function utf8ToBytes(string, units) {
+ var codePoint, length = string.length
+ var leadSurrogate = null
+ var units = units || Infinity
+ var bytes = []
+ var i = 0
+
+ for (; i<length; i++) {
+ codePoint = string.charCodeAt(i)
+
+ // is surrogate component
+ if (codePoint > 0xD7FF && codePoint < 0xE000) {
+
+ // last char was a lead
+ if (leadSurrogate) {
+
+ // 2 leads in a row
+ if (codePoint < 0xDC00) {
+ if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD)
+ leadSurrogate = codePoint
+ continue
+ }
+
+ // valid surrogate pair
+ else {
+ codePoint = leadSurrogate - 0xD800 << 10 | codePoint - 0xDC00 | 0x10000
+ leadSurrogate = null
+ }
+ }
+
+ // no lead yet
+ else {
+
+ // unexpected trail
+ if (codePoint > 0xDBFF) {
+ if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD)
+ continue
+ }
+
+ // unpaired lead
+ else if (i + 1 === length) {
+ if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD)
+ continue
+ }
+
+ // valid lead
+ else {
+ leadSurrogate = codePoint
+ continue
+ }
}
}
+
+ // valid bmp char, but last char was a lead
+ else if (leadSurrogate) {
+ if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD)
+ leadSurrogate = null
+ }
+
+ // encode utf8
+ if (codePoint < 0x80) {
+ if ((units -= 1) < 0) break
+ bytes.push(codePoint)
+ }
+ else if (codePoint < 0x800) {
+ if ((units -= 2) < 0) break
+ bytes.push(
+ codePoint >> 0x6 | 0xC0,
+ codePoint & 0x3F | 0x80
+ );
+ }
+ else if (codePoint < 0x10000) {
+ if ((units -= 3) < 0) break
+ bytes.push(
+ codePoint >> 0xC | 0xE0,
+ codePoint >> 0x6 & 0x3F | 0x80,
+ codePoint & 0x3F | 0x80
+ );
+ }
+ else if (codePoint < 0x200000) {
+ if ((units -= 4) < 0) break
+ bytes.push(
+ codePoint >> 0x12 | 0xF0,
+ codePoint >> 0xC & 0x3F | 0x80,
+ codePoint >> 0x6 & 0x3F | 0x80,
+ codePoint & 0x3F | 0x80
+ );
+ }
+ else {
+ throw new Error('Invalid code point')
+ }
}
- return byteArray
+
+ return bytes
}
function asciiToBytes (str) {