# Patch summary (text before the first "diff --git" header is ignored by patch tools).
#
# 1. coolq/cqcode/all_test.go (new): TestIssue1733 feeds the single control byte
#    U+0005 to writeQuote and expects the quoted six-character escape "\u0005".
# 2. coolq/cqcode/element.go: Element.MarshalJSON stops calling global.Quote and
#    calls the new package-local writeQuote, which appends straight into the
#    buffer. The "unicode/utf8" import is added and the go-cqhttp/global import
#    is dropped.
#    writeQuote emits s between double quotes and escapes:
#      - backslash and double quote with a leading backslash;
#      - newline, carriage return and tab as \n, \r, \t;
#      - every other byte below 0x20 as \u00XX (lower-case hex);
#      - a byte that does not decode as UTF-8 as \ufffd;
#      - U+2028 / U+2029 as \u2028 / \u2029.
#    Everything else is copied through unchanged in runs s[i:j].
# 3. global/quote.go (deleted): Quote produced Go string-literal escapes
#    (its doc comment lists \xFF-style sequences; the code also emits \a, \v and
#    \U........), which JSON string syntax does not define.
#
# NOTE(review): in writeQuote the `case '<', '>', '&':` arm can never run. The
# fast path above it accepts every byte in 0x20..0x7f except backslash and
# double quote, so those three bytes are consumed there and are written
# unescaped. Either delete the arm or exclude the three bytes from the fast
# path if HTML-safe output is intended. Any such change must also update the
# "+60,95" count in the element.go hunk header.
diff --git a/coolq/cqcode/all_test.go b/coolq/cqcode/all_test.go
new file mode 100644
index 0000000..a42e9bb
--- /dev/null
+++ b/coolq/cqcode/all_test.go
@@ -0,0 +1,19 @@
+package cqcode
+
+import (
+	"bytes"
+	"testing"
+)
+
+func TestIssue1733(t *testing.T) {
+	const (
+		input    = "\u0005"
+		expected = `"\u0005"`
+	)
+	var b bytes.Buffer
+	writeQuote(&b, input)
+	got := b.String()
+	if got != expected {
+		t.Errorf("want %v but got %v", expected, got)
+	}
+}
diff --git a/coolq/cqcode/element.go b/coolq/cqcode/element.go
index 87c057b..12dc08e 100644
--- a/coolq/cqcode/element.go
+++ b/coolq/cqcode/element.go
@@ -3,10 +3,9 @@ package cqcode
 import (
 	"bytes"
 	"strings"
+	"unicode/utf8"
 
 	"github.com/Mrs4s/MiraiGo/binary"
-
-	"github.com/Mrs4s/go-cqhttp/global"
 )
 
 // Element single message
@@ -61,8 +60,95 @@ func (e *Element) MarshalJSON() ([]byte, error) {
 			buf.WriteByte('"')
 			buf.WriteString(data.K)
 			buf.WriteString(`":`)
-			buf.WriteString(global.Quote(data.V))
+			writeQuote(buf, data.V)
 		}
 		buf.WriteString(`}}`)
 	}), nil
 }
+
+const hex = "0123456789abcdef"
+
+func writeQuote(b *bytes.Buffer, s string) {
+	i, j := 0, 0
+
+	b.WriteByte('"')
+	for j < len(s) {
+		c := s[j]
+
+		if c >= 0x20 && c <= 0x7f && c != '\\' && c != '"' {
+			// fast path: most of the time, printable ascii characters are used
+			j++
+			continue
+		}
+
+		switch c {
+		case '\\', '"', '\n', '\r', '\t':
+			b.WriteString(s[i:j])
+			b.WriteByte('\\')
+			switch c {
+			case '\n':
+				c = 'n'
+			case '\r':
+				c = 'r'
+			case '\t':
+				c = 't'
+			}
+			b.WriteByte(c)
+			i = j + 1
+			j = j + 1
+			continue
+
+		case '<', '>', '&':
+			b.WriteString(s[i:j])
+			b.WriteString(`\u00`)
+			b.WriteByte(hex[c>>4])
+			b.WriteByte(hex[c&0xF])
+			i = j + 1
+			j = j + 1
+			continue
+		}
+
+		// This encodes bytes < 0x20 except for \t, \n and \r.
+		if c < 0x20 {
+			b.WriteString(s[i:j])
+			b.WriteString(`\u00`)
+			b.WriteByte(hex[c>>4])
+			b.WriteByte(hex[c&0xF])
+			i = j + 1
+			j = j + 1
+			continue
+		}
+
+		r, size := utf8.DecodeRuneInString(s[j:])
+
+		if r == utf8.RuneError && size == 1 {
+			b.WriteString(s[i:j])
+			b.WriteString(`\ufffd`)
+			i = j + size
+			j = j + size
+			continue
+		}
+
+		switch r {
+		case '\u2028', '\u2029':
+			// U+2028 is LINE SEPARATOR.
+			// U+2029 is PARAGRAPH SEPARATOR.
+			// They are both technically valid characters in JSON strings,
+			// but don't work in JSONP, which has to be evaluated as JavaScript,
+			// and can lead to security holes there. It is valid JSON to
+			// escape them, so we do so unconditionally.
+			// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
+			b.WriteString(s[i:j])
+			b.WriteString(`\u202`)
+			b.WriteByte(hex[r&0xF])
+			i = j + size
+			j = j + size
+			continue
+		}
+
+		j += size
+	}
+
+	b.WriteString(s[i:])
+	b.WriteByte('"')
+}
diff --git a/global/quote.go b/global/quote.go
deleted file mode 100644
index 6e61218..0000000
--- a/global/quote.go
+++ /dev/null
@@ -1,146 +0,0 @@
-package global
-
-import (
-	"strconv"
-	"unicode/utf8"
-)
-
-const (
-	lowerhex = "0123456789abcdef"
-	upperhex = "0123456789ABCDEF"
-)
-
-// Quote returns a double-quoted Go string literal representing s. The
-// returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
-// control characters and non-printable characters as defined by
-// IsPrint.
-func Quote(s string) string {
-	return quoteWith(s, '"', false, false)
-}
-
-func quoteWith(s string, quote byte, asciiOnly, graphicOnly bool) string {
-	return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, asciiOnly, graphicOnly))
-}
-
-func appendQuotedWith(buf []byte, s string, quote byte, asciiOnly, graphicOnly bool) []byte {
-	// Often called with big strings, so preallocate. If there's quoting,
-	// this is conservative but still helps a lot.
-	if cap(buf)-len(buf) < len(s) {
-		nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1)
-		copy(nBuf, buf)
-		buf = nBuf
-	}
-	buf = append(buf, quote)
-	for width := 0; len(s) > 0; s = s[width:] {
-		r := rune(s[0])
-		width = 1
-		if r >= utf8.RuneSelf {
-			r, width = utf8.DecodeRuneInString(s)
-		}
-		if width == 1 && r == utf8.RuneError {
-			buf = append(buf, `\x`...)
-			buf = append(buf, lowerhex[s[0]>>4])
-			buf = append(buf, lowerhex[s[0]&0xF])
-			continue
-		}
-		buf = appendEscapedRune(buf, r, quote, asciiOnly, graphicOnly)
-	}
-	buf = append(buf, quote)
-	return buf
-}
-func appendEscapedRune(buf []byte, r rune, quote byte, asciiOnly, graphicOnly bool) []byte {
-	var runeTmp [utf8.UTFMax]byte
-	if r == rune(quote) || r == '\\' { // always backslashed
-		buf = append(buf, '\\')
-		buf = append(buf, byte(r))
-		return buf
-	}
-	if asciiOnly {
-		if r < utf8.RuneSelf && strconv.IsPrint(r) {
-			buf = append(buf, byte(r))
-			return buf
-		}
-	} else if strconv.IsPrint(r) || graphicOnly && isInGraphicList(r) {
-		n := utf8.EncodeRune(runeTmp[:], r)
-		buf = append(buf, runeTmp[:n]...)
-		return buf
-	}
-	switch r {
-	case '\a':
-		buf = append(buf, `\a`...)
-	case '\b':
-		buf = append(buf, `\b`...)
-	case '\f':
-		buf = append(buf, `\f`...)
-	case '\n':
-		buf = append(buf, `\n`...)
-	case '\r':
-		buf = append(buf, `\r`...)
-	case '\t':
-		buf = append(buf, `\t`...)
-	case '\v':
-		buf = append(buf, `\v`...)
-	default:
-		switch {
-		case !utf8.ValidRune(r):
-			r = 0xFFFD
-			fallthrough
-		case r < 0x10000:
-			buf = append(buf, `\u`...)
-			for s := 12; s >= 0; s -= 4 {
-				buf = append(buf, lowerhex[r>>uint(s)&0xF])
-			}
-		default:
-			buf = append(buf, `\U`...)
-			for s := 28; s >= 0; s -= 4 {
-				buf = append(buf, lowerhex[r>>uint(s)&0xF])
-			}
-		}
-	}
-	return buf
-}
-
-func isInGraphicList(r rune) bool {
-	// We know r must fit in 16 bits - see makeisprint.go.
-	if r > 0xFFFF {
-		return false
-	}
-	rr := uint16(r)
-	i := bsearch16(isGraphic, rr)
-	return i < len(isGraphic) && rr == isGraphic[i]
-}
-
-// bsearch16 returns the smallest i such that a[i] >= x.
-// If there is no such i, bsearch16 returns len(a).
-func bsearch16(a []uint16, x uint16) int {
-	i, j := 0, len(a)
-	for i < j {
-		h := i + (j-i)>>1
-		if a[h] < x {
-			i = h + 1
-		} else {
-			j = h
-		}
-	}
-	return i
-}
-
-// isGraphic lists the graphic runes not matched by IsPrint.
-var isGraphic = []uint16{
-	0x00a0,
-	0x1680,
-	0x2000,
-	0x2001,
-	0x2002,
-	0x2003,
-	0x2004,
-	0x2005,
-	0x2006,
-	0x2007,
-	0x2008,
-	0x2009,
-	0x200a,
-	0x202f,
-	0x205f,
-	0x3000,
-}