From patchwork Tue Aug 31 18:33:21 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [gccgo] Don't permit Unicode surrogate pairs in escape sequences From: Ian Taylor X-Patchwork-Id: 63312 Message-Id: To: gcc-patches@gcc.gnu.org, gofrontend-dev@googlegroups.com Date: Tue, 31 Aug 2010 11:33:21 -0700 This patch changes gccgo to not permit Unicode surrogate pairs in Unicode escape sequences. Surrogate pairs are only used in UTF-16, but in Go, escape sequences always generate UTF-8. Committed to gccgo branch. Ian diff -r 79786d3fc04a go/lex.cc --- a/go/lex.cc Tue Aug 31 11:08:59 2010 -0700 +++ b/go/lex.cc Tue Aug 31 11:30:44 2010 -0700 @@ -1173,9 +1173,16 @@ + (hex_value(p[2]) << 8) + (hex_value(p[3]) << 4) + hex_value(p[4])); + if (*value >= 0xd800 && *value < 0xe000) + { + error_at(this->location(),"invalid unicode code point 0x%x", + *value); + // Use the replacement character. + *value = 0xfffd; + } return p + 5; } - this->error("invalid little unicode character"); + this->error("invalid little unicode code point"); return p + 1; case 'U': @@ -1192,9 +1199,17 @@ + (hex_value(p[6]) << 8) + (hex_value(p[7]) << 4) + hex_value(p[8])); + if (*value > 0x10ffff + || (*value >= 0xd800 && *value < 0xe000)) + { + error_at(this->location(), "invalid unicode code point 0x%x", + *value); + // Use the replacement character. + *value = 0xfffd; + } return p + 9; } - this->error("invalid big unicode character"); + this->error("invalid big unicode code point"); return p + 1; default: @@ -1231,7 +1246,7 @@ if (v > 0x10ffff) { warning_at(location, 0, - "unicode character 0x%x out of range in string", v); + "unicode code point 0x%x out of range in string", v); // Turn it into the "replacement character". 
v = 0xfffd; } Index: gcc/testsuite/go.test/test/char_lit.go =================================================================== --- gcc/testsuite/go.test/test/char_lit.go (revision 163682) +++ gcc/testsuite/go.test/test/char_lit.go (working copy) @@ -30,15 +30,15 @@ func main() { '\xFE' + '\u0123' + '\ubabe' + - '\U0123ABCD' + - '\Ucafebabe' + '\U0010FFFF' + + '\U000ebabe' ; - if '\Ucafebabe' != 0xcafebabe { - print("cafebabe wrong\n"); + if '\U000ebabe' != 0x000ebabe { + print("ebabe wrong\n"); os.Exit(1) } - if i != 0xcc238de1 { - print("number is ", i, " should be ", 0xcc238de1, "\n"); + if i != 0x20e213 { + print("number is ", i, " should be ", 0x20e213, "\n"); os.Exit(1) } }