From patchwork Tue Jun 22 17:40:38 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ian Lance Taylor X-Patchwork-Id: 56542 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 78750B6EF3 for ; Wed, 23 Jun 2010 03:41:14 +1000 (EST) Received: (qmail 29214 invoked by alias); 22 Jun 2010 17:41:09 -0000 Received: (qmail 28986 invoked by uid 22791); 22 Jun 2010 17:41:05 -0000 X-SWARE-Spam-Status: No, hits=-2.3 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, SPF_HELO_PASS, TW_CC, TW_XF, T_RP_MATCHES_RCVD, T_TVD_MIME_NO_HEADERS X-Spam-Check-By: sourceware.org Received: from smtp-out.google.com (HELO smtp-out.google.com) (216.239.44.51) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Tue, 22 Jun 2010 17:40:56 +0000 Received: from hpaq7.eem.corp.google.com (hpaq7.eem.corp.google.com [172.25.149.7]) by smtp-out.google.com with ESMTP id o5MHenOe004871 for ; Tue, 22 Jun 2010 10:40:54 -0700 Received: from pwj10 (pwj10.prod.google.com [10.241.219.74]) by hpaq7.eem.corp.google.com with ESMTP id o5MHekhw024514 for ; Tue, 22 Jun 2010 10:40:47 -0700 Received: by pwj10 with SMTP id 10so711222pwj.34 for ; Tue, 22 Jun 2010 10:40:46 -0700 (PDT) Received: by 10.142.67.30 with SMTP id p30mr5458875wfa.154.1277228446465; Tue, 22 Jun 2010 10:40:46 -0700 (PDT) Received: from coign.google.com (dhcp-172-22-126-240.mtv.corp.google.com [172.22.126.240]) by mx.google.com with ESMTPS id f20sm6749944rvb.15.2010.06.22.10.40.45 (version=TLSv1/SSLv3 cipher=RC4-MD5); Tue, 22 Jun 2010 10:40:45 -0700 (PDT) To: gcc-patches@gcc.gnu.org Subject: [gccgo] Lower type conversion of string constant From: Ian Lance Taylor Date: Tue, 22 Jun 2010 10:40:38 -0700 Message-ID: User-Agent: Gnus/5.11 (Gnus v5.11) Emacs/22.1 (gnu/linux) MIME-Version: 1.0 
X-System-Of-Record: true X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org When a string constant is converted to a []byte or []int, the compiler can do the conversion at compile time rather than calling a function. This patch implements that. Committed to gccgo branch. Ian diff -r 05c8f37a6621 go/expressions.cc --- a/go/expressions.cc Thu Jun 17 16:56:30 2010 -0700 +++ b/go/expressions.cc Tue Jun 22 10:38:24 2010 -0700 @@ -2974,6 +2974,63 @@ mpfr_clear(imag); } + if (type->is_open_array_type()) + { + Type* element_type = type->array_type()->element_type()->forwarded(); + bool is_byte = element_type == Type::lookup_integer_type("uint8"); + bool is_int = element_type == Type::lookup_integer_type("int"); + if (is_byte || is_int) + { + std::string s; + if (val->string_constant_value(&s)) + { + Expression_list* vals = new Expression_list(); + if (is_byte) + { + for (std::string::const_iterator p = s.begin(); + p != s.end(); + p++) + { + mpz_t val; + mpz_init_set_ui(val, static_cast<unsigned char>(*p)); + Expression* v = Expression::make_integer(&val, + element_type, + location); + vals->push_back(v); + mpz_clear(val); + } + } + else + { + const char *p = s.data(); + const char *pend = s.data() + s.length(); + while (p < pend) + { + unsigned int c; + int adv = Lex::fetch_char(p, &c); + if (adv == 0) + { + warning_at(this->location(), 0, + "invalid UTF-8 encoding"); + adv = 1; + } + p += adv; + mpz_t val; + mpz_init_set_ui(val, c); + Expression* v = Expression::make_integer(&val, + element_type, + location); + vals->push_back(v); + mpz_clear(val); + } + } + + return Expression::make_composite_literal(type, false, vals, + location); + } + } + } + return this; } diff -r 05c8f37a6621 go/lex.cc --- a/go/lex.cc Thu Jun 17 16:56:30 2010 -0700 +++ b/go/lex.cc Tue Jun 22 10:38:24 2010 -0700 @@ 
-729,24 +729,25 @@ } } -// Advance one UTF-8 character. Return the pointer beyond the -// character. Set *VALUE to the value. +// Fetch one UTF-8 character from a string. Set *VALUE to the value. +// Return the number of bytes read from the string. Returns 0 if the +// string does not point to a valid UTF-8 character. -const char* -Lex::advance_one_utf8_char(const char* p, unsigned int* value) +int +Lex::fetch_char(const char* p, unsigned int* value) { unsigned char c = *p; if (c <= 0x7f) { *value = c; - return p + 1; + return 1; } else if ((c & 0xe0) == 0xc0 && (p[1] & 0xc0) == 0x80) { *value = (((c & 0x1f) << 6) + (p[1] & 0x3f)); - return p + 2; + return 2; } else if ((c & 0xf0) == 0xe0 && (p[1] & 0xc0) == 0x80 @@ -755,7 +756,7 @@ *value = (((c & 0xf) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f)); - return p + 3; + return 3; } else if ((c & 0xf8) == 0xf0 && (p[1] & 0xc0) == 0x80 @@ -766,7 +767,7 @@ + ((p[1] & 0x3f) << 12) + ((p[2] & 0x3f) << 6) + (p[3] & 0x3f)); - return p + 4; + return 4; } else if ((c & 0xfc) == 0xf8 && (p[1] & 0xc0) == 0x80 @@ -779,7 +780,7 @@ + ((p[2] & 0x3f) << 12) + ((p[3] & 0x3f) << 6) + (p[4] & 0x3f)); - return p + 5; + return 5; } else if ((c & 0xf7) == 0xfc && (p[1] & 0xc0) == 0x80 @@ -794,13 +795,30 @@ + ((p[3] & 0x3f) << 12) + ((p[4] & 0x3f) << 6) + (p[5] & 0x3f)); - return p + 6; + return 6; } else { + /* Invalid encoding. Return the Unicode replacement + character. */ + *value = 0xfffd; + return 0; + } +} + +// Advance one UTF-8 character. Return the pointer beyond the +// character. Set *VALUE to the value. + +const char* +Lex::advance_one_utf8_char(const char* p, unsigned int* value) +{ + int adv = Lex::fetch_char(p, value); + if (adv == 0) + { this->error("invalid UTF-8 encoding"); return p + 1; } + return p + adv; } // Pick up an identifier. 
diff -r 05c8f37a6621 go/lex.h --- a/go/lex.h Thu Jun 17 16:56:30 2010 -0700 +++ b/go/lex.h Tue Jun 22 10:38:24 2010 -0700 @@ -336,6 +336,12 @@ append_char(unsigned int v, bool is_charater, std::string* str, source_location); + // A helper function. Fetch a UTF-8 character from STR and store it + // in *VALUE. Return the number of bytes read from STR. Return 0 + // if STR does not point to a valid UTF-8 character. + static int + fetch_char(const char* str, unsigned int *value); + private: void error(const char*);