Patchwork [gccgo] Lower type conversion of string constant

login
register
mail settings
Submitter Ian Taylor
Date June 22, 2010, 5:40 p.m.
Message ID <mcrlja7klt5.fsf@dhcp-172-17-9-151.mtv.corp.google.com>
Download mbox | patch
Permalink /patch/56542/
State New
Headers show

Comments

Ian Taylor - June 22, 2010, 5:40 p.m.
When a string constant is converted to a []byte or []int, the compiler
can do the conversion at compile time rather than calling a function.
This patch implements that.  Committed to the gccgo branch.

Ian

Patch

diff -r 05c8f37a6621 go/expressions.cc
--- a/go/expressions.cc	Thu Jun 17 16:56:30 2010 -0700
+++ b/go/expressions.cc	Tue Jun 22 10:38:24 2010 -0700
@@ -2974,6 +2974,63 @@ 
       mpfr_clear(imag);
     }
 
+  if (type->is_open_array_type())
+    {
+      Type* element_type = type->array_type()->element_type()->forwarded();
+      bool is_byte = element_type == Type::lookup_integer_type("uint8");
+      bool is_int = element_type == Type::lookup_integer_type("int");
+      if (is_byte || is_int)
+	{
+	  std::string s;
+	  if (val->string_constant_value(&s))
+	    {
+	      Expression_list* vals = new Expression_list();
+	      if (is_byte)
+		{
+		  for (std::string::const_iterator p = s.begin();
+		       p != s.end();
+		       p++)
+		    {
+		      mpz_t val;
+		      mpz_init_set_ui(val, static_cast<unsigned char>(*p));
+		      Expression* v = Expression::make_integer(&val,
+							       element_type,
+							       location);
+		      vals->push_back(v);
+		      mpz_clear(val);
+		    }
+		}
+	      else
+		{
+		  const char *p = s.data();
+		  const char *pend = s.data() + s.length();
+		  while (p < pend)
+		    {
+		      unsigned int c;
+		      int adv = Lex::fetch_char(p, &c);
+		      if (adv == 0)
+			{
+			  warning_at(this->location(), 0,
+				     "invalid UTF-8 encoding");
+			  adv = 1;
+			}
+		      p += adv;
+		      mpz_t val;
+		      mpz_init_set_ui(val, c);
+		      Expression* v = Expression::make_integer(&val,
+							       element_type,
+							       location);
+		      vals->push_back(v);
+		      mpz_clear(val);
+		    }
+		}
+
+	      return Expression::make_composite_literal(type, false, vals,
+							location);
+	    }
+	}
+    }
+
   return this;
 }
 
diff -r 05c8f37a6621 go/lex.cc
--- a/go/lex.cc	Thu Jun 17 16:56:30 2010 -0700
+++ b/go/lex.cc	Tue Jun 22 10:38:24 2010 -0700
@@ -729,24 +729,25 @@ 
     }
 }
 
-// Advance one UTF-8 character.  Return the pointer beyond the
-// character.  Set *VALUE to the value.
+// Fetch one UTF-8 character from a string.  Set *VALUE to the value.
+// Return the number of bytes read from the string.  Returns 0 if the
+// string does not point to a valid UTF-8 character.
 
-const char*
-Lex::advance_one_utf8_char(const char* p, unsigned int* value)
+int
+Lex::fetch_char(const char* p, unsigned int* value)
 {
   unsigned char c = *p;
   if (c <= 0x7f)
     {
       *value = c;
-      return p + 1;
+      return 1;
     }
   else if ((c & 0xe0) == 0xc0
 	   && (p[1] & 0xc0) == 0x80)
     {
       *value = (((c & 0x1f) << 6)
 		+ (p[1] & 0x3f));
-      return p + 2;
+      return 2;
     }
   else if ((c & 0xf0) == 0xe0
 	   && (p[1] & 0xc0) == 0x80
@@ -755,7 +756,7 @@ 
       *value = (((c & 0xf) << 12)
 		+ ((p[1] & 0x3f) << 6)
 		+ (p[2] & 0x3f));
-      return p + 3;
+      return 3;
     }
   else if ((c & 0xf8) == 0xf0
 	   && (p[1] & 0xc0) == 0x80
@@ -766,7 +767,7 @@ 
 		+ ((p[1] & 0x3f) << 12)
 		+ ((p[2] & 0x3f) << 6)
 		+ (p[3] & 0x3f));
-      return p + 4;
+      return 4;
     }
   else if ((c & 0xfc) == 0xf8
 	   && (p[1] & 0xc0) == 0x80
@@ -779,7 +780,7 @@ 
 		+ ((p[2] & 0x3f) << 12)
 		+ ((p[3] & 0x3f) << 6)
 		+ (p[4] & 0x3f));
-      return p + 5;
+      return 5;
     }
   else if ((c & 0xf7) == 0xfc
 	   && (p[1] & 0xc0) == 0x80
@@ -794,13 +795,30 @@ 
 		+ ((p[3] & 0x3f) << 12)
 		+ ((p[4] & 0x3f) << 6)
 		+ (p[5] & 0x3f));
-      return p + 6;
+      return 6;
     }
   else
     {
+      /* Invalid encoding. Return the Unicode replacement
+	 character.  */
+      *value = 0xfffd;
+      return 0;
+    }
+}
+
+// Advance one UTF-8 character.  Return the pointer beyond the
+// character.  Set *VALUE to the value.
+
+const char*
+Lex::advance_one_utf8_char(const char* p, unsigned int* value)
+{
+  int adv = Lex::fetch_char(p, value);
+  if (adv == 0)
+    {
       this->error("invalid UTF-8 encoding");
       return p + 1;
     }
+  return p + adv;
 }
 
 // Pick up an identifier.
diff -r 05c8f37a6621 go/lex.h
--- a/go/lex.h	Thu Jun 17 16:56:30 2010 -0700
+++ b/go/lex.h	Tue Jun 22 10:38:24 2010 -0700
@@ -336,6 +336,12 @@ 
   append_char(unsigned int v, bool is_charater, std::string* str,
 	      source_location);
 
+  // A helper function.  Fetch a UTF-8 character from STR and store it
+  // in *VALUE.  Return the number of bytes read from STR.  Return 0
+  // if STR does not point to a valid UTF-8 character.
+  static int
+  fetch_char(const char* str, unsigned int *value);
+
  private:
   void
   error(const char*);