diff mbox

libstdc++/69703 ignore endianness in codecvt_utf8

Message ID 20160419180749.GD3577@redhat.com
State New
Headers show

Commit Message

Jonathan Wakely April 19, 2016, 6:07 p.m. UTC
This was reported as a bug in the Filesystem library, but it's
actually a problem in the codecvt_utf8 facet that it uses.

Tested x86_64-linux, committed to trunk.
diff mbox

Patch

commit 7f3a547a9e80556030e77ac090e2ad8e04e44abc
Author: Jonathan Wakely <jwakely@redhat.com>
Date:   Tue Apr 19 18:32:17 2016 +0100

    libstdc++/69703 ignore endianness in codecvt_utf8
    
    	PR libstdc++/69703
    	* src/c++11/codecvt.cc (__codecvt_utf8_base<char16_t>::do_in):
    	Override endianness bit in mode.
    	* testsuite/22_locale/codecvt/codecvt_utf8/69703.cc: New test.
    	* testsuite/22_locale/codecvt/codecvt_utf8_utf16/66855.cc: Test
    	that little_endian mode is ignored.
    	* testsuite/experimental/filesystem/path/native/string.cc: New test.

diff --git a/libstdc++-v3/src/c++11/codecvt.cc b/libstdc++-v3/src/c++11/codecvt.cc
index 327beb6..b6b6358 100644
--- a/libstdc++-v3/src/c++11/codecvt.cc
+++ b/libstdc++-v3/src/c++11/codecvt.cc
@@ -789,7 +789,14 @@  do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
 {
   range<const char> from{ __from, __from_end };
   range<char16_t> to{ __to, __to_end };
-  auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
+  // PR 69703: the endianness bit in _M_mode must be ignored here.  Keep only
+  // the header flags the user asked for (mask, do not set), then choose
+  // little_endian purely from the host byte order.
+  codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
+#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
+  mode = codecvt_mode(mode | little_endian);
+#endif
+  auto res = ucs2_in(from, to, _M_maxcode, mode);
   __from_next = from.next;
   __to_next = to.next;
   return res;
diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf8/69703.cc b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf8/69703.cc
new file mode 100644
index 0000000..745d2c2
--- /dev/null
+++ b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf8/69703.cc
@@ -0,0 +1,103 @@ 
+// Copyright (C) 2016 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-options "-std=gnu++11" }
+
+#include <codecvt>
+#include <testsuite_hooks.h>
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+  // Decode ASCII "abc" to char16_t with the default codecvt_utf8 mode.
+  const char out[] = "abc";
+  char16_t in[4];
+  std::codecvt_utf8<char16_t> cvt;
+  std::mbstate_t st{}; // value-initialized; never pass an indeterminate state
+  const char* no;
+  char16_t* ni;
+  auto res = cvt.in(st, out, out+3, no, in, in+3, ni);
+  VERIFY( res == std::codecvt_base::ok );
+  VERIFY( in[0] == u'a' );
+  VERIFY( in[1] == u'b' );
+  VERIFY( in[2] == u'c' );
+}
+
+void
+test02()
+{
+  bool test __attribute__((unused)) = true;
+  // Same as test01, but with std::little_endian requested in the mode.
+  const char out[] = "abc";
+  char16_t in[4];
+  std::codecvt_utf8<char16_t, 0x10ffff, std::little_endian> cvt;
+  std::mbstate_t st{}; // value-initialized; never pass an indeterminate state
+  const char* no;
+  char16_t* ni;
+  auto res = cvt.in(st, out, out+3, no, in, in+3, ni);
+  VERIFY( res == std::codecvt_base::ok );
+  VERIFY( in[0] == u'a' );
+  VERIFY( in[1] == u'b' );
+  VERIFY( in[2] == u'c' );
+}
+
+void
+test03()
+{
+  bool test __attribute__((unused)) = true;
+  // Decode ASCII "abc" to char32_t with the default codecvt_utf8 mode.
+  const char out[] = "abc";
+  char32_t in[4];
+  std::codecvt_utf8<char32_t> cvt;
+  std::mbstate_t st{}; // value-initialized; never pass an indeterminate state
+  const char* no;
+  char32_t* ni;
+  auto res = cvt.in(st, out, out+3, no, in, in+3, ni);
+  VERIFY( res == std::codecvt_base::ok );
+  VERIFY( in[0] == U'a' );
+  VERIFY( in[1] == U'b' );
+  VERIFY( in[2] == U'c' );
+}
+
+
+void
+test04()
+{
+  bool test __attribute__((unused)) = true;
+  // Same as test03, but with std::little_endian requested in the mode.
+  const char out[] = "abc";
+  char32_t in[4];
+  std::codecvt_utf8<char32_t, 0x10ffff, std::little_endian> cvt;
+  std::mbstate_t st{}; // value-initialized; never pass an indeterminate state
+  const char* no;
+  char32_t* ni;
+  auto res = cvt.in(st, out, out+3, no, in, in+3, ni);
+  VERIFY( res == std::codecvt_base::ok );
+  VERIFY( in[0] == U'a' );
+  VERIFY( in[1] == U'b' );
+  VERIFY( in[2] == U'c' );
+}
+
+int
+main()
+{
+  test01(); // char16_t, default mode
+  test02(); // char16_t, little_endian mode
+  test03(); // char32_t, default mode
+  test04(); // char32_t, little_endian mode
+}
diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf8_utf16/66855.cc b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf8_utf16/66855.cc
index 05e5bc6..49b750f 100644
--- a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf8_utf16/66855.cc
+++ b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf8_utf16/66855.cc
@@ -45,8 +45,35 @@  test01()
   VERIFY( buf[3] == utf16[3] );
 }
 
+void
+test02()
+{
+  // A little_endian mode flag must have no effect on UTF-8 <-> UTF-16.
+  std::codecvt_utf8_utf16<char16_t, 0x10ffff, std::little_endian> conv;
+  char16_t src[] = u"\ub098\ub294\ud0dc\uc624";
+  char bytes[16];
+  const char16_t* src_next;
+  char* bytes_end;
+  std::mbstate_t state{};
+  // Encode the four UTF-16 code units as UTF-8.
+  auto rc = conv.out(state, src, src+4, src_next, bytes, bytes+16, bytes_end);
+  VERIFY( rc == std::codecvt_base::ok );
+
+  // Decode them again from a fresh state and expect the original text.
+  state = {};
+  char16_t back[4] = {};
+  const char* bytes_next = bytes_end;
+  char16_t* back_next;
+  rc = conv.in(state, bytes, bytes_next, bytes_next, back, back+4, back_next);
+  VERIFY( rc == std::codecvt_base::ok );
+  VERIFY( back_next == back+4 );
+  for (int i = 0; i < 4; ++i)
+    VERIFY( back[i] == src[i] );
+}
+
 int
 main()
 {
   test01();
+  test02(); // same round trip with std::little_endian in the mode
 }
diff --git a/libstdc++-v3/testsuite/experimental/filesystem/path/native/string.cc b/libstdc++-v3/testsuite/experimental/filesystem/path/native/string.cc
new file mode 100644
index 0000000..05ff57c
--- /dev/null
+++ b/libstdc++-v3/testsuite/experimental/filesystem/path/native/string.cc
@@ -0,0 +1,72 @@ 
+// Copyright (C) 2016 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-options "-std=gnu++11 -lstdc++fs" }
+
+#include <experimental/filesystem>
+#include <string>
+#include <testsuite_hooks.h>
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  namespace fs = std::experimental::filesystem;
+  const std::string expected = "abc";
+  fs::path pth(expected);
+  // native(), c_str() and the explicit conversion must all equal the input.
+  VERIFY( pth.native() == expected );
+  VERIFY( pth.c_str() == expected );
+  VERIFY( static_cast<std::string>(pth) == expected );
+
+  std::string converted = pth; // implicit conversion
+  VERIFY( converted == pth.native() );
+}
+
+void
+test02()
+{
+  bool test __attribute__((unused)) = true;
+
+  using namespace std::experimental::filesystem;
+  const char* s = "abc";
+  path p(s);
+  // Each string<CharT>() result must match its literal and named accessor.
+  auto str = p.string<char>();
+  VERIFY( str == "abc" ); // narrow literal: str is a std::string
+  VERIFY( str == p.string() );
+
+  auto strw = p.string<wchar_t>();
+  VERIFY( strw == L"abc" );
+  VERIFY( strw == p.wstring() );
+
+  auto str16 = p.string<char16_t>();
+  VERIFY( str16 == u"abc" );
+  VERIFY( str16 == p.u16string() );
+
+  auto str32 = p.string<char32_t>();
+  VERIFY( str32 == U"abc" );
+  VERIFY( str32 == p.u32string() );
+}
+
+int
+main()
+{
+  test01(); // native(), c_str() and conversion to std::string
+  test02(); // string<CharT>() against the named string accessors
+}