@@ -624,6 +624,110 @@ _GLIBCXX_END_NAMESPACE_CXX11
bool _M_always_noconv;
};
+#if __cplusplus >= 202002L
+ template<typename _CharT = char>
+ bool
+ __to_valid_utf8(string& __s)
+ {
+ // TODO if _CharT is wchar_t then transcode at the same time.
+
+ unsigned __seen = 0, __needed = 0;
+ unsigned char __lo_bound = 0x80, __hi_bound = 0xBF;
+ size_t __errors = 0;
+
+ auto __q = __s.data(), __eoq = __q + __s.size();
+ while (__q != __eoq)
+ {
+ unsigned char __byte = *__q;
+ if (__needed == 0)
+ {
+ if (__byte <= 0x7F) // 0x00 to 0x7F
+ {
+ while (++__q != __eoq && (unsigned char)*__q <= 0x7F)
+ { } // Fast forward to the next non-ASCII character.
+ continue;
+ }
+ else if (__byte < 0xC2)
+ {
+ *__q = 0xFF;
+ ++__errors;
+ }
+ else if (__byte <= 0xDF) // 0xC2 to 0xDF
+ {
+ __needed = 1;
+ }
+ else if (__byte <= 0xEF) // 0xE0 to 0xEF
+ {
+ if (__byte == 0xE0)
+ __lo_bound = 0xA0;
+ else if (__byte == 0xED)
+ __hi_bound = 0x9F;
+
+ __needed = 2;
+ }
+ else if (__byte <= 0xF4) // 0xF0 to 0xF4
+ {
+ if (__byte == 0xF0)
+ __lo_bound = 0x90;
+ else if (__byte == 0xF4)
+ __hi_bound = 0x8F;
+
+ __needed = 3;
+ }
+ else
+ {
+ *__q = 0xFF;
+ ++__errors;
+ }
+ }
+ else
+ {
+ if (__byte < __lo_bound || __byte > __hi_bound)
+ {
+ *(__q - __seen - 1) = 0xFF;
+ __builtin_memset(__q - __seen, 0xFE, __seen);
+ ++__errors;
+ __needed = __seen = 0;
+ __lo_bound = 0x80;
+ __hi_bound = 0xBF;
+ continue; // Reprocess the current character.
+ }
+
+ __lo_bound = 0x80;
+ __hi_bound = 0xBF;
+ ++__seen;
+ if (__seen == __needed)
+ __needed = __seen = 0;
+ }
+ __q++;
+ }
+
+ if (__needed)
+ {
+ // The string ends with an incomplete multibyte sequence.
+ if (__seen)
+ __s.resize(__s.size() - __seen);
+ __s.back() = 0xFF;
+ ++__errors;
+ }
+
+ if (__errors == 0)
+ return true;
+
+ string __s2;
+ __s2.reserve(__s.size() + __errors * 2);
+ for (unsigned char __byte : __s)
+ {
+ if (__byte == 0xFF)
+ __s2 += "\uFFFD";
+ else if (__byte != 0xFE)
+ __s2 += (char)__byte;
+ }
+ __s = std::move(__s2);
+ return false;
+ }
+#endif // C++20
+
/// @} group locales
_GLIBCXX_END_NAMESPACE_VERSION
@@ -917,42 +917,68 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
inline void
vprint_unicode(ostream& __os, string_view __fmt, format_args __args)
{
- // TODO: diagnose invalid UTF-8 code units
-#ifdef _WIN32
- int __fd_for_console(std::streambuf*);
- void __write_utf16_to_console(int, string);
-
- // If stream refers to a terminal convert to UTF-16 and use WriteConsoleW.
- if (int __fd = __fd_for_console(__os.rdbuf()); __fd >= 0)
+ ostream::sentry __cerb(__os);
+ if (__cerb)
{
- ostream::sentry __cerb(__os);
- if (__cerb)
+ string __out = std::vformat(__fmt, __args);
+ std::__to_valid_utf8(__out);
+
+#ifdef _WIN32
+ int __fd_for_console(std::streambuf*);
+ void __write_utf16_to_console(int, string);
+
+ // If stream refers to a terminal output UTF-16 using WriteConsoleW.
+ if (int __fd = __fd_for_console(__os.rdbuf()); __fd >= 0)
{
- string __out = std::vformat(__fmt, __args);
ios_base::iostate __err = ios_base::goodbit;
__try
- {
- if (__os.rdbuf()->pubsync() == -1)
- __err = ios::badbit;
- else if (__write_utf16_to_console(__fd, __out))
- __err = ios::badbit;
- }
+ {
+ if (__os.rdbuf()->pubsync() == -1)
+ __err = ios::badbit;
+ else if (__write_utf16_to_console(__fd, __out))
+ __err = ios::badbit;
+ }
__catch(const __cxxabiv1::__forced_unwind&)
- {
- __os._M_setstate(ios_base::badbit);
- __throw_exception_again;
- }
+ {
+ __os._M_setstate(ios_base::badbit);
+ __throw_exception_again;
+ }
__catch(...)
- { __os._M_setstate(ios_base::badbit); }
+ { __os._M_setstate(ios_base::badbit); }
if (__err)
__os.setstate(__err);
+ return;
}
- }
#endif
- std::vprint_nonunicode(__os, __fmt, __args);
- }
+ __try
+ {
+ const streamsize __w = __os.width();
+ const streamsize __n = __out.size();
+ if (__w > __n)
+ {
+ const bool __left
+ = (__os.flags() & ios_base::adjustfield) == ios_base::left;
+ if (!__left)
+ std::__ostream_fill(__os, __w - __n);
+ if (__os.good())
+ std::__ostream_write(__os, __out.data(), __n);
+ if (__left && __os.good())
+ std::__ostream_fill(__os, __w - __n);
+ }
+ else
+ std::__ostream_write(__os, __out.data(), __n);
+ }
+ __catch(const __cxxabiv1::__forced_unwind&)
+ {
+ __os._M_setstate(ios_base::badbit);
+ __throw_exception_again;
+ }
+ __catch(...)
+ { __os._M_setstate(ios_base::badbit); }
+ }
+ }
template<typename... _Args>
inline void
@@ -62,7 +62,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
inline void
vprint_unicode(FILE* __stream, string_view __fmt, format_args __args)
{
- // TODO: diagnose invalid UTF-8 code units
+ string __out = std::vformat(__fmt, __args);
+ std::__to_valid_utf8(__out);
+
#ifdef _WIN32
int __fd_for_console(FILE*);
void __write_utf16_to_console(int, string);
@@ -82,7 +84,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_THROW_OR_ABORT(system_error(__e, "std::vprint_unicode"));
}
#endif
- std::vprint_nonunicode(__stream, __fmt, __args);
+
+ if (std::fwrite(__out.data(), 1, __out.size(), __stream) != __out.size())
+ __throw_system_error(EIO);
}
template<typename... _Args>