diff mbox

Support utf8 chars in pod docs

Message ID 1332195957-23987-1-git-send-email-mjt@msgid.tls.msk.ru
State New
Headers show

Commit Message

Michael Tokarev March 19, 2012, 10:25 p.m. UTC
We have at least one UTF-8 character in the qemu texi doc:

 $ grep Tibor qemu-doc.texi
 by Tibor "TS" Schütz.
 $ man ./qemu.1 | grep Tibor
        by Tibor "TS" SchA~Xtz.

This patch allows utf8 in man/pod docs.

Initially it was split into two parts and sent on 2012-02-02.
Resending it again (3rd time) now in merged form.  If any
other generalizations of $(POD2MAN) are needed, they can be done
in a separate patch.  The current form of $(POD2MAN) was chosen
so that it can easily be changed if some implementation does
not support utf8 or the resulting output has issues with the local
man(1) program/macros.

First, add @documentencoding in scripts/texi2pod.pl:

Currently our texi2pod ignores @documentencoding even if it is set
properly in *.texi files.  This results in mojibake in documents
generated from qemu.pod (which is generated from qemu-doc.texi by
texi2pod), because the rest of the tools assume ASCII encoding.

This patch recognizes the first @documentencoding in the input and
places it at the beginning of the output as an =encoding directive.

Second, run pod2man with --utf8 option to enable utf8 in manpages:

This option makes no difference for manpages which contain only
ASCII chars.  But for manpages with actual UTF-8 characters (qemu
docs contain these), this change allows one to see the real characters
instead of mojibake or substitutes.

Signed-off-By: Michael Tokarev <mjt@tls.msk.ru>
---
 Makefile            |    9 +++++----
 scripts/texi2pod.pl |    9 +++++++++
 2 files changed, 14 insertions(+), 4 deletions(-)

Comments

Blue Swirl March 24, 2012, 4:13 p.m. UTC | #1
Thanks, applied.

On Mon, Mar 19, 2012 at 22:25, Michael Tokarev <mjt@tls.msk.ru> wrote:
> We've at least one UTF8 char in the qemu texi doc:
>
>  $ grep Tibor qemu-doc.texi
>  by Tibor "TS" Schütz.
>  $ man ./qemu.1 | grep Tibor
>        by Tibor "TS" SchA~Xtz.
>
> This patch allows utf8 in man/pod docs.
>
> Initially it was split into two parts and sent on 2012-02-02.
> Resending it again (3rd time) now in merged form.  If any
> other generalizations of $(POD2MAN) are needed it can be done
> in a separate patch.  Current form of $(POD2MAN) is choosen
> to be able to easily change it if some implementation does
> not support utf8 or resulting output has issues with local
> man(1) program/macros.
>
> First, add @documentencoding in scripts/texi2pod.pl:
>
> Currently our texi2pod ignores @documentencoding even if it is set
> properly in *.texi files.  This results in a mojibake in documents
> generated from qemu.pod (which is generated from qemu-doc.texi by
> texi2pod), because the rest of the tools assumes ASCII encoding.
>
> This patch recognizes first @documentencoding in input and places
> it at the beginning of output as =encoding directive.
>
> Second, run pod2man with --utf8 option to enable utf8 in manpages:
>
> This option makes no difference for manpages which contains only
> ascii chars.  But for manpages with actual UTF8 characters (qemu
> docs contains these), this change allows to see real characters
> instead of mojibakes or substitutes.
>
> Signed-off-By: Michael Tokarev <mjt@tls.msk.ru>
> ---
>  Makefile            |    9 +++++----
>  scripts/texi2pod.pl |    9 +++++++++
>  2 files changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/Makefile b/Makefile
> index 1bc3cb0..8d6b558 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -347,28 +347,29 @@ QMP/qmp-commands.txt: $(SRC_PATH)/qmp-commands.hx
>  qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx
>        $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"  GEN   $@")
>
> +POD2MAN = pod2man --utf8
>  qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi
>        $(call quiet-command, \
>          perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu.pod && \
> -         pod2man --section=1 --center=" " --release=" " qemu.pod > $@, \
> +         $(POD2MAN) --section=1 --center=" " --release=" " qemu.pod > $@, \
>          "  GEN   $@")
>
>  qemu-img.1: qemu-img.texi qemu-img-cmds.texi
>        $(call quiet-command, \
>          perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-img.pod && \
> -         pod2man --section=1 --center=" " --release=" " qemu-img.pod > $@, \
> +         $(POD2MAN) --section=1 --center=" " --release=" " qemu-img.pod > $@, \
>          "  GEN   $@")
>
>  fsdev/virtfs-proxy-helper.1: fsdev/virtfs-proxy-helper.texi
>        $(call quiet-command, \
>          perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< fsdev/virtfs-proxy-helper.pod && \
> -         pod2man --section=1 --center=" " --release=" " fsdev/virtfs-proxy-helper.pod > $@, \
> +         $(POD2MAN) --section=1 --center=" " --release=" " fsdev/virtfs-proxy-helper.pod > $@, \
>          "  GEN   $@")
>
>  qemu-nbd.8: qemu-nbd.texi
>        $(call quiet-command, \
>          perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-nbd.pod && \
> -         pod2man --section=8 --center=" " --release=" " qemu-nbd.pod > $@, \
> +         $(POD2MAN) --section=8 --center=" " --release=" " qemu-nbd.pod > $@, \
>          "  GEN   $@")
>
>  dvi: qemu-doc.dvi qemu-tech.dvi
> diff --git a/scripts/texi2pod.pl b/scripts/texi2pod.pl
> index 9ed056a..94097fb 100755
> --- a/scripts/texi2pod.pl
> +++ b/scripts/texi2pod.pl
> @@ -36,6 +36,7 @@ $fnno = 1;
>  $inf = "";
>  $ibase = "";
>  @ipath = ();
> +$encoding = undef;
>
>  while ($_ = shift) {
>     if (/^-D(.*)$/) {
> @@ -97,6 +98,12 @@ while(<$inf>) {
>     /^\@setfilename\s+([^.]+)/ and $fn = $1, next;
>     /^\@settitle\s+([^.]+)/ and $tl = postprocess($1), next;
>
> +    # Look for document encoding
> +    /^\@documentencoding\s+([^.]+)/ and do {
> +        $encoding = $1 unless defined $encoding;
> +        next;
> +    };
> +
>     # Identify a man title but keep only the one we are interested in.
>     /^\@c\s+man\s+title\s+([A-Za-z0-9-]+)\s+(.+)/ and do {
>        if (exists $defs{$1}) {
> @@ -336,6 +343,8 @@ $inf = pop @instack;
>
>  die "No filename or title\n" unless defined $fn && defined $tl;
>
> +print "=encoding $encoding\n\n" if defined $encoding;
> +
>  $sects{NAME} = "$fn \- $tl\n";
>  $sects{FOOTNOTES} .= "=back\n" if exists $sects{FOOTNOTES};
>
> --
> 1.7.9.1
>
>
diff mbox

Patch

diff --git a/Makefile b/Makefile
index 1bc3cb0..8d6b558 100644
--- a/Makefile
+++ b/Makefile
@@ -347,28 +347,29 @@  QMP/qmp-commands.txt: $(SRC_PATH)/qmp-commands.hx
 qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"  GEN   $@")
 
+POD2MAN = pod2man --utf8
 qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi
 	$(call quiet-command, \
 	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu.pod && \
-	  pod2man --section=1 --center=" " --release=" " qemu.pod > $@, \
+	  $(POD2MAN) --section=1 --center=" " --release=" " qemu.pod > $@, \
 	  "  GEN   $@")
 
 qemu-img.1: qemu-img.texi qemu-img-cmds.texi
 	$(call quiet-command, \
 	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-img.pod && \
-	  pod2man --section=1 --center=" " --release=" " qemu-img.pod > $@, \
+	  $(POD2MAN) --section=1 --center=" " --release=" " qemu-img.pod > $@, \
 	  "  GEN   $@")
 
 fsdev/virtfs-proxy-helper.1: fsdev/virtfs-proxy-helper.texi
 	$(call quiet-command, \
 	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< fsdev/virtfs-proxy-helper.pod && \
-	  pod2man --section=1 --center=" " --release=" " fsdev/virtfs-proxy-helper.pod > $@, \
+	  $(POD2MAN) --section=1 --center=" " --release=" " fsdev/virtfs-proxy-helper.pod > $@, \
 	  "  GEN   $@")
 
 qemu-nbd.8: qemu-nbd.texi
 	$(call quiet-command, \
 	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-nbd.pod && \
-	  pod2man --section=8 --center=" " --release=" " qemu-nbd.pod > $@, \
+	  $(POD2MAN) --section=8 --center=" " --release=" " qemu-nbd.pod > $@, \
 	  "  GEN   $@")
 
 dvi: qemu-doc.dvi qemu-tech.dvi
diff --git a/scripts/texi2pod.pl b/scripts/texi2pod.pl
index 9ed056a..94097fb 100755
--- a/scripts/texi2pod.pl
+++ b/scripts/texi2pod.pl
@@ -36,6 +36,7 @@  $fnno = 1;
 $inf = "";
 $ibase = "";
 @ipath = ();
+$encoding = undef;
 
 while ($_ = shift) {
     if (/^-D(.*)$/) {
@@ -97,6 +98,12 @@  while(<$inf>) {
     /^\@setfilename\s+([^.]+)/ and $fn = $1, next;
     /^\@settitle\s+([^.]+)/ and $tl = postprocess($1), next;
 
+    # Look for document encoding
+    /^\@documentencoding\s+([^.]+)/ and do {
+        $encoding = $1 unless defined $encoding;
+        next;
+    };
+
     # Identify a man title but keep only the one we are interested in.
     /^\@c\s+man\s+title\s+([A-Za-z0-9-]+)\s+(.+)/ and do {
 	if (exists $defs{$1}) {
@@ -336,6 +343,8 @@  $inf = pop @instack;
 
 die "No filename or title\n" unless defined $fn && defined $tl;
 
+print "=encoding $encoding\n\n" if defined $encoding;
+
 $sects{NAME} = "$fn \- $tl\n";
 $sects{FOOTNOTES} .= "=back\n" if exists $sects{FOOTNOTES};