diff mbox series

[v2] scripts/checkpatch: Support codespell checking

Message ID 20231215103448.3822284-1-zhao1.liu@linux.intel.com
State New
Headers show
Series [v2] scripts/checkpatch: Support codespell checking | expand

Commit Message

Zhao Liu Dec. 15, 2023, 10:34 a.m. UTC
From: Zhao Liu <zhao1.liu@intel.com>

Add two spelling check options (--codespell and --codespellfile) to
enhance spelling checks with a dictionary. The implementation is copied
from the Linux kernel's checkpatch.pl.

This check uses the dictionary at "/usr/share/codespell/dictionary.txt"
by default. If no dictionary is found at this path, it looks for the
dictionary shipped with python3's codespell (this requires python3 to
be reachable through the $PATH environment variable and codespell to be
installed, e.g. via "pip install codespell").

Tested-by: Yongwei Ma <yongwei.ma@intel.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
---
Changes since v1:
* Drop the default dictionary "spelling.txt" and just support optional
  spelling check via --codespell and --codespellfile. (Thomas)

v1: https://lore.kernel.org/qemu-devel/20231204082917.2430223-1-zhao1.liu@linux.intel.com/

(CC more maintainers who are suggested by get_maintainer.pl in v2.)
---
 scripts/checkpatch.pl | 125 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 105 insertions(+), 20 deletions(-)

Comments

Zhao Liu Jan. 3, 2024, 2:07 p.m. UTC | #1
Hi maintainers,

Just a kind ping for review. :-)

Thanks,
Zhao

On Fri, Dec 15, 2023 at 06:34:48PM +0800, Zhao Liu wrote:
> Date: Fri, 15 Dec 2023 18:34:48 +0800
> From: Zhao Liu <zhao1.liu@linux.intel.com>
> Subject: [PATCH v2] scripts/checkpatch: Support codespell checking
> X-Mailer: git-send-email 2.34.1
> 
> From: Zhao Liu <zhao1.liu@intel.com>
> 
> Add two spelling check options (--codespell and --codespellfile) to
> enhance spelling checks with a dictionary. The implementation is copied
> from the Linux kernel's checkpatch.pl.
> 
> This check uses the dictionary at "/usr/share/codespell/dictionary.txt"
> by default. If no dictionary is found at this path, it looks for the
> dictionary shipped with python3's codespell (this requires python3 to
> be reachable through the $PATH environment variable and codespell to be
> installed, e.g. via "pip install codespell").
> 
> Tested-by: Yongwei Ma <yongwei.ma@intel.com>
> Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
> ---
> Changes since v1:
> * Drop the default dictionary "spelling.txt" and just support optional
>   spelling check via --codespell and --codespellfile. (Thomas)
> 
> v1: https://lore.kernel.org/qemu-devel/20231204082917.2430223-1-zhao1.liu@linux.intel.com/
> 
> (CC more maintainers who are suggested by get_maintainer.pl in v2.)
> ---
>  scripts/checkpatch.pl | 125 +++++++++++++++++++++++++++++++++++-------
>  1 file changed, 105 insertions(+), 20 deletions(-)
> 
> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
> index 6e4100d2a41c..45a5c66e3eab 100755
> --- a/scripts/checkpatch.pl
> +++ b/scripts/checkpatch.pl
> @@ -35,6 +35,9 @@ my $summary_file = 0;
>  my $root;
>  my %debug;
>  my $help = 0;
> +my $codespell = 0;
> +my $codespellfile = "/usr/share/codespell/dictionary.txt";
> +my $user_codespellfile = "";
>  
>  sub help {
>  	my ($exitcode) = @_;
> @@ -66,6 +69,9 @@ Options:
>                               is all off)
>    --test-only=WORD           report only warnings/errors containing WORD
>                               literally
> +  --codespell                Use the codespell dictionary for spelling/typos
> +                             (default:$codespellfile)
> +  --codespellfile            Use this codespell dictionary
>    --color[=WHEN]             Use colors 'always', 'never', or only when output
>                               is a terminal ('auto'). Default is 'auto'.
>    -h, --help, --version      display this help and exit
> @@ -85,28 +91,50 @@ foreach (@ARGV) {
>  }
>  
>  GetOptions(
> -	'q|quiet+'	=> \$quiet,
> -	'tree!'		=> \$tree,
> -	'signoff!'	=> \$chk_signoff,
> -	'patch!'	=> \$chk_patch,
> -	'branch!'	=> \$chk_branch,
> -	'emacs!'	=> \$emacs,
> -	'terse!'	=> \$terse,
> -	'f|file!'	=> \$file,
> -	'strict!'	=> \$no_warnings,
> -	'root=s'	=> \$root,
> -	'summary!'	=> \$summary,
> -	'mailback!'	=> \$mailback,
> -	'summary-file!'	=> \$summary_file,
> -
> -	'debug=s'	=> \%debug,
> -	'test-only=s'	=> \$tst_only,
> -	'color=s'       => \$color,
> -	'no-color'      => sub { $color = 'never'; },
> -	'h|help'	=> \$help,
> -	'version'	=> \$help
> +	'q|quiet+'		=> \$quiet,
> +	'tree!'			=> \$tree,
> +	'signoff!'		=> \$chk_signoff,
> +	'patch!'		=> \$chk_patch,
> +	'branch!'		=> \$chk_branch,
> +	'emacs!'		=> \$emacs,
> +	'terse!'		=> \$terse,
> +	'f|file!'		=> \$file,
> +	'strict!'		=> \$no_warnings,
> +	'root=s'		=> \$root,
> +	'summary!'		=> \$summary,
> +	'mailback!'		=> \$mailback,
> +	'summary-file!'		=> \$summary_file,
> +	'debug=s'		=> \%debug,
> +	'test-only=s'		=> \$tst_only,
> +	'codespell!'		=> \$codespell,
> +	'codespellfile=s'	=> \$user_codespellfile,
> +	'color=s'		=> \$color,
> +	'no-color'		=> sub { $color = 'never'; },
> +	'h|help'		=> \$help,
> +	'version'		=> \$help
>  ) or help(1);
>  
> +if ($user_codespellfile) {
> +	# Use the user provided codespell file unconditionally
> +	$codespellfile = $user_codespellfile;
> +} elsif (!(-f $codespellfile)) {
> +	# If /usr/share/codespell/dictionary.txt is not present, try to find it
> +	# under codespell's install directory: <codespell_root>/data/dictionary.txt
> +	if (($codespell || $help) && which("python3") ne "") {
> +		my $python_codespell_dict = << "EOF";
> +
> +import os.path as op
> +import codespell_lib
> +codespell_dir = op.dirname(codespell_lib.__file__)
> +codespell_file = op.join(codespell_dir, 'data', 'dictionary.txt')
> +print(codespell_file, end='')
> +EOF
> +
> +		my $codespell_dict = `python3 -c "$python_codespell_dict" 2> /dev/null`;
> +		$codespellfile = $codespell_dict if (-f $codespell_dict);
> +	}
> +}
> +
>  help(0) if ($help);
>  
>  my $exit = 0;
> @@ -337,6 +365,36 @@ our @typeList = (
>  	qr{guintptr},
>  );
>  
> +# Load common spelling mistakes and build regular expression list.
> +my $misspellings;
> +my %spelling_fix;
> +
> +if ($codespell) {
> +	if (open(my $spelling, '<', $codespellfile)) {
> +		while (<$spelling>) {
> +			my $line = $_;
> +
> +			$line =~ s/\s*\n?$//g;
> +			$line =~ s/^\s*//g;
> +
> +			next if ($line =~ m/^\s*#/);
> +			next if ($line =~ m/^\s*$/);
> +			next if ($line =~ m/, disabled/i);
> +
> +			$line =~ s/,.*$//;
> +
> +			my ($suspect, $fix) = split(/->/, $line);
> +
> +			$spelling_fix{$suspect} = $fix;
> +		}
> +		close($spelling);
> +	} else {
> +		warn "No codespell typos will be found - file '$codespellfile': $!\n";
> +	}
> +}
> +
> +$misspellings = join("|", sort keys %spelling_fix) if keys %spelling_fix;
> +
>  # This can be modified by sub possible.  Since it can be empty, be careful
>  # about regexes that always match, because they can cause infinite loops.
>  our @modifierList = (
> @@ -477,6 +535,18 @@ sub top_of_kernel_tree {
>  	return 1;
>  }
>  
> +sub which {
> +	my ($bin) = @_;
> +
> +	foreach my $path (split(/:/, $ENV{PATH})) {
> +		if (-e "$path/$bin") {
> +			return "$path/$bin";
> +		}
> +	}
> +
> +	return "";
> +}
> +
>  sub expand_tabs {
>  	my ($str) = @_;
>  
> @@ -1585,6 +1655,21 @@ sub process {
>  			WARN("8-bit UTF-8 used in possible commit log\n" . $herecurr);
>  		}
>  
> +# Check for various typo / spelling mistakes
> +		if (defined($misspellings) &&
> +		    ($in_commit_log || $line =~ /^(?:\+|Subject:)/i)) {
> +			while ($rawline =~ /(?:^|[^\w\-'`])($misspellings)(?:[^\w\-'`]|$)/gi) {
> +				my $typo = $1;
> +				my $blank = copy_spacing($rawline);
> +				my $ptr = substr($blank, 0, $-[1]) . "^" x length($typo);
> +				my $hereptr = "$hereline$ptr\n";
> +				my $typo_fix = $spelling_fix{lc($typo)};
> +				$typo_fix = ucfirst($typo_fix) if ($typo =~ /^[A-Z]/);
> +				$typo_fix = uc($typo_fix) if ($typo =~ /^[A-Z]+$/);
> +				WARN("'$typo' may be misspelled - perhaps '$typo_fix'?\n" . $hereptr);
> +			}
> +		}
> +
>  # ignore non-hunk lines and lines being removed
>  		next if (!$hunk_line || $line =~ /^-/);
>  
> -- 
> 2.34.1
> 
>
Samuel Tardieu Jan. 3, 2024, 2:36 p.m. UTC | #2
> +  --codespell                Use the codespell dictionary for 
> spelling/typos
> +                             (default:$codespellfile)

Nitpick: I would have used a space after ":".

> +	# If /usr/share/codespell/dictionary.txt is not present, 
> try to find it
> +	# under codespell's install directory: 
> <codespell_root>/data/dictionary.txt

This works correctly on my NixOS system using a non-FHS layout and 
properly locates the codespell file.

This patch made me find a typo in one of my commit messages.

Tested-by: Samuel Tardieu <sam@rfc1149.net>

  Sam
Zhao Liu Jan. 4, 2024, 4:21 p.m. UTC | #3
Hi Samuel,

On Wed, Jan 03, 2024 at 03:36:59PM +0100, Samuel Tardieu wrote:
> Date: Wed, 03 Jan 2024 15:36:59 +0100
> From: Samuel Tardieu <samuel.tardieu@telecom-paris.fr>
> Subject: Re: [PATCH v2] scripts/checkpatch: Support codespell checking
> 
> > +  --codespell                Use the codespell dictionary for
> > spelling/typos
> > +                             (default:$codespellfile)
> 
> Nitpick: I would have used a space after ":".

OK! I'll add it to improve the readability of the help printout.

> 
> > +	# If /usr/share/codespell/dictionary.txt is not present, try to find
> > it
> > +	# under codespell's install directory:
> > <codespell_root>/data/dictionary.txt
> 
> This works correctly on my NixOS system using a non-FHS layout and properly
> locates the codespell file.
> 
> This patch made me find a typo in one of my commit messages.
> 
> Tested-by: Samuel Tardieu <sam@rfc1149.net>
>

Thanks! Good to know this enhancement can help you!

Regards,
Zhao
diff mbox series

Patch

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 6e4100d2a41c..45a5c66e3eab 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -35,6 +35,9 @@  my $summary_file = 0;
 my $root;
 my %debug;
 my $help = 0;
+my $codespell = 0;
+my $codespellfile = "/usr/share/codespell/dictionary.txt";
+my $user_codespellfile = "";
 
 sub help {
 	my ($exitcode) = @_;
@@ -66,6 +69,9 @@  Options:
                              is all off)
   --test-only=WORD           report only warnings/errors containing WORD
                              literally
+  --codespell                Use the codespell dictionary for spelling/typos
+                             (default:$codespellfile)
+  --codespellfile            Use this codespell dictionary
   --color[=WHEN]             Use colors 'always', 'never', or only when output
                              is a terminal ('auto'). Default is 'auto'.
   -h, --help, --version      display this help and exit
@@ -85,28 +91,50 @@  foreach (@ARGV) {
 }
 
 GetOptions(
-	'q|quiet+'	=> \$quiet,
-	'tree!'		=> \$tree,
-	'signoff!'	=> \$chk_signoff,
-	'patch!'	=> \$chk_patch,
-	'branch!'	=> \$chk_branch,
-	'emacs!'	=> \$emacs,
-	'terse!'	=> \$terse,
-	'f|file!'	=> \$file,
-	'strict!'	=> \$no_warnings,
-	'root=s'	=> \$root,
-	'summary!'	=> \$summary,
-	'mailback!'	=> \$mailback,
-	'summary-file!'	=> \$summary_file,
-
-	'debug=s'	=> \%debug,
-	'test-only=s'	=> \$tst_only,
-	'color=s'       => \$color,
-	'no-color'      => sub { $color = 'never'; },
-	'h|help'	=> \$help,
-	'version'	=> \$help
+	'q|quiet+'		=> \$quiet,
+	'tree!'			=> \$tree,
+	'signoff!'		=> \$chk_signoff,
+	'patch!'		=> \$chk_patch,
+	'branch!'		=> \$chk_branch,
+	'emacs!'		=> \$emacs,
+	'terse!'		=> \$terse,
+	'f|file!'		=> \$file,
+	'strict!'		=> \$no_warnings,
+	'root=s'		=> \$root,
+	'summary!'		=> \$summary,
+	'mailback!'		=> \$mailback,
+	'summary-file!'		=> \$summary_file,
+	'debug=s'		=> \%debug,
+	'test-only=s'		=> \$tst_only,
+	'codespell!'		=> \$codespell,
+	'codespellfile=s'	=> \$user_codespellfile,
+	'color=s'		=> \$color,
+	'no-color'		=> sub { $color = 'never'; },
+	'h|help'		=> \$help,
+	'version'		=> \$help
 ) or help(1);
 
+if ($user_codespellfile) {
+	# Use the user provided codespell file unconditionally
+	$codespellfile = $user_codespellfile;
+} elsif (!(-f $codespellfile)) {
+	# If /usr/share/codespell/dictionary.txt is not present, try to find it
+	# under codespell's install directory: <codespell_root>/data/dictionary.txt
+	if (($codespell || $help) && which("python3") ne "") {
+		my $python_codespell_dict = << "EOF";
+
+import os.path as op
+import codespell_lib
+codespell_dir = op.dirname(codespell_lib.__file__)
+codespell_file = op.join(codespell_dir, 'data', 'dictionary.txt')
+print(codespell_file, end='')
+EOF
+
+		my $codespell_dict = `python3 -c "$python_codespell_dict" 2> /dev/null`;
+		$codespellfile = $codespell_dict if (-f $codespell_dict);
+	}
+}
+
 help(0) if ($help);
 
 my $exit = 0;
@@ -337,6 +365,36 @@  our @typeList = (
 	qr{guintptr},
 );
 
+# Load common spelling mistakes and build regular expression list.
+my $misspellings;
+my %spelling_fix;
+
+if ($codespell) {
+	if (open(my $spelling, '<', $codespellfile)) {
+		while (<$spelling>) {
+			my $line = $_;
+
+			$line =~ s/\s*\n?$//g;
+			$line =~ s/^\s*//g;
+
+			next if ($line =~ m/^\s*#/);
+			next if ($line =~ m/^\s*$/);
+			next if ($line =~ m/, disabled/i);
+
+			$line =~ s/,.*$//;
+
+			my ($suspect, $fix) = split(/->/, $line);
+
+			$spelling_fix{$suspect} = $fix;
+		}
+		close($spelling);
+	} else {
+		warn "No codespell typos will be found - file '$codespellfile': $!\n";
+	}
+}
+
+$misspellings = join("|", sort keys %spelling_fix) if keys %spelling_fix;
+
 # This can be modified by sub possible.  Since it can be empty, be careful
 # about regexes that always match, because they can cause infinite loops.
 our @modifierList = (
@@ -477,6 +535,18 @@  sub top_of_kernel_tree {
 	return 1;
 }
 
+sub which {
+	my ($bin) = @_;
+
+	foreach my $path (split(/:/, $ENV{PATH})) {
+		if (-e "$path/$bin") {
+			return "$path/$bin";
+		}
+	}
+
+	return "";
+}
+
 sub expand_tabs {
 	my ($str) = @_;
 
@@ -1585,6 +1655,21 @@  sub process {
 			WARN("8-bit UTF-8 used in possible commit log\n" . $herecurr);
 		}
 
+# Check for various typo / spelling mistakes
+		if (defined($misspellings) &&
+		    ($in_commit_log || $line =~ /^(?:\+|Subject:)/i)) {
+			while ($rawline =~ /(?:^|[^\w\-'`])($misspellings)(?:[^\w\-'`]|$)/gi) {
+				my $typo = $1;
+				my $blank = copy_spacing($rawline);
+				my $ptr = substr($blank, 0, $-[1]) . "^" x length($typo);
+				my $hereptr = "$hereline$ptr\n";
+				my $typo_fix = $spelling_fix{lc($typo)};
+				$typo_fix = ucfirst($typo_fix) if ($typo =~ /^[A-Z]/);
+				$typo_fix = uc($typo_fix) if ($typo =~ /^[A-Z]+$/);
+				WARN("'$typo' may be misspelled - perhaps '$typo_fix'?\n" . $hereptr);
+			}
+		}
+
 # ignore non-hunk lines and lines being removed
 		next if (!$hunk_line || $line =~ /^-/);