diff mbox

[gfortran] Cleanup the submodule tests

Message ID DA9EB1DB-F121-49E0-84B9-5FBBC8EDF612@lps.ens.fr
State New
Headers show

Commit Message

Dominique d'Humières April 15, 2017, 11:24 p.m. UTC
I am currently testing the following patch that handle both modules and submodules. It is a little bit clumsy and may not handle all the possible syntax variants. Any comment welcomed!-) Testing in progress.

Dominique


> Le 15 avr. 2017 à 18:50, Janus Weil <janus@gcc.gnu.org> a écrit :
> 
> 2017-04-15 17:54 GMT+02:00 Dominique d'Humières <dominiq@lps.ens.fr>:
>>>> This is indeed doable, but before I’ld like to improve the module cleanup with the following patch
>>> 
>>> Yes, looks very useful to me (makes the regexps much more compact &
>>> readable). In addition, couldn't one use \s for whitespace instead of
>>> \[ \t\]   ?
>> 
>> I have posted what I have in my working tree. I’ll test the use of \s instead of \[ \t\] . Is it really portable?
> 
> Not sure. But at least some of the other files in gcc/testsuite/lib
> seem to use \s as well (e.g. gcc-dg.exp).
> 
> 
>>> I assume your igrep is just a copy of dejagnu's grep with an additional -nocase?
>> 
>> Yes!
> 
> Ok to commit  from my side, if you have tested that it works properly.
> 
> Cheers,
> Janus

Comments

Janus Weil April 16, 2017, 8:13 p.m. UTC | #1
Hi Dominique,

> I am currently testing the following patch that handle both modules and submodules. It is a little bit clumsy and may not handle all the possible syntax variants. Any comment welcomed!-) Testing in progress.

so, how did the testing go?


>  proc list-module-names-1 { file } {
>      set result {}
> -    set tmp [grep $file "^\[ \t\]*((#)?\[ \t\]*include|\[mM\]\[oO\]\[dD\]\[uU\]\[lL\]\[eE\](?!\[ \t\]+\[pP\]\[rR\]\[oO\]\[cC\]\[eE\]\[dD\]\[uU\]\[rR\]\[eE\]\[ \t\]+))\[ \t\]+.*" line]
> +    if {[file isdirectory $file]} {return}
> +    set tmp [igrep $file "^\\s*((#)?\\s*include|(sub)?module(?!\\s+(recursive\\s+)?(procedure|subroutine|function)\\s*))\\s*.*" line]

This is supposed to catch all lines including "module", but not
"module procedure", "module subroutine" etc, right? Why do you need
"recursive" here, but no other attributes like "pure" or "elemental"?


>      if {![string match "" $tmp]} {
>         foreach i $tmp {
> -           regexp "(\[0-9\]+)\[ \t\]+(?:(?:#)?\[ \t\]*include\[ \t\]+)\[\"\](\[^\"\]*)\[\"\]" $i dummy lineno include_file
> +           regexp -nocase "(\[0-9\]+)\\s+(?:(?:#)?\\s*include\\s+)\[\"\'\](\[^\"\'\]*)\[\"\'\]" $i dummy lineno include_file

My regex sorcery may be a bit rusty, but why does \\s need a double
escape while \t appears with single escape?


> @@ -99,7 +100,11 @@ proc list-module-names-1 { file } {
>                 }
>                 continue
>             }
> -           regexp "(\[0-9\]+)\[ \t\]+(?:(\[mM\]\[oO\]\[dD\]\[uU\]\[lL\]\[eE\]\[ \t\]+(?!\[pP\]\[rR\]\[oO\]\[cC\]\[eE\]\[dD\]\[uU\]\[rR\]\[eE\]\[ \t\]+)))(\[^ \t;\]*)" $i i lineno keyword mod
> +           regexp -nocase "(\[0-9\]+)\\s+(module|submodule)\\s*(\[^;\]*)" $i i lineno keyword mod
> +           regsub "\\s*!.*" $mod "" mod
> +           regsub ":\[^)\]*" $mod "" mod
> +           regsub "\\(\\s*" $mod "" mod
> +           regsub "\\s*\\)\\s*" $mod "@" mod
>             if {![info exists lineno]} {
>                 continue
>             }

Not sure I understand this part. Guess I'm spending too little time
with regexps to find them readable :(

Cheers,
Janus
Segher Boessenkool April 17, 2017, 12:36 p.m. UTC | #2
On Sun, Apr 16, 2017 at 10:13:07PM +0200, Janus Weil wrote:
> >      if {![string match "" $tmp]} {
> >         foreach i $tmp {
> > -           regexp "(\[0-9\]+)\[ \t\]+(?:(?:#)?\[ \t\]*include\[ \t\]+)\[\"\](\[^\"\]*)\[\"\]" $i dummy lineno include_file
> > +           regexp -nocase "(\[0-9\]+)\\s+(?:(?:#)?\\s*include\\s+)\[\"\'\](\[^\"\'\]*)\[\"\'\]" $i dummy lineno include_file
> 
> My regex sorcery may be a bit rusty, but why does \\s need a double
> escape while \t appears with single escape?

"\t" is a tab character.
"\\t" is the string \t (which in a regexp stands for a tab).
"\s" is just s (since \s is not a special character).
"\\s" is \s .

Since you do not want any substitution in the regexp string anyway, it is
way more readable if written as a braced string than as a double-quoted
string:

+           regexp -nocase {([0-9]+)\s+(?:(?:#)?\s*include\s+)["']([^"']*)["']} $i dummy lineno include_file

You can also write [0-9] as \d, and (?:#) as just # and the other (?:...)
doesn't do anything either.  So that gives

+           regexp -nocase {(\d+)\s+#?\s*include\s+["']([^"']*)["']} $i dummy lineno include_file

and a little bit more work may make it fit on a line even ;-)


Segher
Segher Boessenkool June 4, 2017, 6:26 p.m. UTC | #3
On Sun, Jun 04, 2017 at 02:52:36PM +0200, Dominique d'Humières wrote:
> >> -           regexp "(\[0-9\]+)\[ \t\]+(?:(?:#)?\[ \t\]*include\[ \t\]+)\[\"\](\[^\"\]*)\[\"\]" $i dummy lineno include_file
> >> +           regexp -nocase "(\[0-9\]+)\\s+(?:(?:#)?\\s*include\\s+)\[\"\'\](\[^\"\'\]*)\[\"\'\]" $i dummy lineno include_file
> > 
> > My regex sorcery may be a bit rusty, but why does \\s need a double
> > escape while \t appears with single escape?
> 
> I have used the Segher’s tips.

Nah, that wasn't a tip.  A tip is: don't use double quotes unless you
actually want double quotes.  You can write this as:

  regexp -nocase {([0-9]+)\s+(?:(?:#)?\s*include\s+)["']([^"']*)["']} $i dummy lineno include_file

or even

  regexp -nocase {([0-9]+)\s+#?\s*include\s+["']([^"']*)["']} $i dummy lineno include_file

(where I removed the useless (?:) groups as well).

You want double quotes if you want command substitution ("bla[stuff]bla"),
variable substitution ("$something"), or backslash substitution (like "\t"
becomes a tab char).  In a regexp you do not usually have any use for this,
and it hurts so much.


Segher
diff mbox

Patch

--- ../_clean/gcc/testsuite/lib/fortran-modules.exp	2017-01-01 17:38:58.000000000 +0100
+++ gcc/testsuite/lib/fortran-modules.exp	2017-04-16 01:16:25.000000000 +0200
@@ -79,10 +79,11 @@  proc list-module-names { files } {
 
 proc list-module-names-1 { file } {
     set result {}
-    set tmp [grep $file "^\[ \t\]*((#)?\[ \t\]*include|\[mM\]\[oO\]\[dD\]\[uU\]\[lL\]\[eE\](?!\[ \t\]+\[pP\]\[rR\]\[oO\]\[cC\]\[eE\]\[dD\]\[uU\]\[rR\]\[eE\]\[ \t\]+))\[ \t\]+.*" line]
+    if {[file isdirectory $file]} {return}
+    set tmp [igrep $file "^\\s*((#)?\\s*include|(sub)?module(?!\\s+(recursive\\s+)?(procedure|subroutine|function)\\s*))\\s*.*" line]
     if {![string match "" $tmp]} {
 	foreach i $tmp {
-	    regexp "(\[0-9\]+)\[ \t\]+(?:(?:#)?\[ \t\]*include\[ \t\]+)\[\"\](\[^\"\]*)\[\"\]" $i dummy lineno include_file
+	    regexp -nocase "(\[0-9\]+)\\s+(?:(?:#)?\\s*include\\s+)\[\"\'\](\[^\"\'\]*)\[\"\'\]" $i dummy lineno include_file
 	    if {[info exists include_file]} {
 		set dir [file dirname $file]
 		set inc "$dir/$include_file"
@@ -99,7 +100,11 @@  proc list-module-names-1 { file } {
 		}
 		continue
 	    }
-	    regexp "(\[0-9\]+)\[ \t\]+(?:(\[mM\]\[oO\]\[dD\]\[uU\]\[lL\]\[eE\]\[ \t\]+(?!\[pP\]\[rR\]\[oO\]\[cC\]\[eE\]\[dD\]\[uU\]\[rR\]\[eE\]\[ \t\]+)))(\[^ \t;\]*)" $i i lineno keyword mod
+	    regexp -nocase "(\[0-9\]+)\\s+(module|submodule)\\s*(\[^;\]*)" $i i lineno keyword mod
+	    regsub "\\s*!.*" $mod "" mod
+	    regsub ":\[^)\]*" $mod "" mod
+	    regsub "\\(\\s*" $mod "" mod
+	    regsub "\\s*\\)\\s*" $mod "@" mod
 	    if {![info exists lineno]} {
 		continue
 	    }
@@ -111,3 +116,54 @@  proc list-module-names-1 { file } {
     }
     return $result
 }
+
+# Looks for case insensitive occurrences of a string in a file.
+#     return:list of lines that matched or NULL if none match.
+#     args:  first arg is the filename,
+#            second is the pattern,
+#            third are any options.
+#     Options: line  - puts line numbers of match in list
+#
+proc igrep { args } {
+
+    set file [lindex $args 0]
+    set pattern [lindex $args 1]
+
+    verbose "Grepping $file for the pattern \"$pattern\"" 3
+
+    set argc [llength $args]
+    if { $argc > 2 } {
+        for { set i 2 } { $i < $argc } { incr i } {
+            append options [lindex $args $i]
+            append options " "
+        }
+    } else {
+        set options ""
+    }
+
+    set i 0
+    set fd [open $file r]
+    while { [gets $fd cur_line]>=0 } {
+        incr i
+        if {[regexp -nocase -- "$pattern" $cur_line match]} {
+            if {![string match "" $options]} {
+                foreach opt $options {
+                    switch $opt {
+                        "line" {
+                            lappend grep_out [concat $i $match]
+                        }
+                    }
+                }
+            } else {
+                lappend grep_out $match
+            }
+        }
+    }
+    close $fd
+    unset fd
+    unset i
+    if {![info exists grep_out]} {
+        set grep_out ""
+    }
+    return $grep_out
+}