diff mbox series

[v3,5/6] support/scripts/pkgstat: add target CPE reporting

Message ID 1525725005-41435-6-git-send-email-matthew.weber@rockwellcollins.com
State Changes Requested
Headers show
Series CPE ID Support | expand

Commit Message

Matt Weber May 7, 2018, 8:30 p.m. UTC
Signed-off-by: Matthew Weber <matthew.weber@rockwellcollins.com>
---
 support/scripts/pkg-stats | 103 ++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 86 insertions(+), 17 deletions(-)

Comments

Erik Larsson May 9, 2018, 9 p.m. UTC | #1
Hi!

2018-05-07 22:30 GMT+02:00 Matt Weber <matthew.weber@rockwellcollins.com>:
> Signed-off-by: Matthew Weber <matthew.weber@rockwellcollins.com>
> ---
>  support/scripts/pkg-stats | 103 ++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 86 insertions(+), 17 deletions(-)
>
> diff --git a/support/scripts/pkg-stats b/support/scripts/pkg-stats
> index 43f7e8d..144c00c 100755
> --- a/support/scripts/pkg-stats
> +++ b/support/scripts/pkg-stats
> @@ -24,9 +24,15 @@ from collections import defaultdict
>  import re
>  import subprocess
>  import sys
> +import urllib2
> +import xmltodict
> +import gzip
> +from StringIO import StringIO
> +import csv
>
>  INFRA_RE = re.compile("\$\(eval \$\(([a-z-]*)-package\)\)")
>
> +CPE_XML_URL = "https://static.nvd.nist.gov/feeds/xml/cpe/dictionary/official-cpe-dictionary_v2.3.xml.gz"
>
>  class Package:
>      all_licenses = list()
> @@ -484,6 +490,62 @@ def dump_html(packages, stats, output):
>          dump_gen_info(f)
>          f.write(html_footer)
>
> +class CPE:
> +    all_cpes = dict()
> +
> +    def get_xml_dict(self):
> +        print "CPE: Fetching xml manifest..."
> +        try:
> +            compressed_cpe_file = urllib2.urlopen(CPE_XML_URL)
> +            print "CPE: Unzipping xml manifest..."
> +            cpe_file = gzip.GzipFile(fileobj=StringIO(compressed_cpe_file.read())).read()
> +            print "CPE: Converting xml manifest to dict..."
> +            self.all_cpes = xmltodict.parse(cpe_file)
> +        except urllib2.HTTPError, e:
> +            print "HTTP Error:", e.code, url
> +        except urllib2.URLError, e:
> +            print "URL Error:", e.reason, url
> +
> +    def find_partial(self,cpe_str):
> +        print "CPE: Searching for partial [%s]" % cpe_str
> +        for cpe in self.all_cpes['cpe-list']['cpe-item']:
> +            if cpe_str in cpe['cpe-23:cpe23-item']['@name']:
> +                return cpe['cpe-23:cpe23-item']['@name']
> +
> +    def find(self,cpe_str):
> +        print "CPE: Searching for [%s]" % cpe_str
> +        for cpe in self.all_cpes['cpe-list']['cpe-item']:
> +            if cpe['cpe-23:cpe23-item']['@name'] == cpe_str:
> +                return cpe['cpe-23:cpe23-item']['@name']
> +
> +def get_target_cpe_report(cpe_report_file, cpe_dict):
> +    report_cpe_exact_match = ""
> +    report_cpe_needing_update = ""
> +    report_cpe_missing = ""
> +
> +    print "CPE: Checking for matches..."
> +    with open(cpe_report_file) as cpe_file:

"with open" will cause an exception if the file does not exist and
Python will print an ugly Traceback before stopping.
Maybe you can add a simple test if the file exists. The reason I found
it was because I made a simple typo when testing it :)

> +        cpe_list = csv.reader(cpe_file)
> +        for cpe in cpe_list:
> +            if "CPE ID" not in cpe[0]:
> +                result = cpe_dict.find(cpe[0])
> +                if not result:
> +                    cpe_no_version = cpe[0].split(":")[0]+":"+cpe[0].split(":")[1]+":"+cpe[0].split(":")[2]+":"+cpe[0].split(":")[3]+":"+cpe[0].split(":")[4]
> +                    result = cpe_dict.find_partial(cpe_no_version)
> +                    if not result:
> +                        report_cpe_missing += cpe[0] + "\n"
> +                    else:
> +                        report_cpe_needing_update += cpe[0] + "\n"
> +                else:
> +                    report_cpe_exact_match += cpe[0] + "\n"
> +    print "CPE: Found EXACT match:"
> +    print report_cpe_exact_match
> +    print "CPE: Found but REQUIRES UPDATE:"
> +    print report_cpe_needing_update
> +    print "CPE: Not found (proposing the following to be added):"
> +    print report_cpe_missing
> +
> +
>
>  def parse_args():
>      parser = argparse.ArgumentParser()
> @@ -493,6 +555,8 @@ def parse_args():
>                          help='Number of packages')
>      parser.add_argument('-p', dest='packages', action='store',
>                          help='List of packages (comma separated)')
> +    parser.add_argument('-c', dest='cpe_report', action='store',
> +                        help='Buildroot CPE Report (csv format)')
>      return parser.parse_args()
>
>
> @@ -505,22 +569,27 @@ def __main__():
>          package_list = args.packages.split(",")
>      else:
>          package_list = None
> -    print "Build package list ..."
> -    packages = get_pkglist(args.npackages, package_list)
> -    print "Getting package make info ..."
> -    package_init_make_info()
> -    print "Getting package details ..."
> -    for pkg in packages:
> -        pkg.set_infra()
> -        pkg.set_license()
> -        pkg.set_hash_info()
> -        pkg.set_patch_count()
> -        pkg.set_check_package_warnings()
> -        pkg.set_current_version()
> -    print "Calculate stats"
> -    stats = calculate_stats(packages)
> -    print "Write HTML"
> -    dump_html(packages, stats, args.output)
> -
> +    if args.cpe_report:
> +        print "Performing Target CPE Report Analysis..."
> +        cpe_dict = CPE()
> +        cpe_dict.get_xml_dict()
> +        get_target_cpe_report(args.cpe_report,cpe_dict)
> +    else:
> +       print "Build package list ..."
> +       packages = get_pkglist(args.npackages, package_list)
> +       print "Getting package make info ..."
> +       package_init_make_info()
> +       print "Getting package details ..."
> +       for pkg in packages:
> +          pkg.set_infra()
> +          pkg.set_license()
> +          pkg.set_hash_info()
> +          pkg.set_patch_count()
> +          pkg.set_check_package_warnings()
> +          pkg.set_current_version()
> +       print "Calculate stats"
> +       stats = calculate_stats(packages)
> +       print "Write HTML"
> +       dump_html(packages, stats, args.output)
>
>  __main__()
> --
> 1.9.1
>
> _______________________________________________
> buildroot mailing list
> buildroot@busybox.net
> http://lists.busybox.net/mailman/listinfo/buildroot

Best regards,
Erik
Matt Weber May 9, 2018, 9:36 p.m. UTC | #2
Erik,

On Wed, May 9, 2018 at 4:00 PM, Erik Larsson
<karl.erik.larsson@gmail.com> wrote:
> Hi!
>
> 2018-05-07 22:30 GMT+02:00 Matt Weber <matthew.weber@rockwellcollins.com>:
>> Signed-off-by: Matthew Weber <matthew.weber@rockwellcollins.com>
>> ---
>>  support/scripts/pkg-stats | 103 ++++++++++++++++++++++++++++++++++++++--------
>>  1 file changed, 86 insertions(+), 17 deletions(-)
>>
[snip]
>> +def get_target_cpe_report(cpe_report_file, cpe_dict):
>> +    report_cpe_exact_match = ""
>> +    report_cpe_needing_update = ""
>> +    report_cpe_missing = ""
>> +
>> +    print "CPE: Checking for matches..."
>> +    with open(cpe_report_file) as cpe_file:
>
> "with open" will cause an exception if the file does not exist and
> Python will print an ugly Traceback before stopping.
> Maybe you can add a simple test if the file exists. The reason I found
> it was because I made a simple typo when testing it :)
>

Noted for next rev of this series.  Any other feedback on the behavior
of a target report?  (csv content, pkgstat output, etc?)


Matt
Ricardo Martincoski May 10, 2018, 3:03 a.m. UTC | #3
Hello,

This is not a complete code-review, but it seems you will send a new iteration.

When sending new iteration(s) of this series, could you run flake8 on patches 5
and 6 and fix all warnings?
https://gitlab.com/RicardoMartincoski/buildroot/-/jobs/67435667


On Mon, May 07, 2018 at 05:30 PM, Matt Weber wrote:

> support/scripts/pkgstat: add target CPE reporting

For this patch and also for patch 6, one of these prefixes would be better:
support/scripts/pkg-stats: ...
pkg-stats: ...

[snip]
> +    def get_xml_dict(self):
> +        print "CPE: Fetching xml manifest..."
> +        try:
> +            compressed_cpe_file = urllib2.urlopen(CPE_XML_URL)
> +            print "CPE: Unzipping xml manifest..."
> +            cpe_file = gzip.GzipFile(fileobj=StringIO(compressed_cpe_file.read())).read()
> +            print "CPE: Converting xml manifest to dict..."
> +            self.all_cpes = xmltodict.parse(cpe_file)
> +        except urllib2.HTTPError, e:
> +            print "HTTP Error:", e.code, url
> +        except urllib2.URLError, e:
> +            print "URL Error:", e.reason, url

(after fixing the warning from flake8)
If the download fails the script continues and dies like this:

CPE: Fetching xml manifest...
URL Error: [Errno -2] Name or service not known
CPE: Checking for matches...
CPE: Searching for [cpe:2.3:a:gnu:bash:4.4.18:*:*:*:*:*:*:*]
Traceback (most recent call last):
  File "support/scripts/pkg-stats", line 595, in <module>
    __main__()
  File "support/scripts/pkg-stats", line 576, in __main__
    get_target_cpe_report(args.cpe_report,cpe_dict)
  File "support/scripts/pkg-stats", line 531, in get_target_cpe_report
    result = cpe_dict.find(cpe[0])
  File "support/scripts/pkg-stats", line 517, in find
    for cpe in self.all_cpes['cpe-list']['cpe-item']:
KeyError: 'cpe-list'

Maybe this is what you want:
        except ...
            print ...
            sys.exit(1)

[snip]
> +def get_target_cpe_report(cpe_report_file, cpe_dict):
> +    report_cpe_exact_match = ""
> +    report_cpe_needing_update = ""
> +    report_cpe_missing = ""
> +
> +    print "CPE: Checking for matches..."
> +    with open(cpe_report_file) as cpe_file:
> +        cpe_list = csv.reader(cpe_file)
> +        for cpe in cpe_list:
> +            if "CPE ID" not in cpe[0]:
           
The .csv file contains always only one header, right?
If so, you could do like it is done on utils/size-stats-compare (code not
tested):
        cpe_list = csv.reader(cpe_file)
        header = next(cpe_list)
        (... code to validate the header ...)
        for cpe in cpe_list:
            (... assume it is an entry, not a header...)

> +                result = cpe_dict.find(cpe[0])
> +                if not result:
> +                    cpe_no_version = cpe[0].split(":")[0]+":"+cpe[0].split(":")[1]+":"+cpe[0].split(":")[2]+":"+cpe[0].split(":")[3]+":"+cpe[0].split(":")[4]

Could this long line be replaced by this one?
                    cpe_no_version = ":".join(cpe[0].split(":", 5)[:5])

[snip]
>  def parse_args():
>      parser = argparse.ArgumentParser()
> @@ -493,6 +555,8 @@ def parse_args():
>                          help='Number of packages')
>      parser.add_argument('-p', dest='packages', action='store',
>                          help='List of packages (comma separated)')
> +    parser.add_argument('-c', dest='cpe_report', action='store',
> +                        help='Buildroot CPE Report (csv format)')

What do you think about this help instead:
                     help='CPE Report generated by make cpe-info (csv format)')
or something like this?

>      return parser.parse_args()
>  
>  
> @@ -505,22 +569,27 @@ def __main__():
[snip]
> +    if args.cpe_report:
> +        print "Performing Target CPE Report Analysis..."
> +        cpe_dict = CPE()
> +        cpe_dict.get_xml_dict()
> +        get_target_cpe_report(args.cpe_report,cpe_dict)

Is this code supposed to be used only to debug the script?
Or is its output to stdout supposed to used by the user to get a list of
packages without cpe info?

I ask this because this code branch still requires a '-o file' to be passed,
but it does not use it.

For the first case (only to debug the script) maybe it is acceptable as is.

But for the second case (script to be called by the end user), it is not nice to
require an argument that is not used. required=True could then be removed from
"parser.add_argument('-o'," and the check could be moved to __main__ (code not
tested):
def __main__():
    args = parse_args()
    if args.output is None and args.cpe_report is None:
        print "ERROR: either argument -o or -c required"
        sys.exit(1)
    if args.output and args.cpe_report:
        print "ERROR: -o and -c are mutually exclusive"
        sys.exit(1)


Regards,
Ricardo
Matt Weber May 10, 2018, 6 p.m. UTC | #4
Ricardo,

On Wed, May 9, 2018 at 10:03 PM, Ricardo Martincoski
<ricardo.martincoski@gmail.com> wrote:
> Hello,
>
> This is not a complete code-review, but it seems you will send a new iteration.
>
> When sending new iteration(s) of this series, could you run flake8 on patches 5
> and 6 and fix all warnings?
> https://gitlab.com/RicardoMartincoski/buildroot/-/jobs/67435667
>
>
> On Mon, May 07, 2018 at 05:30 PM, Matt Weber wrote:
>
>> support/scripts/pkgstat: add target CPE reporting
>
> For this patch and also for patch 6, one of these prefixes would be better:
> support/scripts/pkg-stats: ...
> pkg-stats: ...

Noted, I'm actually just going to also collapse the patch 5 and 6
together (easier to manage with flake8/pep8 updates).

>
> [snip]
>> +    def get_xml_dict(self):
>> +        print "CPE: Fetching xml manifest..."
>> +        try:
>> +            compressed_cpe_file = urllib2.urlopen(CPE_XML_URL)
>> +            print "CPE: Unzipping xml manifest..."
>> +            cpe_file = gzip.GzipFile(fileobj=StringIO(compressed_cpe_file.read())).read()
>> +            print "CPE: Converting xml manifest to dict..."
>> +            self.all_cpes = xmltodict.parse(cpe_file)
>> +        except urllib2.HTTPError, e:
>> +            print "HTTP Error:", e.code, url
>> +        except urllib2.URLError, e:
>> +            print "URL Error:", e.reason, url
>
> (after fixing the warning from flake8)
> If the download fails the script continues and dies like this:
>
> CPE: Fetching xml manifest...
> URL Error: [Errno -2] Name or service not known
> CPE: Checking for matches...
> CPE: Searching for [cpe:2.3:a:gnu:bash:4.4.18:*:*:*:*:*:*:*]
> Traceback (most recent call last):
>   File "support/scripts/pkg-stats", line 595, in <module>
>     __main__()
>   File "support/scripts/pkg-stats", line 576, in __main__
>     get_target_cpe_report(args.cpe_report,cpe_dict)
>   File "support/scripts/pkg-stats", line 531, in get_target_cpe_report
>     result = cpe_dict.find(cpe[0])
>   File "support/scripts/pkg-stats", line 517, in find
>     for cpe in self.all_cpes['cpe-list']['cpe-item']:
> KeyError: 'cpe-list'
>
> Maybe this is what you want:
>         except ...
>             print ...
>             sys.exit(1)

Fixed by adding exit

>
> [snip]
>> +def get_target_cpe_report(cpe_report_file, cpe_dict):
>> +    report_cpe_exact_match = ""
>> +    report_cpe_needing_update = ""
>> +    report_cpe_missing = ""
>> +
>> +    print "CPE: Checking for matches..."
>> +    with open(cpe_report_file) as cpe_file:
>> +        cpe_list = csv.reader(cpe_file)
>> +        for cpe in cpe_list:
>> +            if "CPE ID" not in cpe[0]:
>
> The .csv file contains always only one header, right?
> If so, you could do like it is done on utils/size-stats-compare (code not
> tested):
>         cpe_list = csv.reader(cpe_file)
>         header = next(cpe_list)
>         (... code to validate the header ...)
>         for cpe in cpe_list:
>             (... assume it is an entry, not a header...)
>

Yeah, that works and is cleaner.


>> +                result = cpe_dict.find(cpe[0])
>> +                if not result:
>> +                    cpe_no_version = cpe[0].split(":")[0]+":"+cpe[0].split(":")[1]+":"+cpe[0].split(":")[2]+":"+cpe[0].split(":")[3]+":"+cpe[0].split(":")[4]
>
> Could this long line be replaced by this one?
>                     cpe_no_version = ":".join(cpe[0].split(":", 5)[:5])
>

Yep.   "".join(cpe.split(":")[:5])

> [snip]
>>  def parse_args():
>>      parser = argparse.ArgumentParser()
>> @@ -493,6 +555,8 @@ def parse_args():
>>                          help='Number of packages')
>>      parser.add_argument('-p', dest='packages', action='store',
>>                          help='List of packages (comma separated)')
>> +    parser.add_argument('-c', dest='cpe_report', action='store',
>> +                        help='Buildroot CPE Report (csv format)')
>
> What do you think about this help instead:
>                      help='CPE Report generated by make cpe-info (csv format)')

Sounds good.

> [snip]
>> +    if args.cpe_report:
>> +        print "Performing Target CPE Report Analysis..."
>> +        cpe_dict = CPE()
>> +        cpe_dict.get_xml_dict()
>> +        get_target_cpe_report(args.cpe_report,cpe_dict)
>
> Is this code supposed to be used only to debug the script?

Debug.  However we're working on a follow on patch that will add an
option to generate the xml files used to update a CPE entry.  When
that's added we may optional this output unless a xml output file is
provided.

> I ask this because this code branch still requires a '-o file' to be passed,
> but it does not use it.
>
> For the first case (only to debug the script) maybe it is acceptable as is.
>

I will remove the -o and instead do something like the following?

    elif args.output:
        print "Build package list ..."
        packages = get_pkglist(args.npackages, package_list)
        print "Getting package make info ..."
        package_init_make_info()
        print "Getting package details ..."
        for pkg in packages:
           pkg.set_infra()
           pkg.set_license()
           pkg.set_hash_info()
           pkg.set_patch_count()
           pkg.set_check_package_warnings()
           pkg.set_current_version()
           pkg.set_cpe_info(cpe_dict)
        print "Calculate stats"
        stats = calculate_stats(packages)
        print "Write HTML"
        dump_html(packages, stats, args.output)
    else:
        print "Please provide the -o HTML output file arg"



Matt
Matt Weber May 14, 2018, 9:43 p.m. UTC | #5
Ricardo,

On Thu, May 10, 2018 at 1:00 PM, Matthew Weber
<matthew.weber@rockwellcollins.com> wrote:
>
> Ricardo,
>
> On Wed, May 9, 2018 at 10:03 PM, Ricardo Martincoski
> <ricardo.martincoski@gmail.com> wrote:
> > Hello,
> >
> > This is not a complete code-review, but it seems you will send a new iteration.
> >

Here's v5
http://patchwork.ozlabs.org/project/buildroot/list/?series=43718
diff mbox series

Patch

diff --git a/support/scripts/pkg-stats b/support/scripts/pkg-stats
index 43f7e8d..144c00c 100755
--- a/support/scripts/pkg-stats
+++ b/support/scripts/pkg-stats
@@ -24,9 +24,15 @@  from collections import defaultdict
 import re
 import subprocess
 import sys
+import urllib2
+import xmltodict
+import gzip
+from StringIO import StringIO
+import csv
 
 INFRA_RE = re.compile("\$\(eval \$\(([a-z-]*)-package\)\)")
 
+CPE_XML_URL = "https://static.nvd.nist.gov/feeds/xml/cpe/dictionary/official-cpe-dictionary_v2.3.xml.gz"
 
 class Package:
     all_licenses = list()
@@ -484,6 +490,62 @@  def dump_html(packages, stats, output):
         dump_gen_info(f)
         f.write(html_footer)
 
+class CPE:
+    all_cpes = dict()
+
+    def get_xml_dict(self):
+        print "CPE: Fetching xml manifest..."
+        try:
+            compressed_cpe_file = urllib2.urlopen(CPE_XML_URL)
+            print "CPE: Unzipping xml manifest..."
+            cpe_file = gzip.GzipFile(fileobj=StringIO(compressed_cpe_file.read())).read()
+            print "CPE: Converting xml manifest to dict..."
+            self.all_cpes = xmltodict.parse(cpe_file)
+        except urllib2.HTTPError, e:
+            print "HTTP Error:", e.code, url
+        except urllib2.URLError, e:
+            print "URL Error:", e.reason, url
+
+    def find_partial(self,cpe_str):
+        print "CPE: Searching for partial [%s]" % cpe_str
+        for cpe in self.all_cpes['cpe-list']['cpe-item']:
+            if cpe_str in cpe['cpe-23:cpe23-item']['@name']:
+                return cpe['cpe-23:cpe23-item']['@name']
+
+    def find(self,cpe_str):
+        print "CPE: Searching for [%s]" % cpe_str
+        for cpe in self.all_cpes['cpe-list']['cpe-item']:
+            if cpe['cpe-23:cpe23-item']['@name'] == cpe_str:
+                return cpe['cpe-23:cpe23-item']['@name']
+
+def get_target_cpe_report(cpe_report_file, cpe_dict):
+    report_cpe_exact_match = ""
+    report_cpe_needing_update = ""
+    report_cpe_missing = ""
+
+    print "CPE: Checking for matches..."
+    with open(cpe_report_file) as cpe_file:
+        cpe_list = csv.reader(cpe_file)
+        for cpe in cpe_list:
+            if "CPE ID" not in cpe[0]:
+                result = cpe_dict.find(cpe[0])
+                if not result:
+                    cpe_no_version = cpe[0].split(":")[0]+":"+cpe[0].split(":")[1]+":"+cpe[0].split(":")[2]+":"+cpe[0].split(":")[3]+":"+cpe[0].split(":")[4]
+                    result = cpe_dict.find_partial(cpe_no_version)
+                    if not result:
+                        report_cpe_missing += cpe[0] + "\n"
+                    else:
+                        report_cpe_needing_update += cpe[0] + "\n"
+                else:
+                    report_cpe_exact_match += cpe[0] + "\n"
+    print "CPE: Found EXACT match:"
+    print report_cpe_exact_match
+    print "CPE: Found but REQUIRES UPDATE:"
+    print report_cpe_needing_update
+    print "CPE: Not found (proposing the following to be added):"
+    print report_cpe_missing
+
+
 
 def parse_args():
     parser = argparse.ArgumentParser()
@@ -493,6 +555,8 @@  def parse_args():
                         help='Number of packages')
     parser.add_argument('-p', dest='packages', action='store',
                         help='List of packages (comma separated)')
+    parser.add_argument('-c', dest='cpe_report', action='store',
+                        help='Buildroot CPE Report (csv format)')
     return parser.parse_args()
 
 
@@ -505,22 +569,27 @@  def __main__():
         package_list = args.packages.split(",")
     else:
         package_list = None
-    print "Build package list ..."
-    packages = get_pkglist(args.npackages, package_list)
-    print "Getting package make info ..."
-    package_init_make_info()
-    print "Getting package details ..."
-    for pkg in packages:
-        pkg.set_infra()
-        pkg.set_license()
-        pkg.set_hash_info()
-        pkg.set_patch_count()
-        pkg.set_check_package_warnings()
-        pkg.set_current_version()
-    print "Calculate stats"
-    stats = calculate_stats(packages)
-    print "Write HTML"
-    dump_html(packages, stats, args.output)
-
+    if args.cpe_report:
+        print "Performing Target CPE Report Analysis..."
+        cpe_dict = CPE()
+        cpe_dict.get_xml_dict()
+        get_target_cpe_report(args.cpe_report,cpe_dict)
+    else:
+       print "Build package list ..."
+       packages = get_pkglist(args.npackages, package_list)
+       print "Getting package make info ..."
+       package_init_make_info()
+       print "Getting package details ..."
+       for pkg in packages:
+          pkg.set_infra()
+          pkg.set_license()
+          pkg.set_hash_info()
+          pkg.set_patch_count()
+          pkg.set_check_package_warnings()
+          pkg.set_current_version()
+       print "Calculate stats"
+       stats = calculate_stats(packages)
+       print "Write HTML"
+       dump_html(packages, stats, args.output)
 
 __main__()