diff mbox

[LEDE-DEV,1/4] bugcheck: Add tools to poll for and report certain bugs.

Message ID 1469145162-8332-1-git-send-email-greearb@candelatech.com
State Changes Requested
Headers show

Commit Message

Ben Greear July 21, 2016, 11:52 p.m. UTC
From: Ben Greear <greearb@candelatech.com>

This first release is all about checking for ath10k firmware
crashes.  Could be extended later for other modules/bugs/etc.

Signed-off-by: Ben Greear <greearb@candelatech.com>
---
 package/utils/bugcheck/Makefile           |  46 ++++++++++++
 package/utils/bugcheck/src/bugcheck.initd |  16 +++++
 package/utils/bugcheck/src/bugcheck.sh    | 116 ++++++++++++++++++++++++++++++
 package/utils/bugcheck/src/bugchecker.sh  |  29 ++++++++
 4 files changed, 207 insertions(+)
 create mode 100644 package/utils/bugcheck/Makefile
 create mode 100644 package/utils/bugcheck/src/bugcheck.initd
 create mode 100755 package/utils/bugcheck/src/bugcheck.sh
 create mode 100755 package/utils/bugcheck/src/bugchecker.sh

Comments

John Crispin July 28, 2016, 5:13 a.m. UTC | #1
On 22/07/2016 01:52, greearb@candelatech.com wrote:
> From: Ben Greear <greearb@candelatech.com>
> 
> This first release is all about checking for ath10k firmware
> crashes.  Could be extended later for other modules/bugs/etc.
> 

the description could be a little more verbose explaining roughly what
the tool does

> Signed-off-by: Ben Greear <greearb@candelatech.com>
> ---
>  package/utils/bugcheck/Makefile           |  46 ++++++++++++
>  package/utils/bugcheck/src/bugcheck.initd |  16 +++++
>  package/utils/bugcheck/src/bugcheck.sh    | 116 ++++++++++++++++++++++++++++++
>  package/utils/bugcheck/src/bugchecker.sh  |  29 ++++++++

why not use cron instead of running a wrapper script ?

	John

>  4 files changed, 207 insertions(+)
>  create mode 100644 package/utils/bugcheck/Makefile
>  create mode 100644 package/utils/bugcheck/src/bugcheck.initd
>  create mode 100755 package/utils/bugcheck/src/bugcheck.sh
>  create mode 100755 package/utils/bugcheck/src/bugchecker.sh
> 
> diff --git a/package/utils/bugcheck/Makefile b/package/utils/bugcheck/Makefile
> new file mode 100644
> index 0000000..8b5fdd8
> --- /dev/null
> +++ b/package/utils/bugcheck/Makefile
> @@ -0,0 +1,46 @@
> +#
> +# Copyright (C) 2016 OpenWrt.org
> +#
> +# This is free software, licensed under the GNU General Public License v2.
> +# See /LICENSE for more information.
> +#
> +
> +include $(TOPDIR)/rules.mk
> +include $(INCLUDE_DIR)/kernel.mk
> +
> +PKG_NAME:=bugcheck
> +PKG_RELEASE:=2016-07-21
> +
> +include $(INCLUDE_DIR)/package.mk
> +
> +define Package/bugcheck
> +  SECTION:=utils
> +  CATEGORY:=Utilities
> +  TITLE:=Bug checking and reporting utility
> +  VERSION:=$(PKG_RELEASE)
> +  MAINTAINER:=Ben Greear <greearb@candelatech.com>
> +endef
> +
> +define Package/bugcheck/description
> +  Scripts to check for bugs (like firmware crashes) and package them for reporting.
> +endef
> +
> +define Build/Prepare
> +	$(CP) src/bugcheck.sh $(PKG_BUILD_DIR)/
> +	$(CP) src/bugchecker.sh $(PKG_BUILD_DIR)/
> +	$(CP) src/bugcheck.initd $(PKG_BUILD_DIR)/
> +endef
> +
> +define Build/Compile
> +	true
> +endef
> +
> +define Package/bugcheck/install
> +	$(INSTALL_DIR) $(1)/usr/bin
> +	$(INSTALL_DIR) $(1)/etc/init.d
> +	$(INSTALL_BIN) $(PKG_BUILD_DIR)/bugcheck.sh $(1)/usr/bin/
> +	$(INSTALL_BIN) $(PKG_BUILD_DIR)/bugchecker.sh $(1)/usr/bin/
> +	$(INSTALL_BIN) $(PKG_BUILD_DIR)/bugcheck.initd $(1)/etc/init.d/bugcheck
> +endef
> +
> +$(eval $(call BuildPackage,bugcheck))
> diff --git a/package/utils/bugcheck/src/bugcheck.initd b/package/utils/bugcheck/src/bugcheck.initd
> new file mode 100644
> index 0000000..b97a415
> --- /dev/null
> +++ b/package/utils/bugcheck/src/bugcheck.initd
> @@ -0,0 +1,16 @@
> +#!/bin/sh /etc/rc.common
> +# Copyright (C) 2016 OpenWrt.org
> +
> +START=99
> +
> +USE_PROCD=1
> +PROG=/usr/bin/bugchecker.sh
> +
> +# To actually make bugchecker.sh run, see comments
> +# at top of its file.
> +
> +start_service () {
> +        procd_open_instance
> +        procd_set_param command "$PROG"
> +        procd_close_instance
> +}
> diff --git a/package/utils/bugcheck/src/bugcheck.sh b/package/utils/bugcheck/src/bugcheck.sh
> new file mode 100755
> index 0000000..7f35795
> --- /dev/null
> +++ b/package/utils/bugcheck/src/bugcheck.sh
> @@ -0,0 +1,116 @@
> +#!/bin/sh
> +
> +# Check for ath10k (and maybe other) bugs, package them up,
> +# and let user know what to do with them.
> +
> +TMPLOC=/tmp
> +CRASHDIR=$TMPLOC/bugcheck
> +BUGFILE=$TMPLOC/buglog.tgz
> +FOUND_BUG=0
> +
> +# set -x
> +
> +bugcheck_generic()
> +{
> +    echo "LEDE crashlog report" > $CRASHDIR/info.txt
> +    date >> $CRASHDIR/info.txt
> +    echo >> $CRASHDIR/info.txt
> +    echo "uname" >> $CRASHDIR/info.txt
> +    uname -a >> $CRASHDIR/info.txt
> +    echo >> $CRASHDIR/info.txt
> +    echo "os-release" >> $CRASHDIR/info.txt
> +    cat /etc/os-release >> $CRASHDIR/info.txt
> +    echo >> $CRASHDIR/info.txt
> +    echo "os-release" >> $CRASHDIR/info.txt
> +    cat /etc/os-release >> $CRASHDIR/info.txt
> +    echo >> $CRASHDIR/info.txt
> +    echo "dmesg output" >> $CRASHDIR/info.txt
> +    dmesg >> $CRASHDIR/info.txt
> +    if [ -x /usr/bin/lspci ]
> +	then
> +	echo >> $CRASHDIR/info.txt
> +	echo "lspci" >> $CRASHDIR/info.txt
> +	lspci >> $CRASHDIR/info.txt
> +    fi
> +    echo >> $CRASHDIR/info.txt
> +    echo "cpuinfo" >> $CRASHDIR/info.txt
> +    cat /proc/cpuinfo >> $CRASHDIR/info.txt
> +    echo >> $CRASHDIR/info.txt
> +    echo "meminfo" >> $CRASHDIR/info.txt
> +    cat /proc/cpuinfo >> $CRASHDIR/info.txt
> +    echo >> $CRASHDIR/info.txt
> +    echo "cmdline" >> $CRASHDIR/info.txt
> +    cat /proc/cmdline >> $CRASHDIR/info.txt
> +    echo >> $CRASHDIR/info.txt
> +    echo "lsmod" >> $CRASHDIR/info.txt
> +    lsmod >> $CRASHDIR/info.txt
> +}
> +
> +roll_crashes()
> +{
> +    # Roll any existing crashes
> +    if [ -d $CRASHDIR ]
> +	then
> +	if [ -d $CRASHDIR.1 ]
> +	    then
> +	    rm -fr $CRASHDIR.1
> +	fi
> +	mv $CRASHDIR $CRASHDIR.1
> +    fi
> +
> +    # Prepare location
> +    mkdir -p $CRASHDIR
> +}
> +
> +# ath10k, check debugfs entries.
> +for i in /sys/kernel/debug/ieee80211/*/ath10k/fw_crash_dump
> +do
> +  #echo "Checking $i"
> +  if cat $i > $TMPLOC/ath10k_crash.bin 2>&1
> +      then
> +      FOUND_BUG=1
> +
> +      #echo "Found ath10k crash data in $i"
> +      roll_crashes
> +
> +      ADIR=${i/fw_crash_dump/}
> +
> +      CTFW=0
> +      if grep -- -ct- $TMPLOC/ath10k_crash.bin > /dev/null 2>&1
> +	  then
> +	  CTFW=1
> +      fi
> +
> +      echo "Send bug reports to:" > $CRASHDIR/report_to.txt
> +      if [ -f $ADIR/ct_special -o $CTFW == "1" ]
> +	  then
> +	  # Looks like this is CT firmware or driver...
> +	  echo "greearb@candelatech.com" >> $CRASHDIR/report_to.txt
> +	  echo "and/or report or check for duplicates here:" >> $CRASHDIR/report_to.txt
> +	  echo "https://github.com/greearb/ath10k-ct/issues" >> $CRASHDIR/report_to.txt
> +      else
> +	  # Not sure who would want these bug reports for upstream...
> +	  echo "https://www.lede-project.org/" >> $CRASHDIR/report_to.txt
> +      fi
> +      echo >> $CRASHDIR/report_to.txt
> +      echo "Please attach all files in this directory to bug reports." >> $CRASHDIR/report_to.txt
> +
> +      mv $TMPLOC/ath10k_crash.bin $CRASHDIR
> +
> +      # Add any more ath10k specific stuff here.
> +
> +      # And call generic bug reporting logic
> +      bugcheck_generic
> +  fi
> +done
> +
> +if [ $FOUND_BUG == "1" ]
> +    then
> +    # Notify LUCI somehow?
> +    echo "bugcheck.sh found an issue to be reported" > /dev/kmsg
> +    echo "See $CRASHDIR for details on how to report this" > /dev/kmsg
> +    # Let calling code know something was wrong.
> +    exit 1
> +fi
> +
> +exit 0
> diff --git a/package/utils/bugcheck/src/bugchecker.sh b/package/utils/bugcheck/src/bugchecker.sh
> new file mode 100755
> index 0000000..be305af
> --- /dev/null
> +++ b/package/utils/bugcheck/src/bugchecker.sh
> @@ -0,0 +1,29 @@
> +#!/bin/sh
> +
> +# Periodically call bugcheck.sh script
> +
> +CHECKER=bugcheck.sh
> +SLEEPFOR=60
> +
> +DO_BUGCHECK=0
> +
> +# So, to enable this, you create an /etc/config/bugcheck file
> +# with contents like:
> +#  DO_BUGCHECK=1
> +#  export DO_BUGCHECK
> +
> +if [ -f /etc/config/bugcheck ]
> +    then
> +    . /etc/config/bugcheck
> +fi
> +
> +if [ $DO_BUGCHECK == 0 ]
> +then
> +    exit 0
> +fi
> +
> +while true
> +  do
> +  $CHECKER
> +  sleep $SLEEPFOR
> +done
>
Ben Greear July 28, 2016, 1:21 p.m. UTC | #2
On 07/27/2016 10:13 PM, John Crispin wrote:
>
>
> On 22/07/2016 01:52, greearb@candelatech.com wrote:
>> From: Ben Greear <greearb@candelatech.com>
>>
>> This first release is all about checking for ath10k firmware
>> crashes.  Could be extended later for other modules/bugs/etc.
>>
>
> the description could be a little more verbose explaining roughly what
> the tool does

Ok.

>
>> Signed-off-by: Ben Greear <greearb@candelatech.com>
>> ---
>>   package/utils/bugcheck/Makefile           |  46 ++++++++++++
>>   package/utils/bugcheck/src/bugcheck.initd |  16 +++++
>>   package/utils/bugcheck/src/bugcheck.sh    | 116 ++++++++++++++++++++++++++++++
>>   package/utils/bugcheck/src/bugchecker.sh  |  29 ++++++++
>
> why not use cron instead of running a wrapper script ?

I want 1-minute (or maybe even less in future) intervals, and evidently this
is difficult to automatically make work with LEDE cron?

Also, I was asked to rename this project to 'ct-bugcheck', which I have done, but have
not yet posted a patch.

And, I was asked to ensure that the tool did not run by default, which this patch
accomplishes by requiring the user to edit an /etc/config/bugcheck file.

Been spending time trying to get a linksys with QCA9980 hardware to be stable
under load...firmware is crapping itself currently.

Thanks,
Ben
diff mbox

Patch

diff --git a/package/utils/bugcheck/Makefile b/package/utils/bugcheck/Makefile
new file mode 100644
index 0000000..8b5fdd8
--- /dev/null
+++ b/package/utils/bugcheck/Makefile
@@ -0,0 +1,46 @@ 
+#
+# Copyright (C) 2016 OpenWrt.org
+#
+# This is free software, licensed under the GNU General Public License v2.
+# See /LICENSE for more information.
+#
+
+include $(TOPDIR)/rules.mk
+include $(INCLUDE_DIR)/kernel.mk
+
+PKG_NAME:=bugcheck
+PKG_RELEASE:=2016-07-21
+
+include $(INCLUDE_DIR)/package.mk
+# Package metadata; VERSION reuses the date-style PKG_RELEASE set above.
+define Package/bugcheck
+  SECTION:=utils
+  CATEGORY:=Utilities
+  TITLE:=Bug checking and reporting utility
+  VERSION:=$(PKG_RELEASE)
+  MAINTAINER:=Ben Greear <greearb@candelatech.com>
+endef
+
+define Package/bugcheck/description
+  Scripts to check for bugs (like firmware crashes) and package them for reporting.
+endef
+# All sources are local: copy the three files from src/ into the build dir.
+define Build/Prepare
+	$(CP) src/bugcheck.sh $(PKG_BUILD_DIR)/
+	$(CP) src/bugchecker.sh $(PKG_BUILD_DIR)/
+	$(CP) src/bugcheck.initd $(PKG_BUILD_DIR)/
+endef
+# Nothing is compiled; the build step is a no-op because only scripts ship.
+define Build/Compile
+	true
+endef
+# Install both scripts into /usr/bin and the init script as /etc/init.d/bugcheck.
+define Package/bugcheck/install
+	$(INSTALL_DIR) $(1)/usr/bin
+	$(INSTALL_DIR) $(1)/etc/init.d
+	$(INSTALL_BIN) $(PKG_BUILD_DIR)/bugcheck.sh $(1)/usr/bin/
+	$(INSTALL_BIN) $(PKG_BUILD_DIR)/bugchecker.sh $(1)/usr/bin/
+	$(INSTALL_BIN) $(PKG_BUILD_DIR)/bugcheck.initd $(1)/etc/init.d/bugcheck
+endef
+
+$(eval $(call BuildPackage,bugcheck))
diff --git a/package/utils/bugcheck/src/bugcheck.initd b/package/utils/bugcheck/src/bugcheck.initd
new file mode 100644
index 0000000..b97a415
--- /dev/null
+++ b/package/utils/bugcheck/src/bugcheck.initd
@@ -0,0 +1,16 @@ 
+#!/bin/sh /etc/rc.common
+# Copyright (C) 2016 OpenWrt.org
+
+START=99
+
+USE_PROCD=1
+PROG=/usr/bin/bugchecker.sh
+
+# bugchecker.sh exits at once unless /etc/config/bugcheck sets
+# DO_BUGCHECK to something other than 0 (see the top of that script).
+# Register $PROG as the command of a single service instance.
+start_service () {
+        procd_open_instance
+        procd_set_param command "$PROG"
+        procd_close_instance
+}
diff --git a/package/utils/bugcheck/src/bugcheck.sh b/package/utils/bugcheck/src/bugcheck.sh
new file mode 100755
index 0000000..7f35795
--- /dev/null
+++ b/package/utils/bugcheck/src/bugcheck.sh
@@ -0,0 +1,116 @@ 
+#!/bin/sh
+
+# Check for ath10k (and maybe other) bugs, package them up, and let the
+# user know what to do with them.  Exits 1 if a bug was found, else 0.
+
+TMPLOC=/tmp                  # scratch area; the paths below live under it
+CRASHDIR=$TMPLOC/bugcheck    # directory holding the current report
+BUGFILE=$TMPLOC/buglog.tgz   # NOTE(review): not referenced anywhere else in this script
+FOUND_BUG=0                  # set to 1 once a check finds something to report
+
+# set -x
+
+# Write a system snapshot (date, uname, os-release, dmesg, optional lspci,
+# cpuinfo, meminfo, cmdline, lsmod) to $CRASHDIR/info.txt, replacing any old one.
+bugcheck_generic()
+{
+    {
+        echo "LEDE crashlog report"
+        date
+        echo
+        echo "uname"
+        uname -a
+        echo
+        echo "os-release"
+        cat /etc/os-release
+        echo
+        echo "dmesg output"
+        dmesg
+        if [ -x /usr/bin/lspci ]; then
+            echo
+            echo "lspci"
+            lspci
+        fi
+        echo
+        echo "cpuinfo"
+        cat /proc/cpuinfo
+        echo
+        echo "meminfo"
+        cat /proc/meminfo
+        echo
+        echo "cmdline"
+        cat /proc/cmdline
+        echo
+        echo "lsmod"
+        lsmod
+    } > "$CRASHDIR/info.txt"
+}
+
+# Keep at most one previous report: move an existing $CRASHDIR to
+# $CRASHDIR.1 (replacing whatever was there), then make sure an empty
+# $CRASHDIR exists for the report about to be written.
+# Globals: CRASHDIR (read).
+roll_crashes()
+{
+    if [ -d "$CRASHDIR" ]
+    then
+        # rm -f is silent when nothing is there, so no existence test is needed.
+        rm -fr "$CRASHDIR.1"
+        mv "$CRASHDIR" "$CRASHDIR.1"
+    fi
+
+    mkdir -p "$CRASHDIR"
+}
+
+# ath10k: a readable fw_crash_dump debugfs entry is treated as a firmware
+# crash; its contents and a system report are collected in $CRASHDIR.
+DUMP="$TMPLOC/ath10k_crash.bin"
+for i in /sys/kernel/debug/ieee80211/*/ath10k/fw_crash_dump
+do
+  # Keep cat's stderr out of the dump; an unmatched glob simply fails here.
+  if cat "$i" > "$DUMP" 2>/dev/null
+  then
+      FOUND_BUG=1
+      roll_crashes
+      # Directory holding fw_crash_dump (POSIX suffix strip, not ${var/..}).
+      ADIR=${i%/fw_crash_dump}
+
+      # Treat a "-ct-" string inside the dump as the marker for CT firmware.
+      CTFW=0
+      if grep -q -- -ct- "$DUMP" 2>/dev/null
+      then
+          CTFW=1
+      fi
+
+      echo "Send bug reports to:" > "$CRASHDIR/report_to.txt"
+      if [ -f "$ADIR/ct_special" ] || [ "$CTFW" = "1" ]
+      then
+          # Looks like this is CT firmware or driver...
+          echo "greearb@candelatech.com" >> "$CRASHDIR/report_to.txt"
+          echo "and/or report or check for duplicates here:" >> "$CRASHDIR/report_to.txt"
+          echo "https://github.com/greearb/ath10k-ct/issues" >> "$CRASHDIR/report_to.txt"
+      else
+          # Not sure who would want these bug reports for upstream...
+          echo "https://www.lede-project.org/" >> "$CRASHDIR/report_to.txt"
+      fi
+      echo >> "$CRASHDIR/report_to.txt"
+      echo "Please attach all files in this directory to bug reports." >> "$CRASHDIR/report_to.txt"
+      mv "$DUMP" "$CRASHDIR"
+      # Add any more ath10k specific stuff here, then the generic report.
+      bugcheck_generic
+  else
+      # A failed read still created the temp file; do not leave it behind.
+      rm -f "$DUMP"
+  fi
+done
+
+if [ "$FOUND_BUG" = "1" ]
+then
+    # Notify LUCI somehow?
+    echo "bugcheck.sh found an issue to be reported" > /dev/kmsg
+    echo "See $CRASHDIR for details on how to report this" > /dev/kmsg
+    # Let calling code know something was wrong.
+    exit 1
+fi
+
+exit 0
diff --git a/package/utils/bugcheck/src/bugchecker.sh b/package/utils/bugcheck/src/bugchecker.sh
new file mode 100755
index 0000000..be305af
--- /dev/null
+++ b/package/utils/bugcheck/src/bugchecker.sh
@@ -0,0 +1,29 @@ 
+#!/bin/sh
+
+# Run the bugcheck.sh script every $SLEEPFOR seconds, forever.
+# Disabled by default.  To enable, create /etc/config/bugcheck containing:
+#  DO_BUGCHECK=1
+#  export DO_BUGCHECK
+# The file is sourced, so it may override CHECKER and SLEEPFOR as well.
+
+CHECKER=bugcheck.sh    # resolved through $PATH
+SLEEPFOR=60            # seconds to sleep between checks
+DO_BUGCHECK=0
+
+if [ -f /etc/config/bugcheck ]
+then
+    . /etc/config/bugcheck
+fi
+
+# Quoted, with a default, so an empty or unset value means "disabled"
+# instead of a test syntax error that falls through to the loop.
+if [ "${DO_BUGCHECK:-0}" = 0 ]
+then
+    exit 0
+fi
+
+while true
+do
+    $CHECKER             # unquoted on purpose: an override may carry arguments
+    sleep "$SLEEPFOR"
+done