diff mbox

[v7,2/2] monitor: add memory search commands s, sp

Message ID 1434354995-9839-3-git-send-email-hw.claudio@gmail.com
State New
Headers show

Commit Message

Claudio Fontana June 15, 2015, 7:56 a.m. UTC
From: Claudio Fontana <claudio.fontana@huawei.com>

usage is similar to the commands x, xp.

Example with string: looking for "ELF" header in memory:

(qemu) s/1000000cb 0x40001000 "ELF"
searching memory area [0000000040001000-00000000400f5240]
0000000040090001
(qemu) x/20b 0x40090000
0000000040090000: '\x7f' 'E' 'L' 'F' '\x02' '\x01' '\x01' '\x03'
0000000040090008: '\x00' '\x00' '\x00' '\x00' '\x00' '\x00' '\x00' '\x00'
0000000040090010: '\x02' '\x00' '\xb7' '\x00'

Example with value: looking for 64bit variable value 0x990088

(qemu) s/1000000xg 0xffff900042000000 0x990088
searching memory area [ffff900042000000-ffff9000427a1200]
ffff9000424b3000
ffff9000424c1000

Signed-off-by: Claudio Fontana <claudio.fontana@huawei.com>
Acked-by: Luiz Capitulino <lcapitulino@redhat.com>
---
 hmp-commands.hx |  28 ++++++++++++
 monitor.c       | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 168 insertions(+)

Comments

Markus Armbruster June 16, 2015, 4:30 p.m. UTC | #1
hw.claudio@gmail.com writes:

> From: Claudio Fontana <claudio.fontana@huawei.com>
>
> usage is similar to the commands x, xp.
>
> Example with string: looking for "ELF" header in memory:
>
> (qemu) s/1000000cb 0x40001000 "ELF"
> searching memory area [0000000040001000-00000000400f5240]
> 0000000040090001
> (qemu) x/20b 0x40090000
> 0000000040090000: '\x7f' 'E' 'L' 'F' '\x02' '\x01' '\x01' '\x03'
> 0000000040090008: '\x00' '\x00' '\x00' '\x00' '\x00' '\x00' '\x00' '\x00'
> 0000000040090010: '\x02' '\x00' '\xb7' '\x00'
>
> Example with value: looking for 64bit variable value 0x990088
>
> (qemu) s/1000000xg 0xffff900042000000 0x990088
> searching memory area [ffff900042000000-ffff9000427a1200]
> ffff9000424b3000
> ffff9000424c1000
>
> Signed-off-by: Claudio Fontana <claudio.fontana@huawei.com>
> Acked-by: Luiz Capitulino <lcapitulino@redhat.com>
> ---
>  hmp-commands.hx |  28 ++++++++++++
>  monitor.c       | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 168 insertions(+)

The command is HMP-only.  Please explain in the commit message why this
is okay.

> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index 3d7dfcc..b5d972d 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -429,6 +429,34 @@ Start gdbserver session (default @var{port}=1234)
>  ETEXI
>  
>      {
> +        .name       = "s",
> +        .args_type  = "fmt:/,addr:l,data:s",
> +        .params     = "/fmt addr data",
> +        .help       = "search virtual memory starting at 'addr' for 'data'",
> +        .mhandler.cmd = hmp_memory_search,
> +    },
> +
> +STEXI
> +@item s/fmt @var{addr} @var{data}
> +@findex s
> +Virtual memory search starting at @var{addr} for data described by @var{data}.
> +ETEXI

fmt:/ evaluates into (@count, @format, @size).  Semantics?  Does @format
affect just the output?  Or does it affect interpretation of @data, too?
How?

Is it possible to search for arbitrary byte sequences?  Including zero
bytes?

If no: I'm willing to accept restrictions, because similar restrictions
already exist elsewhere, e.g. in HMP ringbuf_write (QMP ringbuf_write is
general, though).  However, I want you to document them, both in the
commit message and the manual, i.e. between STEXI and ETEXI above.

> +
> +    {
> +        .name       = "sp",
> +        .args_type  = "fmt:/,addr:l,data:s",
> +        .params     = "/fmt addr data",
> +        .help       = "search physical memory starting at 'addr' for 'data'",
> +        .mhandler.cmd = hmp_physical_memory_search,
> +    },
> +
> +STEXI
> +@item sp/fmt @var{addr} @var{data}
> +@findex sp
> +Physical memory search starting at @var{addr} for data described by @var{data}.
> +ETEXI

Likewise.

> +
> +    {
>          .name       = "x",
>          .args_type  = "fmt:/,addr:l",
>          .params     = "/fmt addr",
> diff --git a/monitor.c b/monitor.c
> index bcc20d5..fc6e15b 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -1127,6 +1127,124 @@ static void monitor_printc(Monitor *mon, int c)
>      monitor_printf(mon, "'");
>  }
>  
> +static void monitor_print_addr(Monitor *mon, hwaddr addr, bool is_physical)
> +{
> +    if (is_physical) {
> +        monitor_printf(mon, TARGET_FMT_plx "\n", addr);
> +    } else {
> +        monitor_printf(mon, TARGET_FMT_lx "\n", (target_ulong)addr);
> +    }
> +}

Used just once in your patch.  Matter of taste.  If your taste asks for
it, you get to put it to use in memory_dump(), too ;)

> +
> +/* simple memory search for a byte sequence. The sequence is generated from
> + * a numeric value to look for in guest memory, or from a string.
> + */

See my remark on function comments in my review of PATCH 1.

> +static void memory_search(Monitor *mon, int count, int format, int wsize,
> +                          hwaddr addr, const char *data_str, bool is_physical)

Shouldn't count and wsize be unsigned?

Shouldn't format be char?

Hmm, you merely follow memory_dump()'s example.  Asking you to clean
that up to get your patch in wouldn't be fair.

Okay.

> +{
> +    int pos, len;              /* pos in the search area, len of area */
> +    char *hay;                 /* buffer for haystack */
> +    int hay_size;              /* haystack size. Needle size is wsize. */
> +    const char *needle = NULL; /* needle to search in the haystack */
> +    const char *format_str;    /* numeric input format string */
> +    char value_raw[8];         /* numeric input converted to raw data */
> +#define MONITOR_S_CHUNK_SIZE 16000

Why 16000?

> +
> +    len = wsize * count;
> +    if (len < 1) {
> +        monitor_printf(mon, "invalid search area length.\n");

I suspect the only way to get here is actually wsize * count overflowing
int.

> +        return;
> +    }
> +    switch (format) {
> +    case 'i':
> +        monitor_printf(mon, "format '%c' not supported.\n", format);
> +        return;

A comment reminding the reader *why* it's not supported wouldn't hurt:
we don't support searching 

> +    case 'c':
> +        needle = data_str;
> +        wsize = strlen(data_str);
> +        if (wsize > MONITOR_S_CHUNK_SIZE) {
> +            monitor_printf(mon, "search string too long [max %d].\n",
> +                           MONITOR_S_CHUNK_SIZE);
> +            return;
> +        }
> +        break;
> +    case 'o':
> +        format_str = "%" SCNo64;
> +        break;
> +    default:
> +    case 'x':
> +        format_str = "%" SCNx64;
> +        break;
> +    case 'u':
> +        format_str = "%" SCNu64;
> +        break;
> +    case 'd':
> +        format_str = "%" SCNd64;
> +        break;
> +    }
> +    if (format != 'c') {
> +        uint64_t value;      /* numeric input value */
> +        void *from = &value;
> +        if (sscanf(data_str, format_str, &value) != 1) {
> +            monitor_printf(mon, "could not parse search string "
> +                           "\"%s\" as format '%c'.\n", data_str, format);
> +            return;
> +        }
> +#if defined(HOST_WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN)
> +        value = bswap64(value);
> +#endif
> +#if defined(TARGET_WORDS_BIGENDIAN)
> +        from += 8 - wsize;
> +#endif

This looks like you want to extract the least significant wsize bytes of
value into value_raw.  Either you do this more cleanly, or you prepend a
comment explaining what this hackery is supposed to do.

> +        memcpy(value_raw, from, wsize);
> +        needle = value_raw;
> +    }

Aha, @format appears to affect interpretation of data.

Looks like data can either be a string or a single number.  Correct?

> +    monitor_printf(mon, "searching memory area ");
> +    if (is_physical) {
> +        monitor_printf(mon, "[" TARGET_FMT_plx "-" TARGET_FMT_plx "]\n",
> +                       addr, addr + len);
> +    } else {
> +        monitor_printf(mon, "[" TARGET_FMT_lx "-" TARGET_FMT_lx "]\n",
> +                       (target_ulong)addr, (target_ulong)addr + len);
> +    }

Rather chatty.  Why is it useful to tell the user what he just told us?

> +    hay_size = len < MONITOR_S_CHUNK_SIZE ? len : MONITOR_S_CHUNK_SIZE;

Why not simply MIN(len, MONITOR_S_CHUNK_SIZE)?

> +    hay = g_malloc0(hay_size);

Why do you need the buffer zeroed?

> +
> +    for (pos = 0; pos < len;) {
> +        char *mark, *match; /* mark new starting position, eventual match */
> +        int l, todo;        /* total length to be processed in current chunk */
> +        l = len - pos;
> +        if (l > hay_size) {
> +            l = hay_size;
> +        }
> +        if (is_physical) {
> +            cpu_physical_memory_read(addr, hay, l);
> +        } else if (cpu_memory_rw_debug(mon_get_cpu(), addr,
> +                                       (uint8_t *)hay, l, 0) < 0) {
> +            monitor_printf(mon, " Cannot access memory\n");
> +            break;
> +        }
> +        for (mark = hay, todo = l; todo >= wsize;) {
> +            match = memmem(mark, todo, needle, wsize);
> +            if (!match) {
> +                break;
> +            }
> +            monitor_print_addr(mon, addr + (match - hay), is_physical);
> +            mark = match + 1;
> +            todo = mark - hay;
> +        }
> +        if (pos + l < len) {
> +            /* catch potential matches across chunks. */
> +            pos += l - (wsize - 1);
> +            addr += l - (wsize - 1);
> +        } else {
> +            pos += l;
> +            addr += l;
> +        }
> +    }

Maybe I'm having a dumb day, but I find this loop pretty impenetrable.
Can you state its invariants?

> +    g_free(hay);
> +}
> +
>  static void memory_dump(Monitor *mon, int count, int format, int wsize,
>                          hwaddr addr, int is_physical)
>  {
> @@ -1250,6 +1368,28 @@ static void memory_dump(Monitor *mon, int count, int format, int wsize,
>      }
>  }
>  
> +static void hmp_memory_search(Monitor *mon, const QDict *qdict)
> +{
> +    int count = qdict_get_int(qdict, "count");
> +    int format = qdict_get_int(qdict, "format");
> +    int size = qdict_get_int(qdict, "size");
> +    target_long addr = qdict_get_int(qdict, "addr");
> +    const char *data_str = qdict_get_str(qdict, "data");
> +
> +    memory_search(mon, count, format, size, addr, data_str, false);
> +}
> +
> +static void hmp_physical_memory_search(Monitor *mon, const QDict *qdict)
> +{
> +    int count = qdict_get_int(qdict, "count");
> +    int format = qdict_get_int(qdict, "format");
> +    int size = qdict_get_int(qdict, "size");
> +    hwaddr addr = qdict_get_int(qdict, "addr");
> +    const char *data_str = qdict_get_str(qdict, "data");
> +
> +    memory_search(mon, count, format, size, addr, data_str, true);
> +}
> +
>  static void hmp_memory_dump(Monitor *mon, const QDict *qdict)
>  {
>      int count = qdict_get_int(qdict, "count");

Just for comparison, here's GDB's command to search memory:

    'find [/SN] START_ADDR, +LEN, VAL1 [, VAL2, ...]'
    'find [/SN] START_ADDR, END_ADDR, VAL1 [, VAL2, ...]'
         Search memory for the sequence of bytes specified by VAL1, VAL2,
         etc.  The search begins at address START_ADDR and continues for
         either LEN bytes or through to END_ADDR inclusive.

       S and N are optional parameters.  They may be specified in either
    order, apart or together.

    S, search query size
         The size of each search query value.

         'b'
              bytes
         'h'
              halfwords (two bytes)
         'w'
              words (four bytes)
         'g'
              giant words (eight bytes)

         All values are interpreted in the current language.  This means,
         for example, that if the current source language is C/C++ then
         searching for the string "hello" includes the trailing '\0'.

         If the value size is not specified, it is taken from the value's
         type in the current language.  This is useful when one wants to
         specify the search pattern as a mixture of types.  Note that this
         means, for example, that in the case of C-like languages a search
         for an untyped 0x42 will search for '(int) 0x42' which is typically
         four bytes.

    N, maximum number of finds
         The maximum number of matches to print.  The default is to print
         all finds.

       You can use strings as search values.  Quote them with double-quotes
    ('"').  The string value is copied into the search pattern byte by byte,
    regardless of the endianness of the target and the size specification.

https://sourceware.org/gdb/current/onlinedocs/gdb/Searching-Memory.html#Searching-Memory
diff mbox

Patch

diff --git a/hmp-commands.hx b/hmp-commands.hx
index 3d7dfcc..b5d972d 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -429,6 +429,34 @@  Start gdbserver session (default @var{port}=1234)
 ETEXI
 
     {
+        .name       = "s",
+        .args_type  = "fmt:/,addr:l,data:s",
+        .params     = "/fmt addr data",
+        .help       = "search virtual memory starting at 'addr' for 'data'",
+        .mhandler.cmd = hmp_memory_search,
+    },
+
+STEXI
+@item s/fmt @var{addr} @var{data}
+@findex s
+Virtual memory search starting at @var{addr} for data described by @var{data}.
+ETEXI
+
+    {
+        .name       = "sp",
+        .args_type  = "fmt:/,addr:l,data:s",
+        .params     = "/fmt addr data",
+        .help       = "search physical memory starting at 'addr' for 'data'",
+        .mhandler.cmd = hmp_physical_memory_search,
+    },
+
+STEXI
+@item sp/fmt @var{addr} @var{data}
+@findex sp
+Physical memory search starting at @var{addr} for data described by @var{data}.
+ETEXI
+
+    {
         .name       = "x",
         .args_type  = "fmt:/,addr:l",
         .params     = "/fmt addr",
diff --git a/monitor.c b/monitor.c
index bcc20d5..fc6e15b 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1127,6 +1127,124 @@  static void monitor_printc(Monitor *mon, int c)
     monitor_printf(mon, "'");
 }
 
+static void monitor_print_addr(Monitor *mon, hwaddr addr, bool is_physical)
+{
+    if (is_physical) {
+        monitor_printf(mon, TARGET_FMT_plx "\n", addr);
+    } else {
+        monitor_printf(mon, TARGET_FMT_lx "\n", (target_ulong)addr);
+    }
+}
+
+/* simple memory search for a byte sequence. The sequence is generated from
+ * a numeric value to look for in guest memory, or from a string.
+ */
+static void memory_search(Monitor *mon, int count, int format, int wsize,
+                          hwaddr addr, const char *data_str, bool is_physical)
+{
+    int pos, len;              /* pos in the search area, len of area */
+    char *hay;                 /* buffer for haystack */
+    int hay_size;              /* haystack size. Needle size is wsize. */
+    const char *needle = NULL; /* needle to search in the haystack */
+    const char *format_str;    /* numeric input format string */
+    char value_raw[8];         /* numeric input converted to raw data */
+#define MONITOR_S_CHUNK_SIZE 16000
+
+    len = wsize * count;
+    if (len < 1) {
+        monitor_printf(mon, "invalid search area length.\n");
+        return;
+    }
+    switch (format) {
+    case 'i':
+        monitor_printf(mon, "format '%c' not supported.\n", format);
+        return;
+    case 'c':
+        needle = data_str;
+        wsize = strlen(data_str);
+        if (wsize > MONITOR_S_CHUNK_SIZE) {
+            monitor_printf(mon, "search string too long [max %d].\n",
+                           MONITOR_S_CHUNK_SIZE);
+            return;
+        }
+        break;
+    case 'o':
+        format_str = "%" SCNo64;
+        break;
+    default:
+    case 'x':
+        format_str = "%" SCNx64;
+        break;
+    case 'u':
+        format_str = "%" SCNu64;
+        break;
+    case 'd':
+        format_str = "%" SCNd64;
+        break;
+    }
+    if (format != 'c') {
+        uint64_t value;      /* numeric input value */
+        void *from = &value;
+        if (sscanf(data_str, format_str, &value) != 1) {
+            monitor_printf(mon, "could not parse search string "
+                           "\"%s\" as format '%c'.\n", data_str, format);
+            return;
+        }
+#if defined(HOST_WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN)
+        value = bswap64(value);
+#endif
+#if defined(TARGET_WORDS_BIGENDIAN)
+        from += 8 - wsize;
+#endif
+        memcpy(value_raw, from, wsize);
+        needle = value_raw;
+    }
+    monitor_printf(mon, "searching memory area ");
+    if (is_physical) {
+        monitor_printf(mon, "[" TARGET_FMT_plx "-" TARGET_FMT_plx "]\n",
+                       addr, addr + len);
+    } else {
+        monitor_printf(mon, "[" TARGET_FMT_lx "-" TARGET_FMT_lx "]\n",
+                       (target_ulong)addr, (target_ulong)addr + len);
+    }
+    hay_size = len < MONITOR_S_CHUNK_SIZE ? len : MONITOR_S_CHUNK_SIZE;
+    hay = g_malloc0(hay_size);
+
+    for (pos = 0; pos < len;) {
+        char *mark, *match; /* mark new starting position, eventual match */
+        int l, todo;        /* total length to be processed in current chunk */
+        l = len - pos;
+        if (l > hay_size) {
+            l = hay_size;
+        }
+        if (is_physical) {
+            cpu_physical_memory_read(addr, hay, l);
+        } else if (cpu_memory_rw_debug(mon_get_cpu(), addr,
+                                       (uint8_t *)hay, l, 0) < 0) {
+            monitor_printf(mon, " Cannot access memory\n");
+            break;
+        }
+        for (mark = hay, todo = l; todo >= wsize;) {
+            match = memmem(mark, todo, needle, wsize);
+            if (!match) {
+                break;
+            }
+            monitor_print_addr(mon, addr + (match - hay), is_physical);
+            mark = match + 1;
+            todo = mark - hay;
+        }
+        if (pos + l < len) {
+            /* catch potential matches across chunks. */
+            pos += l - (wsize - 1);
+            addr += l - (wsize - 1);
+        } else {
+            pos += l;
+            addr += l;
+        }
+    }
+    g_free(hay);
+}
+
 static void memory_dump(Monitor *mon, int count, int format, int wsize,
                         hwaddr addr, int is_physical)
 {
@@ -1250,6 +1368,28 @@  static void memory_dump(Monitor *mon, int count, int format, int wsize,
     }
 }
 
+static void hmp_memory_search(Monitor *mon, const QDict *qdict)
+{
+    int count = qdict_get_int(qdict, "count");
+    int format = qdict_get_int(qdict, "format");
+    int size = qdict_get_int(qdict, "size");
+    target_long addr = qdict_get_int(qdict, "addr");
+    const char *data_str = qdict_get_str(qdict, "data");
+
+    memory_search(mon, count, format, size, addr, data_str, false);
+}
+
+static void hmp_physical_memory_search(Monitor *mon, const QDict *qdict)
+{
+    int count = qdict_get_int(qdict, "count");
+    int format = qdict_get_int(qdict, "format");
+    int size = qdict_get_int(qdict, "size");
+    hwaddr addr = qdict_get_int(qdict, "addr");
+    const char *data_str = qdict_get_str(qdict, "data");
+
+    memory_search(mon, count, format, size, addr, data_str, true);
+}
+
 static void hmp_memory_dump(Monitor *mon, const QDict *qdict)
 {
     int count = qdict_get_int(qdict, "count");