Patchwork [U-Boot] Fix hash table deletion to prevent lost entries

login
register
mail settings
Submitter Peter Barada
Date Jan. 19, 2011, 4:32 p.m.
Message ID <4D371208.3090801@logicpd.com>
Download mbox | patch
Permalink /patch/79493/
State Superseded
Headers show

Comments

Peter Barada - Jan. 19, 2011, 4:32 p.m.
>> The hash delete code is in error; instead of just removing the deleted
>> key, it should instead allocate a new hashtable, hash all the keys into
>> the new table except for the deleted key and then reclaim the old table
>> (and deleted key).
> Can you please come up with a patch?

Hashtable delete is broken since freeing entry could break collision
chain for other entries.  Instead free key/data pair and mark entry as 
deleted (via
negative used value) and then skip deleted entries while probing.

Break up hsearch_r into separate henter_r/hfind_r functions for 
readability.
Recycle first deleted entry in probe chain when adding entry to table.
Factor primary/secondary hash calculations into functions and use
in henter_r/hfind_r/hdelete_r.

---
  include/search.h |   16 ++-
  lib/hashtable.c  |  382 
++++++++++++++++++++++++++++++++---------------------
  2 files changed, 241 insertions(+), 157 deletions(-)

-        hval += item.key[count];
+        hval += item->key[count];
      }

      /*
@@ -246,20 +189,95 @@ int hsearch_r(ENTRY item, ACTION action, ENTRY ** 
retval,
      if (hval == 0)
          ++hval;

+    return hval;
+}
+
+/* Secondary hash function */
+int hash_secondary(unsigned int hval, struct hsearch_data *htab)
+{
+    unsigned int hval2;
+
+    /*
+     * Second hash function:
+     * as suggested in [Knuth]
+     */
+    hval2 = 1 + hval % (htab->size - 2);
+    return hval2;
+}
+
+/*
+ * Find an entry in the hashtable. Skip negative used counts; these mark
+ * deleted entries.
+ */
+int hfind_r(ENTRY item, ENTRY ** retval, struct hsearch_data *htab)
+{
+    unsigned int hval;
+    unsigned int idx;
+
      /* The first index tried. */
-    idx = hval;
+    idx = hval = hash_primary(&item, htab);
+
+    debug("hash(%s) = %d/%d; used %d\n", item.key, idx, htab->size, 
htab->table[idx].used);
+
+    do {
+        unsigned int hval2;
+
+        if (htab->table[idx].used == hval
+ && strcmp(item.key, htab->table[idx].entry.key) == 0) {
+            /* return found entry */
+            *retval = &htab->table[idx].entry;
+            return idx;
+        }
+
+        if (!htab->table[idx].used)  {
+            /* end of chain, return empty */
+            __set_errno(ESRCH);
+            *retval = NULL;
+            return 0;
+        }
+
+        hval2 = hash_secondary(hval, htab);

-    if (htab->table[idx].used) {
          /*
-         * Further action might be required according to the
-         * action value.
+         * Because SIZE is prime this guarantees to
+         * step through all available indices.
           */
-        unsigned hval2;
+        if (idx <= hval2)
+            idx = htab->size + idx - hval2;
+        else
+            idx -= hval2;
+
+        /* If we visited all entries leave the loop */
+    } while (idx != hval);
+    __set_errno(ESRCH);
+    *retval = NULL;
+    return 0;
+}
+
+/*
+ * Add/update an entry in the table; search the whole chain
+ * looking for the entry.  Remember first index of deleted entry and
+ * use if found to hold new entry otherwise use empty entry at end of
+ * probe chain
+ */
+int henter_r(ENTRY item, ENTRY ** retval, struct hsearch_data *htab)
+{
+    unsigned int hval;
+    unsigned int idx;
+    int deleted_entry = 0;
+
+    /* The first index tried. */
+    idx = hval = hash_primary(&item, htab);
+
+    debug("hash(%s) = %d/%d; used %d\n", item.key, idx, htab->size, 
htab->table[idx].used);
+
+    do {
+        unsigned int hval2;

          if (htab->table[idx].used == hval
- && strcmp(item.key, htab->table[idx].entry.key) == 0) {
+ && strcmp(item.key, htab->table[idx].entry.key) == 0) {
              /* Overwrite existing value? */
-            if ((action == ENTER) && (item.data != NULL)) {
+            if (item.data != NULL) {
                  free(htab->table[idx].entry.data);
                  htab->table[idx].entry.data =
                      strdup(item.data);
@@ -274,82 +292,122 @@ int hsearch_r(ENTRY item, ACTION action, ENTRY ** 
retval,
              return idx;
          }

-        /*
-         * Second hash function:
-         * as suggested in [Knuth]
-         */
-        hval2 = 1 + hval % (htab->size - 2);
-
-        do {
+        if (htab->table[idx].used < 0) {
+            /* Remember deleted entry to use for inserution */
+            if (!deleted_entry)
+                deleted_entry = idx + 1;
+        } else if (!htab->table[idx].used)  {
              /*
-             * Because SIZE is prime this guarantees to
-             * step through all available indices.
+             * If table is full and another entry should be
+             * entered return with error.
               */
-            if (idx <= hval2)
-                idx = htab->size + idx - hval2;
-            else
-                idx -= hval2;
+            if (htab->filled == htab->size) {
+                __set_errno(ENOMEM);
+                *retval = NULL;
+                return 0;
+            }

              /*
-             * If we visited all entries leave the loop
-             * unsuccessfully.
+             * Create new entry; recyle deleted entry if found
+             * create copies of item.key and item.data
               */
-            if (idx == hval)
-                break;
-
-            /* If entry is found use it. */
-            if ((htab->table[idx].used == hval)
- && strcmp(item.key, htab->table[idx].entry.key) == 0) {
-                /* Overwrite existing value? */
-                if ((action == ENTER) && (item.data != NULL)) {
-                    free(htab->table[idx].entry.data);
-                    htab->table[idx].entry.data =
-                        strdup(item.data);
-                    if (!htab->table[idx].entry.data) {
-                        __set_errno(ENOMEM);
-                        *retval = NULL;
-                        return 0;
-                    }
-                }
-                /* return found entry */
-                *retval = &htab->table[idx].entry;
-                return idx;
+            if (deleted_entry)
+                idx = deleted_entry - 1;
+
+            htab->table[idx].used = hval;
+            htab->table[idx].entry.key = strdup(item.key);
+            htab->table[idx].entry.data = strdup(item.data);
+            if (!htab->table[idx].entry.key ||
+                !htab->table[idx].entry.data) {
+                __set_errno(ENOMEM);
+                *retval = NULL;
+                return 0;
              }
-        }
-        while (htab->table[idx].used);
-    }

-    /* An empty bucket has been found. */
-    if (action == ENTER) {
-        /*
-         * If table is full and another entry should be
-         * entered return with error.
-         */
-        if (htab->filled == htab->size) {
-            __set_errno(ENOMEM);
-            *retval = NULL;
-            return 0;
+            ++htab->filled;
+
+            /* return new entry */
+            *retval = &htab->table[idx].entry;
+            return 1;
          }

+        hval2 = hash_secondary(hval, htab);
+
          /*
-         * Create new entry;
-         * create copies of item.key and item.data
+         * Because SIZE is prime this guarantees to
+         * step through all available indices.
           */
-        htab->table[idx].used = hval;
-        htab->table[idx].entry.key = strdup(item.key);
-        htab->table[idx].entry.data = strdup(item.data);
-        if (!htab->table[idx].entry.key ||
-            !htab->table[idx].entry.data) {
-            __set_errno(ENOMEM);
-            *retval = NULL;
-            return 0;
-        }
+        if (idx <= hval2)
+            idx = htab->size + idx - hval2;
+        else
+            idx -= hval2;
+
+        /* If we visited all entries leave the loop */
+    } while (idx != hval);
+    __set_errno(ESRCH);
+    *retval = NULL;
+    return 0;
+}
+
+
+/*
+ * hsearch()
+ */
+
+int hsearch_r(ENTRY item, ACTION action, ENTRY ** retval,
+          struct hsearch_data *htab)
+{
+    if (action == FIND)
+        return hfind_r(item, retval, htab);
+    else
+        return henter_r(item, retval, htab);
+}
+
+/*
+ * This is the search function. It uses double hashing with open 
addressing.
+ * The argument item.key has to be a pointer to an zero terminated, most
+ * probably strings of chars. The function for generating a number of the
+ * strings is simple but fast. It can be replaced by a more complex 
function
+ * like ajw (see [Aho,Sethi,Ullman]) if the needs are shown.
+ *
+ * We use an trick to speed up the lookup. The table is created by hcreate
+ * with one more element available. This enables us to use the index zero
+ * special. This index will never be used because we store the first hash
+ * index in the field used where zero means not used. Every other 
non-negative
+ * value means used and a negative value means a deleted entry. The 
used field
+ * can be used as a first fast comparison for equality of the stored 
and the
+ * parameter value. This helps to prevent unnecessary expensive calls 
of strcmp.
+ *
+ * This implementation differs from the standard library version of
+ * this function in a number of ways:
+ *
+ * - While the standard version does not make any assumptions about
+ *   the type of the stored data objects at all, this implementation
+ *   works with NUL terminated strings only.
+ * - Instead of storing just pointers to the original objects, we
+ *   create local copies so the caller does not need to care about the
+ *   data any more.
+ * - The standard implementation does not provide a way to update an
+ *   existing entry.  This version will create a new entry or update an
+ *   existing one when both "action == ENTER" and "item.data != NULL".
+ * - Instead of returning 1 on success, we return the index into the
+ *   internal hash table, which is also guaranteed to be positive.
+ *   This allows us direct access to the found hash table slot.
+ */

-        ++htab->filled;
+int hmatch_r(const char *match, int last_idx, ENTRY ** retval,
+         struct hsearch_data *htab)
+{
+    unsigned int idx;
+    size_t key_len = strlen(match);

-        /* return new entry */
-        *retval = &htab->table[idx].entry;
-        return 1;
+    for (idx = last_idx + 1; idx < htab->size; ++idx) {
+        if (htab->table[idx].used <= 0)
+            continue;
+        if (!strncmp(match, htab->table[idx].entry.key, key_len)) {
+            *retval = &htab->table[idx].entry;
+            return idx;
+        }
      }

      __set_errno(ESRCH);
@@ -357,7 +415,6 @@ int hsearch_r(ENTRY item, ACTION action, ENTRY ** 
retval,
      return 0;
  }

-
  /*
   * hdelete()
   */
@@ -365,33 +422,56 @@ int hsearch_r(ENTRY item, ACTION action, ENTRY ** 
retval,
  /*
   * The standard implementation of hsearch(3) does not provide any way
   * to delete any entries from the hash table.  We extend the code to
- * do that.
+ * do that.  This is done by storing a negative value in used.
   */

  int hdelete_r(const char *key, struct hsearch_data *htab)
  {
-    ENTRY e, *ep;
-    int idx;
+    ENTRY e;
+    unsigned int hval;
+    unsigned int idx;

      debug("hdelete: DELETE key \"%s\"\n", key);

      e.key = (char *)key;

-    if ((idx = hsearch_r(e, FIND, &ep, htab)) == 0) {
-        __set_errno(ESRCH);
-        return 0;    /* not found */
-    }
+    idx = hval = hash_primary(&e, htab);
+    do {
+        unsigned int hval2;

-    /* free used ENTRY */
-    debug("hdelete: DELETING key \"%s\"\n", key);
+        if (htab->table[idx].used == hval
+ && strcmp(e.key, htab->table[idx].entry.key) == 0) {
+            /* Delete key/data pair */
+            free(htab->table[idx].entry.key);
+            free(htab->table[idx].entry.data);
+            /* Mark entry as deleted */
+            htab->table[idx].used = -1;
+            --htab->filled;
+            return 1;
+        }

-    free(ep->key);
-    free(ep->data);
-    htab->table[idx].used = 0;
+        if (!htab->table[idx].used)  {
+            /* end of chain, return empty */
+            __set_errno(ESRCH);
+            return 0;
+        }

-    --htab->filled;
+        hval2 = hash_secondary(hval, htab);

-    return 1;
+        /*
+         * Because SIZE is prime this guarantees to
+         * step through all available indices.
+         */
+        if (idx <= hval2)
+            idx = htab->size + idx - hval2;
+        else
+            idx -= hval2;
+
+        /* If we visited all entries leave the loop */
+    } while (idx != hval);
+
+    __set_errno(ESRCH);
+    return 0;
  }

  /*
@@ -467,7 +547,7 @@ ssize_t hexport_r(struct hsearch_data *htab, const 
char sep,
       */
      for (i = 1, n = 0, totlen = 0; i <= htab->size; ++i) {

-        if (htab->table[i].used) {
+        if (htab->table[i].used > 0) {
              ENTRY *ep = &htab->table[i].entry;

              list[n++] = ep;
@@ -703,7 +783,7 @@ int himport_r(struct hsearch_data *htab,
          e.key = name;
          e.data = value;

-        hsearch_r(e, ENTER, &rv, htab);
+        henter_r(e, &rv, htab);
          if (rv == NULL) {
              printf("himport_r: can't insert \"%s=%s\" into hash table\n",
                  name, value);
Wolfgang Denk - Jan. 19, 2011, 8:47 p.m.
Dear Peter Barada,

In message <4D371208.3090801@logicpd.com> you wrote:
> 
> >> The hash delete code is in error; instead of just removing the deleted
> >> key, it should instead allocate a new hashtable, hash all the keys into
> >> the new table except for the deleted key and then reclaim the old table
> >> (and deleted key).
> > Can you please come up with a patch?
> 
> Hashtable delete is broken since freeing entry could break collision
> chain for other entries.  Instead free key/data pair and mark entry as 
> deleted (via
> negative used value) and then skip deleted entries while probing.
> 
> Break up hsearch_r into separate henter_r/hfind_r functions for 
> readability.
> Recycle first deleted entry in probe chain when adding entry to table.
> Factor primary/secondary hash calculations into functions and use
> in henter_r/hfind_r/hdelete_r.
> 
> ---
>   include/search.h |   16 ++-
>   lib/hashtable.c  |  382 
> ++++++++++++++++++++++++++++++++---------------------
>   2 files changed, 241 insertions(+), 157 deletions(-)
> 
> diff --git a/include/search.h b/include/search.h
> index a7c1293..4466731 100644
> --- a/include/search.h
> +++ b/include/search.h
> @@ -66,12 +66,16 @@ extern int hcreate_r(size_t __nel, struct 
> hsearch_data *__htab);
>   extern void hdestroy_r(struct hsearch_data *__htab);
> 
>   /*
> - * Search for entry matching ITEM.key in internal hash table.  If
> - * ACTION is `FIND' return found entry or signal error by returning
> - * NULL.  If ACTION is `ENTER' replace existing data (if any) with
> - * ITEM.data.
> - * */
> -extern int hsearch_r(ENTRY __item, ACTION __action, ENTRY ** __retval,
> + * Search for entry matching ITEM.key in internal hash table.  Return
> + * found entry or signal error by returning NULL.
> + */
> +extern int hfind_r(ENTRY __item, ENTRY ** __retval,
> +             struct hsearch_data *__htab);
> +/*
> + * Add entry matching ITEM.key in internal hash table.  Replace
> + * existing data (if any) with ITEM.data.
> + */
> +extern int henter_r(ENTRY __item, ENTRY ** __retval,
>                struct hsearch_data *__htab);
> 
>   /*
> diff --git a/lib/hashtable.c b/lib/hashtable.c
> index 9f069c0..7b5153a 100644
> --- a/lib/hashtable.c
> +++ b/lib/hashtable.c
> @@ -65,7 +65,7 @@
>    * which describes the current status.
>    */
>   typedef struct _ENTRY {
> -    unsigned int used;
> +    int used;
>       ENTRY entry;
>   } _ENTRY;
> 
> @@ -152,7 +152,7 @@ void hdestroy_r(struct hsearch_data *htab)
> 
>       /* free used memory */
>       for (i = 1; i <= htab->size; ++i) {
> -        if (htab->table[i].used) {
> +        if (htab->table[i].used > 0) {
>               ENTRY *ep = &htab->table[i].entry;
> 
>               free(ep->key);
> @@ -165,77 +165,20 @@ void hdestroy_r(struct hsearch_data *htab)
>       htab->table = NULL;
>   }
> 
> -/*
> - * hsearch()
> - */
> -
> -/*
> - * This is the search function. It uses double hashing with open 
> addressing.
> - * The argument item.key has to be a pointer to an zero terminated, most
> - * probably strings of chars. The function for generating a number of the
> - * strings is simple but fast. It can be replaced by a more complex 
> function



Your patch is line-wrapped and does not apply.

Also, insteadof imlementing just the discussed fix, it appears to me a
major rewrite - you thow away lots of (useful, in my opinion)
comments, debug code, etc., and even replace major parts of the code.

In the result, the code looks pretty much different from the standard
C library function it was derived from.

I don't think all this was needed only to fix the observed problem?

What is your rationale for all these changes?  

Best regards,

Wolfgang Denk

Patch

diff --git a/include/search.h b/include/search.h
index a7c1293..4466731 100644
--- a/include/search.h
+++ b/include/search.h
@@ -66,12 +66,16 @@  extern int hcreate_r(size_t __nel, struct 
hsearch_data *__htab);
  extern void hdestroy_r(struct hsearch_data *__htab);

  /*
- * Search for entry matching ITEM.key in internal hash table.  If
- * ACTION is `FIND' return found entry or signal error by returning
- * NULL.  If ACTION is `ENTER' replace existing data (if any) with
- * ITEM.data.
- * */
-extern int hsearch_r(ENTRY __item, ACTION __action, ENTRY ** __retval,
+ * Search for entry matching ITEM.key in internal hash table.  Return
+ * found entry or signal error by returning NULL.
+ */
+extern int hfind_r(ENTRY __item, ENTRY ** __retval,
+             struct hsearch_data *__htab);
+/*
+ * Add entry matching ITEM.key in internal hash table.  Replace
+ * existing data (if any) with ITEM.data.
+ */
+extern int henter_r(ENTRY __item, ENTRY ** __retval,
               struct hsearch_data *__htab);

  /*
diff --git a/lib/hashtable.c b/lib/hashtable.c
index 9f069c0..7b5153a 100644
--- a/lib/hashtable.c
+++ b/lib/hashtable.c
@@ -65,7 +65,7 @@ 
   * which describes the current status.
   */
  typedef struct _ENTRY {
-    unsigned int used;
+    int used;
      ENTRY entry;
  } _ENTRY;

@@ -152,7 +152,7 @@  void hdestroy_r(struct hsearch_data *htab)

      /* free used memory */
      for (i = 1; i <= htab->size; ++i) {
-        if (htab->table[i].used) {
+        if (htab->table[i].used > 0) {
              ENTRY *ep = &htab->table[i].entry;

              free(ep->key);
@@ -165,77 +165,20 @@  void hdestroy_r(struct hsearch_data *htab)
      htab->table = NULL;
  }

-/*
- * hsearch()
- */
-
-/*
- * This is the search function. It uses double hashing with open 
addressing.
- * The argument item.key has to be a pointer to an zero terminated, most
- * probably strings of chars. The function for generating a number of the
- * strings is simple but fast. It can be replaced by a more complex 
function
- * like ajw (see [Aho,Sethi,Ullman]) if the needs are shown.
- *
- * We use an trick to speed up the lookup. The table is created by hcreate
- * with one more element available. This enables us to use the index zero
- * special. This index will never be used because we store the first hash
- * index in the field used where zero means not used. Every other value
- * means used. The used field can be used as a first fast comparison for
- * equality of the stored and the parameter value. This helps to prevent
- * unnecessary expensive calls of strcmp.
- *
- * This implementation differs from the standard library version of
- * this function in a number of ways:
- *
- * - While the standard version does not make any assumptions about
- *   the type of the stored data objects at all, this implementation
- *   works with NUL terminated strings only.
- * - Instead of storing just pointers to the original objects, we
- *   create local copies so the caller does not need to care about the
- *   data any more.
- * - The standard implementation does not provide a way to update an
- *   existing entry.  This version will create a new entry or update an
- *   existing one when both "action == ENTER" and "item.data != NULL".
- * - Instead of returning 1 on success, we return the index into the
- *   internal hash table, which is also guaranteed to be positive.
- *   This allows us direct access to the found hash table slot for
- *   example for functions like hdelete().
- */
-
-int hmatch_r(const char *match, int last_idx, ENTRY ** retval,
-         struct hsearch_data *htab)
-{
-    unsigned int idx;
-    size_t key_len = strlen(match);
-
-    for (idx = last_idx + 1; idx < htab->size; ++idx) {
-        if (!htab->table[idx].used)
-            continue;
-        if (!strncmp(match, htab->table[idx].entry.key, key_len)) {
-            *retval = &htab->table[idx].entry;
-            return idx;
-        }
-    }
-
-    __set_errno(ESRCH);
-    *retval = NULL;
-    return 0;
-}

-int hsearch_r(ENTRY item, ACTION action, ENTRY ** retval,
-          struct hsearch_data *htab)
+/* Primary hash function */
+int hash_primary(ENTRY *item, struct hsearch_data *htab)
  {
+    int len = strlen(item->key);
      unsigned int hval;
      unsigned int count;
-    unsigned int len = strlen(item.key);
-    unsigned int idx;

-    /* Compute an value for the given string. Perhaps use a better 
method. */
+    /* Compute a value for the given string. Perhaps use a better 
method. */
      hval = len;
      count = len;
      while (count-- > 0) {
          hval <<= 4;