From patchwork Tue Oct 26 12:53:33 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Arnaud Charlet X-Patchwork-Id: 69243 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 6BC6CB70A9 for ; Tue, 26 Oct 2010 23:53:50 +1100 (EST) Received: (qmail 12324 invoked by alias); 26 Oct 2010 12:53:47 -0000 Received: (qmail 12287 invoked by uid 22791); 26 Oct 2010 12:53:44 -0000 X-SWARE-Spam-Status: No, hits=-1.8 required=5.0 tests=AWL, BAYES_00, T_RP_MATCHES_RCVD X-Spam-Check-By: sourceware.org Received: from mel.act-europe.fr (HELO mel.act-europe.fr) (194.98.77.210) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Tue, 26 Oct 2010 12:53:36 +0000 Received: from localhost (localhost [127.0.0.1]) by filtered-smtp.eu.adacore.com (Postfix) with ESMTP id E84BBCB02ED; Tue, 26 Oct 2010 14:53:33 +0200 (CEST) Received: from mel.act-europe.fr ([127.0.0.1]) by localhost (smtp.eu.adacore.com [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id Bxe7UW2Zu6aC; Tue, 26 Oct 2010 14:53:33 +0200 (CEST) Received: from saumur.act-europe.fr (saumur.act-europe.fr [10.10.0.183]) by mel.act-europe.fr (Postfix) with ESMTP id D382ACB0240; Tue, 26 Oct 2010 14:53:33 +0200 (CEST) Received: by saumur.act-europe.fr (Postfix, from userid 525) id B68DBD9BB4; Tue, 26 Oct 2010 14:53:33 +0200 (CEST) Date: Tue, 26 Oct 2010 14:53:33 +0200 From: Arnaud Charlet To: gcc-patches@gcc.gnu.org Cc: Robert A Duff Subject: [Ada] Improve Namet hash function Message-ID: <20101026125333.GA2299@adacore.com> Mime-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.5.9i X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org This patch includes improved hashing in the Namet package and increasing the size from 2**12 to 2**16 buckets. The previous hash function was well-tuned to the 2**12 size, so we have to get rid of that. The hash function now looks at every character, so it should be better when there are lots of long and similar strings. The hash function is slower in that case, but there will be fewer collisions, so speed of lookups is faster overall. It's a lot less code, too. Tested on x86_64-pc-linux-gnu, committed on trunk 2010-10-26 Bob Duff * namet.adb: Improve hash function. Index: namet.adb =================================================================== --- namet.adb (revision 165949) +++ namet.adb (working copy) @@ -39,6 +39,8 @@ with Output; use Output; with Tree_IO; use Tree_IO; with Widechar; use Widechar; +with Interfaces; use Interfaces; + package body Namet is Name_Chars_Reserve : constant := 5000; @@ -50,7 +52,7 @@ package body Namet is -- reallocating during this second unlocked phase, we reserve a bit of -- extra space before doing the release call. - Hash_Num : constant Int := 2**12; + Hash_Num : constant Int := 2**16; -- Number of headers in the hash table. Current hash algorithm is closely -- tailored to this choice, so it can only be changed if a corresponding -- change is made to the hash algorithm. @@ -743,151 +745,27 @@ package body Namet is ---------- function Hash return Hash_Index_Type is - begin - -- For the cases of 1-12 characters, all characters participate in the - -- hash. The positioning is randomized, with the bias that characters - -- later on participate fully (i.e. are added towards the right side). - - case Name_Len is - - when 0 => - return 0; - - when 1 => - return - Character'Pos (Name_Buffer (1)); - - when 2 => - return (( - Character'Pos (Name_Buffer (1))) * 64 + - Character'Pos (Name_Buffer (2))) mod Hash_Num; - - when 3 => - return ((( - Character'Pos (Name_Buffer (1))) * 16 + - Character'Pos (Name_Buffer (3))) * 16 + - Character'Pos (Name_Buffer (2))) mod Hash_Num; - - when 4 => - return (((( - Character'Pos (Name_Buffer (1))) * 8 + - Character'Pos (Name_Buffer (2))) * 8 + - Character'Pos (Name_Buffer (3))) * 8 + - Character'Pos (Name_Buffer (4))) mod Hash_Num; - - when 5 => - return ((((( - Character'Pos (Name_Buffer (4))) * 8 + - Character'Pos (Name_Buffer (1))) * 4 + - Character'Pos (Name_Buffer (3))) * 4 + - Character'Pos (Name_Buffer (5))) * 8 + - Character'Pos (Name_Buffer (2))) mod Hash_Num; - - when 6 => - return (((((( - Character'Pos (Name_Buffer (5))) * 4 + - Character'Pos (Name_Buffer (1))) * 4 + - Character'Pos (Name_Buffer (4))) * 4 + - Character'Pos (Name_Buffer (2))) * 4 + - Character'Pos (Name_Buffer (6))) * 4 + - Character'Pos (Name_Buffer (3))) mod Hash_Num; - - when 7 => - return ((((((( - Character'Pos (Name_Buffer (4))) * 4 + - Character'Pos (Name_Buffer (3))) * 4 + - Character'Pos (Name_Buffer (1))) * 4 + - Character'Pos (Name_Buffer (2))) * 2 + - Character'Pos (Name_Buffer (5))) * 2 + - Character'Pos (Name_Buffer (7))) * 2 + - Character'Pos (Name_Buffer (6))) mod Hash_Num; - - when 8 => - return (((((((( - Character'Pos (Name_Buffer (2))) * 4 + - Character'Pos (Name_Buffer (1))) * 4 + - Character'Pos (Name_Buffer (3))) * 2 + - Character'Pos (Name_Buffer (5))) * 2 + - Character'Pos (Name_Buffer (7))) * 2 + - Character'Pos (Name_Buffer (6))) * 2 + - Character'Pos (Name_Buffer (4))) * 2 + - Character'Pos (Name_Buffer (8))) mod Hash_Num; - - when 9 => - return ((((((((( - Character'Pos (Name_Buffer (2))) * 4 + - Character'Pos (Name_Buffer (1))) * 4 + - Character'Pos (Name_Buffer (3))) * 4 + - Character'Pos (Name_Buffer (4))) * 2 + - Character'Pos (Name_Buffer (8))) * 2 + - Character'Pos (Name_Buffer (7))) * 2 + - Character'Pos (Name_Buffer (5))) * 2 + - Character'Pos (Name_Buffer (6))) * 2 + - Character'Pos (Name_Buffer (9))) mod Hash_Num; - - when 10 => - return (((((((((( - Character'Pos (Name_Buffer (01))) * 2 + - Character'Pos (Name_Buffer (02))) * 2 + - Character'Pos (Name_Buffer (08))) * 2 + - Character'Pos (Name_Buffer (03))) * 2 + - Character'Pos (Name_Buffer (04))) * 2 + - Character'Pos (Name_Buffer (09))) * 2 + - Character'Pos (Name_Buffer (06))) * 2 + - Character'Pos (Name_Buffer (05))) * 2 + - Character'Pos (Name_Buffer (07))) * 2 + - Character'Pos (Name_Buffer (10))) mod Hash_Num; - - when 11 => - return ((((((((((( - Character'Pos (Name_Buffer (05))) * 2 + - Character'Pos (Name_Buffer (01))) * 2 + - Character'Pos (Name_Buffer (06))) * 2 + - Character'Pos (Name_Buffer (09))) * 2 + - Character'Pos (Name_Buffer (07))) * 2 + - Character'Pos (Name_Buffer (03))) * 2 + - Character'Pos (Name_Buffer (08))) * 2 + - Character'Pos (Name_Buffer (02))) * 2 + - Character'Pos (Name_Buffer (10))) * 2 + - Character'Pos (Name_Buffer (04))) * 2 + - Character'Pos (Name_Buffer (11))) mod Hash_Num; - - when 12 => - return (((((((((((( - Character'Pos (Name_Buffer (03))) * 2 + - Character'Pos (Name_Buffer (02))) * 2 + - Character'Pos (Name_Buffer (05))) * 2 + - Character'Pos (Name_Buffer (01))) * 2 + - Character'Pos (Name_Buffer (06))) * 2 + - Character'Pos (Name_Buffer (04))) * 2 + - Character'Pos (Name_Buffer (08))) * 2 + - Character'Pos (Name_Buffer (11))) * 2 + - Character'Pos (Name_Buffer (07))) * 2 + - Character'Pos (Name_Buffer (09))) * 2 + - Character'Pos (Name_Buffer (10))) * 2 + - Character'Pos (Name_Buffer (12))) mod Hash_Num; - -- Names longer than 12 characters are handled by taking the first - -- 6 odd numbered characters and the last 6 even numbered characters. + -- This hash function looks at every character, in order to make it + -- likely that similar strings get different hash values. The rotate by + -- 7 bits has been determined empirically to be good, and it doesn't + -- lose bits like a shift would. The final conversion can't overflow, + -- because the table is 2**16 in size. This function probably needs to + -- be changed if the hash table size is changed. + + -- Note that we could get some speed improvement by aligning the string + -- to 32 or 64 bits, and doing word-wise xor's. We could also implement + -- a growable table. It doesn't seem worth the trouble to do those + -- things, for now. - when others => declare - Even_Name_Len : constant Integer := (Name_Len) / 2 * 2; - begin - return (((((((((((( - Character'Pos (Name_Buffer (01))) * 2 + - Character'Pos (Name_Buffer (Even_Name_Len - 10))) * 2 + - Character'Pos (Name_Buffer (03))) * 2 + - Character'Pos (Name_Buffer (Even_Name_Len - 08))) * 2 + - Character'Pos (Name_Buffer (05))) * 2 + - Character'Pos (Name_Buffer (Even_Name_Len - 06))) * 2 + - Character'Pos (Name_Buffer (07))) * 2 + - Character'Pos (Name_Buffer (Even_Name_Len - 04))) * 2 + - Character'Pos (Name_Buffer (09))) * 2 + - Character'Pos (Name_Buffer (Even_Name_Len - 02))) * 2 + - Character'Pos (Name_Buffer (11))) * 2 + - Character'Pos (Name_Buffer (Even_Name_Len))) mod Hash_Num; - end; - end case; + Result : Unsigned_16 := 0; + + begin + for J in 1 .. Name_Len loop + Result := Rotate_Left (Result, 7) xor Character'Pos (Name_Buffer (J)); + end loop; + + return Hash_Index_Type (Result); end Hash; ----------------