1 ------------------------------------------------------------------------------
3 -- GNAT COMPILER COMPONENTS --
5 -- G N A T . P E R F E C T _ H A S H _ G E N E R A T O R S --
9 -- Copyright (C) 2002-2004 Ada Core Technologies, Inc. --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
20 -- Boston, MA 02110-1301, USA. --
22 -- As a special exception, if other files instantiate generics from this --
23 -- unit, or you link this unit with other files to produce an executable, --
24 -- this unit does not by itself cause the resulting executable to be --
25 -- covered by the GNU General Public License. This exception does not --
26 -- however invalidate any other reasons why the executable file might be --
27 -- covered by the GNU Public License. --
29 -- GNAT was originally developed by the GNAT team at New York University. --
30 -- Extensive contributions were provided by Ada Core Technologies Inc. --
32 ------------------------------------------------------------------------------
34 with Ada.Exceptions; use Ada.Exceptions;
35 with Ada.IO_Exceptions; use Ada.IO_Exceptions;
37 with GNAT.Heap_Sort_A; use GNAT.Heap_Sort_A;
38 with GNAT.OS_Lib; use GNAT.OS_Lib;
41 package body GNAT.Perfect_Hash_Generators is
43 -- We are using the algorithm of J. Czech as described in Zbigniew
44 -- J. Czech, George Havas, and Bohdan S. Majewski ``An Optimal
45 -- Algorithm for Generating Minimal Perfect Hash Functions'',
46 -- Information Processing Letters, 43(1992) pp.257-264, Oct.1992
48 -- This minimal perfect hash function generator is based on random
49 -- graphs and produces a hash function of the form:
51 -- h (w) = (g (f1 (w)) + g (f2 (w))) mod m
53 -- where f1 and f2 are functions that map strings into integers,
54 -- and g is a function that maps integers into [0, m-1]. h can be
55 -- order preserving. For instance, let W = {w_0, ..., w_i, ...,
56 -- w_m-1}, h can be defined such that h (w_i) = i.
58 -- This algorithm defines two possible constructions of f1 and
59 -- f2. Method b) stores the hash function in less memory space at
60 -- the expense of greater CPU time.
62 -- a) fk (w) = sum (for i in 1 .. length (w)) (Tk (i, w (i))) mod n
64 -- size (Tk) = max (for w in W) (length (w)) * size (used char set)
66 -- b) fk (w) = sum (for i in 1 .. length (w)) (Tk (i) * w (i)) mod n
68 -- size (Tk) = max (for w in W) (length (w)) but the table
69 -- lookups are replaced by multiplications.
71 -- where Tk values are randomly generated. n is defined later on
72 -- but the algorithm recommends to use a value a little bit
73 -- greater than 2m. Note that for large values of m, the main
74 -- memory space requirements comes from the memory space for
75 -- storing function g (>= 2m entries).
77 -- Random graphs are frequently used to solve difficult problems
78 -- that do not have polynomial solutions. This algorithm is based
79 -- on a weighted undirected graph. It comprises two steps: mapping
82 -- In the mapping step, a graph G = (V, E) is constructed, where V
83 -- = {0, 1, ..., n-1} and E = {(for w in W) (f1 (w), f2 (w))}. In
84 -- order for the assignment step to be successful, G has to be
85 -- acyclic. To have a high probability of generating an acyclic
86 -- graph, n >= 2m. If it is not acyclic, Tk have to be regenerated.
88 -- In the assignment step, the algorithm builds function g. As G
89 -- is acyclic, there is a vertex v1 with only one neighbor v2. Let
90 -- w_i be the word such that v1 = f1 (w_i) and v2 = f2 (w_i). Let
91 -- g (v1) = 0 by construction and g (v2) = (i - g (v1)) mod n (or
92 -- to be general, (h (i) - g (v1) mod n). If word w_j is such that
93 -- v2 = f1 (w_j) and v3 = f2 (w_j), g (v3) = (j - g (v2)) mod n
94 -- (or to be general, (h (j) - g (v2)) mod n). If w_i has no
95 -- neighbor, then another vertex is selected. The algorithm
96 -- traverses G to assign values to all the vertices. It cannot
97 -- assign a value to an already assigned vertex as G is acyclic.
99 subtype Word_Id is Integer;
100 subtype Key_Id is Integer;
101 subtype Vertex_Id is Integer;
102 subtype Edge_Id is Integer;
103 subtype Table_Id is Integer;
105 No_Vertex : constant Vertex_Id := -1;
106 No_Edge : constant Edge_Id := -1;
107 No_Table : constant Table_Id := -1;
109 Max_Word_Length : constant := 32;
110 subtype Word_Type is String (1 .. Max_Word_Length);
111 Null_Word : constant Word_Type := (others => ASCII.NUL);
112 -- Store keyword in a word. Note that the length of word is
113 -- limited to 32 characters.
115 type Key_Type is record
118 -- A key corresponds to an edge in the algorithm graph.
120 type Vertex_Type is record
124 -- A vertex can be involved in several edges. First and Last are
125 -- the bounds of an array of edges stored in a global edge table.
127 type Edge_Type is record
132 -- An edge is a peer of vertices. In the algorithm, a key
133 -- is associated to an edge.
135 package WT is new GNAT.Table (Word_Type, Word_Id, 0, 32, 32);
136 package IT is new GNAT.Table (Integer, Integer, 0, 32, 32);
137 -- The two main tables. IT is used to store several tables of
138 -- components containing only integers.
140 function Image (Int : Integer; W : Natural := 0) return String;
141 function Image (Str : String; W : Natural := 0) return String;
142 -- Return a string which includes string Str or integer Int
143 -- preceded by leading spaces if required by width W.
145 Output : File_Descriptor renames GNAT.OS_Lib.Standout;
148 Max : constant := 78;
150 Line : String (1 .. Max);
151 -- Use this line to provide buffered IO
153 procedure Add (C : Character);
154 procedure Add (S : String);
155 -- Add a character or a string in Line and update Last
158 (F : File_Descriptor;
166 -- Write string S into file F as a element of an array of one or
167 -- two dimensions. Fk (resp. Lk and Ck) indicates the first (resp
168 -- last and current) index in the k-th dimension. If F1 = L1 the
169 -- array is considered as a one dimension array. This dimension is
170 -- described by F2 and L2. This routine takes care of all the
171 -- parenthesis, spaces and commas needed to format correctly the
172 -- array. Moreover, the array is well indented and is wrapped to
173 -- fit in a 80 col line. When the line is full, the routine writes
174 -- it into file F. When the array is completed, the routine adds a
175 -- semi-colon and writes the line into file F.
178 (F : File_Descriptor);
179 -- Simulate Ada.Text_IO.New_Line with GNAT.OS_Lib
182 (F : File_Descriptor;
184 -- Simulate Ada.Text_IO.Put with GNAT.OS_Lib
186 procedure Put_Used_Char_Set
187 (File : File_Descriptor;
189 -- Output a title and a used character set
191 procedure Put_Int_Vector
192 (File : File_Descriptor;
196 -- Output a title and a vector
198 procedure Put_Int_Matrix
199 (File : File_Descriptor;
202 -- Output a title and a matrix. When the matrix has only one
203 -- non-empty dimension, it is output as a vector.
206 (File : File_Descriptor;
208 -- Output a title and an edge table
210 procedure Put_Initial_Keys
211 (File : File_Descriptor;
213 -- Output a title and a key table
215 procedure Put_Reduced_Keys
216 (File : File_Descriptor;
218 -- Output a title and a key table
220 procedure Put_Vertex_Table
221 (File : File_Descriptor;
223 -- Output a title and a vertex table
225 ----------------------------------
226 -- Character Position Selection --
227 ----------------------------------
229 -- We reduce the maximum key size by selecting representative
230 -- positions in these keys. We build a matrix with one word per
231 -- line. We fill the remaining space of a line with ASCII.NUL. The
232 -- heuristic selects the position that induces the minimum number
233 -- of collisions. If there are collisions, select another position
234 -- on the reduced key set responsible of the collisions. Apply the
235 -- heuristic until there is no more collision.
237 procedure Apply_Position_Selection;
238 -- Apply Position selection and build the reduced key table
240 procedure Parse_Position_Selection (Argument : String);
241 -- Parse Argument and compute the position set. Argument is a
242 -- list of substrings separated by commas. Each substring
243 -- represents a position or a range of positions (like x-y).
245 procedure Select_Character_Set;
246 -- Define an optimized used character set like Character'Pos in
247 -- order not to allocate tables of 256 entries.
249 procedure Select_Char_Position;
250 -- Find a min char position set in order to reduce the max key
251 -- length. The heuristic selects the position that induces the
252 -- minimum number of collisions. If there are collisions, select
253 -- another position on the reduced key set responsible of the
254 -- collisions. Apply the heuristic until there is no collision.
256 -----------------------------
257 -- Random Graph Generation --
258 -----------------------------
260 procedure Random (Seed : in out Natural);
261 -- Simulate Ada.Discrete_Numerics.Random.
263 procedure Generate_Mapping_Table
268 -- Random generation of the tables below. T is already allocated.
270 procedure Generate_Mapping_Tables
273 -- Generate the mapping tables T1 and T2. They are used to define :
274 -- fk (w) = sum (for i in 1 .. length (w)) (Tk (i, w (i))) mod n.
275 -- Keys, NK and Chars are used to compute the matrix size.
277 ---------------------------
278 -- Algorithm Computation --
279 ---------------------------
281 procedure Compute_Edges_And_Vertices (Opt : Optimization);
282 -- Compute the edge and vertex tables. These are empty when a self
283 -- loop is detected (f1 (w) = f2 (w)). The edge table is sorted by
284 -- X value and then Y value. Keys is the key table and NK the
285 -- number of keys. Chars is the set of characters really used in
286 -- Keys. NV is the number of vertices recommended by the
287 -- algorithm. T1 and T2 are the mapping tables needed to compute
288 -- f1 (w) and f2 (w).
290 function Acyclic return Boolean;
291 -- Return True when the graph is acyclic. Vertices is the current
292 -- vertex table and Edges the current edge table.
294 procedure Assign_Values_To_Vertices;
295 -- Execute the assignment step of the algorithm. Keys is the
296 -- current key table. Vertices and Edges represent the random
297 -- graph. G is the result of the assignment step such that:
298 -- h (w) = (g (f1 (w)) + g (f2 (w))) mod m
305 -- For an optimization of CPU_Time return
306 -- fk (w) = sum (for i in 1 .. length (w)) (Tk (i, w (i))) mod n
307 -- For an optimization of Memory_Space return
308 -- fk (w) = sum (for i in 1 .. length (w)) (Tk (i) * w (i)) mod n
311 -------------------------------
312 -- Internal Table Management --
313 -------------------------------
315 function Allocate (N : Natural; S : Natural) return Table_Id;
316 -- procedure Deallocate (N : Natural; S : Natural);
322 Key_Size : constant := 1;
323 Keys : Table_Id := No_Table;
325 -- NK : Number of Keys
327 function Initial (K : Key_Id) return Word_Id;
328 pragma Inline (Initial);
330 function Reduced (K : Key_Id) return Word_Id;
331 pragma Inline (Reduced);
333 function Get_Key (F : Key_Id) return Key_Type;
334 procedure Set_Key (F : Key_Id; Item : Key_Type);
335 -- Comments needed here ???
341 Char_Pos_Size : constant := 1;
342 Char_Pos_Set : Table_Id := No_Table;
343 Char_Pos_Set_Len : Natural;
344 -- Character Selected Position Set
346 function Get_Char_Pos (P : Natural) return Natural;
347 procedure Set_Char_Pos (P : Natural; Item : Natural);
348 -- Comments needed here ???
354 Used_Char_Size : constant := 1;
355 Used_Char_Set : Table_Id := No_Table;
356 Used_Char_Set_Len : Natural;
357 -- Used Character Set : Define a new character mapping. When all
358 -- the characters are not present in the keys, in order to reduce
359 -- the size of some tables, we redefine the character mapping.
361 function Get_Used_Char (C : Character) return Natural;
362 procedure Set_Used_Char (C : Character; Item : Natural);
368 Rand_Tab_Item_Size : constant := 1;
369 T1 : Table_Id := No_Table;
370 T2 : Table_Id := No_Table;
371 Rand_Tab_Len_1 : Natural;
372 Rand_Tab_Len_2 : Natural;
373 -- T1 : Values table to compute F1
374 -- T2 : Values table to compute F2
376 function Get_Rand_Tab (T : Integer; X, Y : Natural) return Natural;
377 procedure Set_Rand_Tab (T : Integer; X, Y : Natural; Item : Natural);
383 Graph_Item_Size : constant := 1;
384 G : Table_Id := No_Table;
386 -- G : Values table to compute G
388 function Get_Graph (F : Natural) return Integer;
389 procedure Set_Graph (F : Natural; Item : Integer);
390 -- Comments needed ???
396 Edge_Size : constant := 3;
397 Edges : Table_Id := No_Table;
399 -- Edges : Edge table of the random graph G
401 function Get_Edges (F : Natural) return Edge_Type;
402 procedure Set_Edges (F : Natural; Item : Edge_Type);
408 Vertex_Size : constant := 2;
410 Vertices : Table_Id := No_Table;
411 -- Vertex table of the random graph G
414 -- Number of Vertices
416 function Get_Vertices (F : Natural) return Vertex_Type;
417 procedure Set_Vertices (F : Natural; Item : Vertex_Type);
418 -- Comments needed ???
421 -- Ratio between Keys and Vertices (parameter of Czech's algorithm)
424 -- Optimization mode (memory vs CPU)
427 -- Maximum of all the word length
432 function Type_Size (L : Natural) return Natural;
433 -- Given the last L of an unsigned integer type T, return its size
439 function Acyclic return Boolean
441 Marks : array (0 .. NV - 1) of Vertex_Id := (others => No_Vertex);
447 -- Propagate Mark from X to Y. X is already marked. Mark Y and
448 -- propagate it to the edges of Y except the one representing
449 -- the same key. Return False when Y is marked with Mark.
460 E : constant Edge_Type := Get_Edges (Edge);
461 K : constant Key_Id := E.Key;
462 Y : constant Vertex_Id := E.Y;
463 M : constant Vertex_Id := Marks (E.Y);
470 elsif M = No_Vertex then
472 V := Get_Vertices (Y);
474 for J in V.First .. V.Last loop
476 -- Do not propagate to the edge representing the same key.
478 if Get_Edges (J).Key /= K
479 and then not Traverse (J, Mark)
491 -- Start of processing for Acyclic
494 -- Edges valid range is
496 for J in 1 .. Edges_Len - 1 loop
498 Edge := Get_Edges (J);
500 -- Mark X of E when it has not been already done
502 if Marks (Edge.X) = No_Vertex then
503 Marks (Edge.X) := Edge.X;
506 -- Traverse E when this has not already been done
508 if Marks (Edge.Y) = No_Vertex
509 and then not Traverse (J, Edge.X)
522 procedure Add (C : Character) is
524 Line (Last + 1) := C;
532 procedure Add (S : String) is
533 Len : constant Natural := S'Length;
536 Line (Last + 1 .. Last + Len) := S;
544 function Allocate (N : Natural; S : Natural) return Table_Id is
545 L : constant Integer := IT.Last;
548 IT.Set_Last (L + N * S);
552 ------------------------------
553 -- Apply_Position_Selection --
554 ------------------------------
556 procedure Apply_Position_Selection is
558 WT.Set_Last (2 * NK - 1);
559 for J in 0 .. NK - 1 loop
561 I_Word : constant Word_Type := WT.Table (Initial (J));
562 R_Word : Word_Type := Null_Word;
563 Index : Natural := I_Word'First - 1;
566 -- Select the characters of Word included in the
567 -- position selection.
569 for C in 0 .. Char_Pos_Set_Len - 1 loop
570 exit when I_Word (Get_Char_Pos (C)) = ASCII.NUL;
572 R_Word (Index) := I_Word (Get_Char_Pos (C));
575 -- Build the new table with the reduced word
577 WT.Table (Reduced (J)) := R_Word;
578 Set_Key (J, (Edge => No_Edge));
581 end Apply_Position_Selection;
587 procedure Compute (Position : String := Default_Position) is
589 Keys := Allocate (NK, Key_Size);
592 Put_Initial_Keys (Output, "Initial Key Table");
595 if Position'Length /= 0 then
596 Parse_Position_Selection (Position);
598 Select_Char_Position;
603 (Output, "Char Position Set", Char_Pos_Set, Char_Pos_Set_Len);
606 Apply_Position_Selection;
609 Put_Reduced_Keys (Output, "Reduced Keys Table");
612 Select_Character_Set;
615 Put_Used_Char_Set (Output, "Character Position Table");
618 -- Perform Czech's algorithm
621 Generate_Mapping_Tables (Opt, S);
622 Compute_Edges_And_Vertices (Opt);
624 -- When graph is not empty (no self-loop from previous
625 -- operation) and not acyclic.
627 exit when 0 < Edges_Len and then Acyclic;
630 Assign_Values_To_Vertices;
633 -------------------------------
634 -- Assign_Values_To_Vertices --
635 -------------------------------
637 procedure Assign_Values_To_Vertices is
640 procedure Assign (X : Vertex_Id);
641 -- Execute assignment on X's neighbors except the vertex that
642 -- we are coming from which is already assigned.
648 procedure Assign (X : Vertex_Id)
651 V : constant Vertex_Type := Get_Vertices (X);
654 for J in V.First .. V.Last loop
656 if Get_Graph (E.Y) = -1 then
657 Set_Graph (E.Y, (E.Key - Get_Graph (X)) mod NK);
663 -- Start of processing for Assign_Values_To_Vertices
666 -- Value -1 denotes an unitialized value as it is supposed to
667 -- be in the range 0 .. NK.
671 G := Allocate (Graph_Len, Graph_Item_Size);
674 for J in 0 .. Graph_Len - 1 loop
678 for K in 0 .. NK - 1 loop
679 X := Get_Edges (Get_Key (K).Edge).X;
681 if Get_Graph (X) = -1 then
687 for J in 0 .. Graph_Len - 1 loop
688 if Get_Graph (J) = -1 then
694 Put_Int_Vector (Output, "Assign Values To Vertices", G, Graph_Len);
696 end Assign_Values_To_Vertices;
698 --------------------------------
699 -- Compute_Edges_And_Vertices --
700 --------------------------------
702 procedure Compute_Edges_And_Vertices (Opt : Optimization) is
707 Vertex : Vertex_Type;
708 Not_Acyclic : Boolean := False;
710 procedure Move (From : Natural; To : Natural);
711 function Lt (L, R : Natural) return Boolean;
712 -- Subprograms needed for GNAT.Heap_Sort_A
718 procedure Move (From : Natural; To : Natural) is
720 Set_Edges (To, Get_Edges (From));
727 function Lt (L, R : Natural) return Boolean is
728 EL : constant Edge_Type := Get_Edges (L);
729 ER : constant Edge_Type := Get_Edges (R);
732 return EL.X < ER.X or else (EL.X = ER.X and then EL.Y < ER.Y);
735 -- Start of processing for Compute_Edges_And_Vertices
738 -- We store edges from 1 to 2 * NK and leave
739 -- zero alone in order to use GNAT.Heap_Sort_A.
741 Edges_Len := 2 * NK + 1;
743 if Edges = No_Table then
744 Edges := Allocate (Edges_Len, Edge_Size);
747 if Vertices = No_Table then
748 Vertices := Allocate (NV, Vertex_Size);
751 for J in 0 .. NV - 1 loop
752 Set_Vertices (J, (No_Vertex, No_Vertex - 1));
755 -- For each w, X = f1 (w) and Y = f2 (w)
757 for J in 0 .. NK - 1 loop
762 X := Sum (WT.Table (Reduced (J)), T1, Opt);
763 Y := Sum (WT.Table (Reduced (J)), T2, Opt);
765 -- Discard T1 and T2 as soon as we discover a self loop
772 -- We store (X, Y) and (Y, X) to ease assignment step
774 Set_Edges (2 * J + 1, (X, Y, J));
775 Set_Edges (2 * J + 2, (Y, X, J));
778 -- Return an empty graph when self loop detected
785 Put_Edges (Output, "Unsorted Edge Table");
786 Put_Int_Matrix (Output, "Function Table 1", T1);
787 Put_Int_Matrix (Output, "Function Table 2", T2);
790 -- Enforce consistency between edges and keys. Construct
791 -- Vertices and compute the list of neighbors of a vertex
792 -- First .. Last as Edges is sorted by X and then Y. To
793 -- compute the neighbor list, sort the edges.
797 Move'Unrestricted_Access,
798 Lt'Unrestricted_Access);
801 Put_Edges (Output, "Sorted Edge Table");
802 Put_Int_Matrix (Output, "Function Table 1", T1);
803 Put_Int_Matrix (Output, "Function Table 2", T2);
806 -- Edges valid range is 1 .. 2 * NK
808 for E in 1 .. Edges_Len - 1 loop
809 Edge := Get_Edges (E);
810 Key := Get_Key (Edge.Key);
812 if Key.Edge = No_Edge then
814 Set_Key (Edge.Key, Key);
817 Vertex := Get_Vertices (Edge.X);
819 if Vertex.First = No_Edge then
824 Set_Vertices (Edge.X, Vertex);
828 Put_Reduced_Keys (Output, "Key Table");
829 Put_Edges (Output, "Edge Table");
830 Put_Vertex_Table (Output, "Vertex Table");
833 end Compute_Edges_And_Vertices;
841 Item_Size : out Natural;
842 Length_1 : out Natural;
843 Length_2 : out Natural)
847 when Character_Position =>
849 Length_1 := Char_Pos_Set_Len;
852 when Used_Character_Set =>
857 when Function_Table_1
858 | Function_Table_2 =>
859 Item_Size := Type_Size (NV);
860 Length_1 := Rand_Tab_Len_1;
861 Length_2 := Rand_Tab_Len_2;
864 Item_Size := Type_Size (NK);
874 procedure Finalize is
882 Char_Pos_Set := No_Table;
883 Char_Pos_Set_Len := 0;
885 Used_Char_Set := No_Table;
886 Used_Char_Set_Len := 0;
900 Vertices := No_Table;
904 ----------------------------
905 -- Generate_Mapping_Table --
906 ----------------------------
908 procedure Generate_Mapping_Table
915 for J in 0 .. L1 - 1 loop
916 for K in 0 .. L2 - 1 loop
918 Set_Rand_Tab (T, J, K, S mod NV);
921 end Generate_Mapping_Table;
923 -----------------------------
924 -- Generate_Mapping_Tables --
925 -----------------------------
927 procedure Generate_Mapping_Tables
932 -- If T1 and T2 are already allocated no need to do it
933 -- twice. Reuse them as their size has not changes.
935 if T1 = No_Table and then T2 = No_Table then
937 Used_Char_Last : Natural := 0;
941 if Opt = CPU_Time then
942 for P in reverse Character'Range loop
943 Used_Char := Get_Used_Char (P);
944 if Used_Char /= 0 then
945 Used_Char_Last := Used_Char;
951 Rand_Tab_Len_1 := Char_Pos_Set_Len;
952 Rand_Tab_Len_2 := Used_Char_Last + 1;
953 T1 := Allocate (Rand_Tab_Len_1 * Rand_Tab_Len_2,
955 T2 := Allocate (Rand_Tab_Len_1 * Rand_Tab_Len_2,
960 Generate_Mapping_Table (T1, Rand_Tab_Len_1, Rand_Tab_Len_2, S);
961 Generate_Mapping_Table (T2, Rand_Tab_Len_1, Rand_Tab_Len_2, S);
964 Put_Used_Char_Set (Output, "Used Character Set");
965 Put_Int_Matrix (Output, "Function Table 1", T1);
966 Put_Int_Matrix (Output, "Function Table 2", T2);
968 end Generate_Mapping_Tables;
974 function Get_Char_Pos (P : Natural) return Natural is
975 N : constant Natural := Char_Pos_Set + P;
985 function Get_Edges (F : Natural) return Edge_Type is
986 N : constant Natural := Edges + (F * Edge_Size);
991 E.Y := IT.Table (N + 1);
992 E.Key := IT.Table (N + 2);
1000 function Get_Graph (F : Natural) return Integer is
1001 N : constant Natural := G + F * Graph_Item_Size;
1004 return IT.Table (N);
1011 function Get_Key (F : Key_Id) return Key_Type is
1012 N : constant Natural := Keys + F * Key_Size;
1016 K.Edge := IT.Table (N);
1024 function Get_Rand_Tab (T : Integer; X, Y : Natural) return Natural is
1025 N : constant Natural :=
1026 T + ((Y * Rand_Tab_Len_1) + X) * Rand_Tab_Item_Size;
1029 return IT.Table (N);
1036 function Get_Used_Char (C : Character) return Natural is
1037 N : constant Natural :=
1038 Used_Char_Set + Character'Pos (C) * Used_Char_Size;
1041 return IT.Table (N);
1048 function Get_Vertices (F : Natural) return Vertex_Type is
1049 N : constant Natural := Vertices + (F * Vertex_Size);
1053 V.First := IT.Table (N);
1054 V.Last := IT.Table (N + 1);
1062 function Image (Int : Integer; W : Natural := 0) return String is
1063 B : String (1 .. 32);
1066 procedure Img (V : Natural);
1067 -- Compute image of V into B, starting at B (L), incrementing L
1073 procedure Img (V : Natural) is
1080 B (L) := Character'Val ((V mod 10) + Character'Pos ('0'));
1083 -- Start of processing for Image
1094 return Image (B (1 .. L), W);
1101 function Image (Str : String; W : Natural := 0) return String is
1102 Len : constant Natural := Str'Length;
1103 Max : Natural := Len;
1111 Buf : String (1 .. Max) := (1 .. Max => ' ');
1114 for J in 0 .. Len - 1 loop
1115 Buf (Max - Len + 1 + J) := Str (Str'First + J);
1126 function Initial (K : Key_Id) return Word_Id is
1135 procedure Initialize
1137 K_To_V : Float := Default_K_To_V;
1138 Optim : Optimization := CPU_Time)
1148 Char_Pos_Set := No_Table;
1149 Char_Pos_Set_Len := 0;
1163 Word : Word_Type := Null_Word;
1164 Len : constant Natural := Value'Length;
1167 Word (1 .. Len) := Value (Value'First .. Value'First + Len - 1);
1169 WT.Table (NK) := Word;
1171 NV := Natural (Float (NK) * K2V);
1182 procedure New_Line (F : File_Descriptor) is
1183 EOL : constant Character := ASCII.LF;
1186 if Write (F, EOL'Address, 1) /= 1 then
1187 raise Program_Error;
1191 ------------------------------
1192 -- Parse_Position_Selection --
1193 ------------------------------
1195 procedure Parse_Position_Selection (Argument : String) is
1196 N : Natural := Argument'First;
1197 L : constant Natural := Argument'Last;
1198 M : constant Natural := MKL;
1200 T : array (1 .. M) of Boolean := (others => False);
1202 function Parse_Index return Natural;
1203 -- Parse argument starting at index N to find an index
1209 function Parse_Index return Natural
1211 C : Character := Argument (N);
1220 if C not in '0' .. '9' then
1222 (Program_Error'Identity, "cannot read position argument");
1225 while C in '0' .. '9' loop
1226 V := V * 10 + (Character'Pos (C) - Character'Pos ('0'));
1235 -- Start of processing for Parse_Position_Selection
1238 Char_Pos_Set_Len := 2 * NK;
1240 -- Empty specification means all the positions
1243 Char_Pos_Set_Len := M;
1244 Char_Pos_Set := Allocate (Char_Pos_Set_Len, Char_Pos_Size);
1246 for C in 0 .. Char_Pos_Set_Len - 1 loop
1247 Set_Char_Pos (C, C + 1);
1253 First, Last : Natural;
1256 First := Parse_Index;
1261 if N <= L and then Argument (N) = '-' then
1263 Last := Parse_Index;
1266 -- Include the positions in the selection
1268 for J in First .. Last loop
1275 if Argument (N) /= ',' then
1277 (Program_Error'Identity, "cannot read position argument");
1283 -- Compute position selection length
1286 for J in T'Range loop
1292 -- Fill position selection
1294 Char_Pos_Set_Len := N;
1295 Char_Pos_Set := Allocate (Char_Pos_Set_Len, Char_Pos_Size);
1298 for J in T'Range loop
1300 Set_Char_Pos (N, J);
1305 end Parse_Position_Selection;
1311 procedure Produce (Pkg_Name : String := Default_Pkg_Name) is
1312 File : File_Descriptor;
1315 -- For call to Close;
1317 function Type_Img (L : Natural) return String;
1318 -- Return the larger unsigned type T such that T'Last < L
1320 function Range_Img (F, L : Natural; T : String := "") return String;
1321 -- Return string "[T range ]F .. L"
1323 function Array_Img (N, T, R1 : String; R2 : String := "") return String;
1324 -- Return string "N : constant array (R1[, R2]) of T;"
1330 function Type_Img (L : Natural) return String is
1331 S : constant String := Image (Type_Size (L));
1332 U : String := "Unsigned_ ";
1336 for J in S'Range loop
1348 function Range_Img (F, L : Natural; T : String := "") return String is
1349 FI : constant String := Image (F);
1350 FL : constant Natural := FI'Length;
1351 LI : constant String := Image (L);
1352 LL : constant Natural := LI'Length;
1353 TL : constant Natural := T'Length;
1354 RI : String (1 .. TL + 7 + FL + 4 + LL);
1359 RI (Len + 1 .. Len + TL) := T;
1361 RI (Len + 1 .. Len + 7) := " range ";
1365 RI (Len + 1 .. Len + FL) := FI;
1367 RI (Len + 1 .. Len + 4) := " .. ";
1369 RI (Len + 1 .. Len + LL) := LI;
1371 return RI (1 .. Len);
1387 Add (" : constant array (");
1398 return Line (1 .. Last);
1405 PLen : constant Natural := Pkg_Name'Length;
1406 FName : String (1 .. PLen + 4);
1408 -- Start of processing for Produce
1411 FName (1 .. PLen) := Pkg_Name;
1412 for J in 1 .. PLen loop
1413 if FName (J) in 'A' .. 'Z' then
1414 FName (J) := Character'Val (Character'Pos (FName (J))
1415 - Character'Pos ('A')
1416 + Character'Pos ('a'));
1418 elsif FName (J) = '.' then
1423 FName (PLen + 1 .. PLen + 4) := ".ads";
1425 File := Create_File (FName, Text);
1426 Put (File, "package ");
1427 Put (File, Pkg_Name);
1430 Put (File, " function Hash (S : String) return Natural;");
1433 Put (File, Pkg_Name);
1436 Close (File, Status);
1442 FName (PLen + 4) := 'b';
1444 File := Create_File (FName, Text);
1445 Put (File, "with Interfaces; use Interfaces;");
1448 Put (File, "package body ");
1449 Put (File, Pkg_Name);
1454 if Opt = CPU_Time then
1455 Put (File, Array_Img ("C", Type_Img (256), "Character"));
1458 F := Character'Pos (Character'First);
1459 L := Character'Pos (Character'Last);
1461 for J in Character'Range loop
1462 P := Get_Used_Char (J);
1463 Put (File, Image (P), 0, 0, 0, F, L, Character'Pos (J));
1470 L := Char_Pos_Set_Len - 1;
1472 Put (File, Array_Img ("P", "Natural", Range_Img (F, L)));
1475 for J in F .. L loop
1476 Put (File, Image (Get_Char_Pos (J)), 0, 0, 0, F, L, J);
1481 if Opt = CPU_Time then
1484 Array_Img ("T1", Type_Img (NV),
1485 Range_Img (0, Rand_Tab_Len_1 - 1),
1486 Range_Img (0, Rand_Tab_Len_2 - 1,
1493 Array_Img ("T1", Type_Img (NV),
1494 Range_Img (0, Rand_Tab_Len_1 - 1)),
1500 if Opt = CPU_Time then
1503 Array_Img ("T2", Type_Img (NV),
1504 Range_Img (0, Rand_Tab_Len_1 - 1),
1505 Range_Img (0, Rand_Tab_Len_2 - 1,
1512 Array_Img ("T2", Type_Img (NV),
1513 Range_Img (0, Rand_Tab_Len_1 - 1)),
1521 Array_Img ("G", Type_Img (NK),
1522 Range_Img (0, Graph_Len - 1)),
1526 Put (File, " function Hash (S : String) return Natural is");
1528 Put (File, " F : constant Natural := S'First - 1;");
1530 Put (File, " L : constant Natural := S'Length;");
1532 Put (File, " F1, F2 : Natural := 0;");
1535 Put (File, " J : ");
1537 if Opt = CPU_Time then
1538 Put (File, Type_Img (256));
1540 Put (File, "Natural");
1546 Put (File, " begin");
1548 Put (File, " for K in P'Range loop");
1550 Put (File, " exit when L < P (K);");
1552 Put (File, " J := ");
1554 if Opt = CPU_Time then
1557 Put (File, "Character'Pos");
1560 Put (File, " (S (P (K) + F));");
1563 Put (File, " F1 := (F1 + Natural (T1 (K");
1565 if Opt = CPU_Time then
1571 if Opt = Memory_Space then
1575 Put (File, ") mod ");
1576 Put (File, Image (NV));
1580 Put (File, " F2 := (F2 + Natural (T2 (K");
1582 if Opt = CPU_Time then
1588 if Opt = Memory_Space then
1592 Put (File, ") mod ");
1593 Put (File, Image (NV));
1597 Put (File, " end loop;");
1601 " return (Natural (G (F1)) + Natural (G (F2))) mod ");
1603 Put (File, Image (NK));
1606 Put (File, " end Hash;");
1610 Put (File, Pkg_Name);
1613 Close (File, Status);
1624 procedure Put (F : File_Descriptor; S : String) is
1625 Len : constant Natural := S'Length;
1628 if Write (F, S'Address, Len) /= Len then
1629 raise Program_Error;
1638 (F : File_Descriptor;
1647 Len : constant Natural := S'Length;
1657 Put (F, Line (1 .. Last));
1662 -- Start of processing for Put
1665 if C1 = F1 and then C2 = F2 then
1669 if Last + Len + 3 > Max then
1674 Line (Last + 1 .. Last + 5) := " ";
1678 if C1 = F1 and then C2 = F2 then
1692 Line (Last + 1 .. Last + Len) := S;
1715 -----------------------
1716 -- Put_Used_Char_Set --
1717 -----------------------
1719 procedure Put_Used_Char_Set
1720 (File : File_Descriptor;
1723 F : constant Natural := Character'Pos (Character'First);
1724 L : constant Natural := Character'Pos (Character'Last);
1730 for J in Character'Range loop
1732 (File, Image (Get_Used_Char (J)), 0, 0, 0, F, L, Character'Pos (J));
1734 end Put_Used_Char_Set;
1740 procedure Put_Int_Matrix
1741 (File : File_Descriptor;
1745 F1 : constant Natural := 0;
1746 L1 : constant Natural := Rand_Tab_Len_1 - 1;
1747 F2 : constant Natural := 0;
1748 L2 : constant Natural := Rand_Tab_Len_2 - 1;
1755 for J in F1 .. L1 loop
1757 Image (Get_Rand_Tab (Table, J, F2)), 0, 0, 0, F1, L1, J);
1761 for J in F1 .. L1 loop
1762 for K in F2 .. L2 loop
1764 Image (Get_Rand_Tab (Table, J, K)), F1, L1, J, F2, L2, K);
1770 --------------------
1771 -- Put_Int_Vector --
1772 --------------------
1774 procedure Put_Int_Vector
1775 (File : File_Descriptor;
1780 F2 : constant Natural := 0;
1781 L2 : constant Natural := Length - 1;
1787 for J in F2 .. L2 loop
1788 Put (File, Image (IT.Table (Root + J)), 0, 0, 0, F2, L2, J);
1797 (File : File_Descriptor;
1801 F1 : constant Natural := 1;
1802 L1 : constant Natural := Edges_Len - 1;
1803 M : constant Natural := Max / 5;
1809 -- Edges valid range is 1 .. Edge_Len - 1
1811 for J in F1 .. L1 loop
1813 Put (File, Image (J, M), F1, L1, J, 1, 4, 1);
1814 Put (File, Image (E.X, M), F1, L1, J, 1, 4, 2);
1815 Put (File, Image (E.Y, M), F1, L1, J, 1, 4, 3);
1816 Put (File, Image (E.Key, M), F1, L1, J, 1, 4, 4);
1820 ---------------------------
1821 -- Put_Initial_Keys --
1822 ---------------------------
1824 procedure Put_Initial_Keys
1825 (File : File_Descriptor;
1828 F1 : constant Natural := 0;
1829 L1 : constant Natural := NK - 1;
1830 M : constant Natural := Max / 5;
1837 for J in F1 .. L1 loop
1839 Put (File, Image (J, M), F1, L1, J, 1, 3, 1);
1840 Put (File, Image (K.Edge, M), F1, L1, J, 1, 3, 2);
1841 Put (File, WT.Table (Initial (J)), F1, L1, J, 1, 3, 3);
1843 end Put_Initial_Keys;
1845 ---------------------------
1846 -- Put_Reduced_Keys --
1847 ---------------------------
1849 procedure Put_Reduced_Keys
1850 (File : File_Descriptor;
1853 F1 : constant Natural := 0;
1854 L1 : constant Natural := NK - 1;
1855 M : constant Natural := Max / 5;
1862 for J in F1 .. L1 loop
1864 Put (File, Image (J, M), F1, L1, J, 1, 3, 1);
1865 Put (File, Image (K.Edge, M), F1, L1, J, 1, 3, 2);
1866 Put (File, WT.Table (Reduced (J)), F1, L1, J, 1, 3, 3);
1868 end Put_Reduced_Keys;
1870 ----------------------
1871 -- Put_Vertex_Table --
1872 ----------------------
1874 procedure Put_Vertex_Table
1875 (File : File_Descriptor;
1878 F1 : constant Natural := 0;
1879 L1 : constant Natural := NV - 1;
1880 M : constant Natural := Max / 4;
1887 for J in F1 .. L1 loop
1888 V := Get_Vertices (J);
1889 Put (File, Image (J, M), F1, L1, J, 1, 3, 1);
1890 Put (File, Image (V.First, M), F1, L1, J, 1, 3, 2);
1891 Put (File, Image (V.Last, M), F1, L1, J, 1, 3, 3);
1893 end Put_Vertex_Table;
1899 procedure Random (Seed : in out Natural)
1901 -- Park & Miller Standard Minimal using Schrage's algorithm to
1902 -- avoid overflow: Xn+1 = 16807 * Xn mod (2 ** 31 - 1)
1909 R := Seed mod 127773;
1911 X := 16807 * R - 2836 * Q;
1914 Seed := X + 2147483647;
1924 function Reduced (K : Key_Id) return Word_Id is
1929 --------------------------
1930 -- Select_Character_Set --
1931 --------------------------
1933 procedure Select_Character_Set
1935 Last : Natural := 0;
1936 Used : array (Character) of Boolean := (others => False);
1939 for J in 0 .. NK - 1 loop
1940 for K in 1 .. Max_Word_Length loop
1941 exit when WT.Table (Initial (J))(K) = ASCII.NUL;
1942 Used (WT.Table (Initial (J))(K)) := True;
1946 Used_Char_Set_Len := 256;
1947 Used_Char_Set := Allocate (Used_Char_Set_Len, Used_Char_Size);
1949 for J in Used'Range loop
1951 Set_Used_Char (J, Last);
1954 Set_Used_Char (J, 0);
1957 end Select_Character_Set;
1959 --------------------------
1960 -- Select_Char_Position --
1961 --------------------------
1963 procedure Select_Char_Position is
1965 type Vertex_Table_Type is array (Natural range <>) of Vertex_Type;
1967 procedure Build_Identical_Keys_Sets
1968 (Table : in out Vertex_Table_Type;
1969 Last : in out Natural;
1971 -- Build a list of keys subsets that are identical with the
1972 -- current position selection plus Pos. Once this routine is
1973 -- called, reduced words are sorted by subsets and each item
1974 -- (First, Last) in Sets defines the range of identical keys.
1976 function Count_Identical_Keys
1977 (Table : Vertex_Table_Type;
1981 -- For each subset in Sets, count the number of identical keys
1982 -- if we add Pos to the current position selection.
1984 Sel_Position : IT.Table_Type (1 .. MKL);
1985 Last_Sel_Pos : Natural := 0;
1987 -------------------------------
1988 -- Build_Identical_Keys_Sets --
1989 -------------------------------
1991 procedure Build_Identical_Keys_Sets
1992 (Table : in out Vertex_Table_Type;
1993 Last : in out Natural;
1996 S : constant Vertex_Table_Type := Table (1 .. Last);
1997 C : constant Natural := Pos;
2002 -- First and last words of a subset
2007 -- For each subset in S, extract the new subsets we have by
2008 -- adding C in the position selection.
2010 for J in S'Range loop
2013 -- GNAT.Heap_Sort assumes that the first array index
2014 -- is 1. Offset defines the translation to operate.
2016 procedure Move (From : Natural; To : Natural);
2017 function Lt (L, R : Natural) return Boolean;
2018 -- Subprograms needed by GNAT.Heap_Sort_A
2024 procedure Move (From : Natural; To : Natural) is
2025 Target, Source : Natural;
2030 Target := Offset + To;
2032 Source := Offset + From;
2035 Source := Offset + From;
2036 Target := Offset + To;
2039 WT.Table (Reduced (Target)) := WT.Table (Reduced (Source));
2046 function Lt (L, R : Natural) return Boolean is
2047 C : constant Natural := Pos;
2054 Right := Offset + R;
2060 Right := Offset + R;
2063 return WT.Table (Reduced (Left))(C)
2064 < WT.Table (Reduced (Right))(C);
2067 -- Start of processing for Build_Identical_Key_Sets
2070 Offset := S (J).First - 1;
2072 (S (J).Last - S (J).First + 1,
2073 Move'Unrestricted_Access,
2074 Lt'Unrestricted_Access);
2078 for N in S (J).First .. S (J).Last - 1 loop
2080 -- Two contiguous words are identical
2082 if WT.Table (Reduced (N))(C) =
2083 WT.Table (Reduced (N + 1))(C)
2085 -- This is the first word of the subset
2093 -- This is the last word of the subset
2097 Table (Last) := (F, L);
2102 -- This is the last word of the subset and of the set
2106 Table (Last) := (F, L);
2110 end Build_Identical_Keys_Sets;
2112 --------------------------
2113 -- Count_Identical_Keys --
2114 --------------------------
2116 function Count_Identical_Keys
2117 (Table : Vertex_Table_Type;
2122 N : array (Character) of Natural;
2127 -- For each subset, count the number of words that are still
2128 -- identical when we include Sel_Position (Last_Sel_Pos) in
2129 -- the position selection. Only focus on this position as the
2130 -- other positions already produce identical keys.
2132 for S in 1 .. Last loop
2134 -- Count the occurrences of the different characters
2137 for K in Table (S).First .. Table (S).Last loop
2138 C := WT.Table (Reduced (K))(Pos);
2142 -- Add to the total when there are two identical keys
2144 for J in N'Range loop
2152 end Count_Identical_Keys;
2154 -- Start of processing for Select_Char_Position
2157 for C in Sel_Position'Range loop
2158 Sel_Position (C) := C;
2161 -- Initialization of Words
2163 WT.Set_Last (2 * NK - 1);
2165 for K in 0 .. NK - 1 loop
2166 WT.Table (Reduced (K) + 1) := WT.Table (Initial (K));
2170 Collisions : Natural;
2171 Min_Collisions : Natural := NK;
2172 Old_Collisions : Natural;
2173 Min_Coll_Sel_Pos : Natural := 0; -- init to kill warning
2174 Min_Coll_Sel_Pos_Idx : Natural := 0; -- init to kill warning
2175 Same_Keys_Sets_Table : Vertex_Table_Type (1 .. NK);
2176 Same_Keys_Sets_Last : Natural := 1;
2179 Same_Keys_Sets_Table (1) := (1, NK);
2182 -- Preserve minimum identical keys and check later on
2183 -- that this value is strictly decrementing. Otherwise,
2184 -- it means that two keys are stricly identical.
2186 Old_Collisions := Min_Collisions;
2188 -- Find which position reduces the most of collisions
2190 for J in Last_Sel_Pos + 1 .. Sel_Position'Last loop
2191 Collisions := Count_Identical_Keys
2192 (Same_Keys_Sets_Table,
2193 Same_Keys_Sets_Last,
2196 if Collisions < Min_Collisions then
2197 Min_Collisions := Collisions;
2198 Min_Coll_Sel_Pos := Sel_Position (J);
2199 Min_Coll_Sel_Pos_Idx := J;
2203 if Old_Collisions = Min_Collisions then
2205 (Program_Error'Identity, "some keys are identical");
2208 -- Insert selected position and sort Sel_Position table
2210 Last_Sel_Pos := Last_Sel_Pos + 1;
2211 Sel_Position (Last_Sel_Pos + 1 .. Min_Coll_Sel_Pos_Idx) :=
2212 Sel_Position (Last_Sel_Pos .. Min_Coll_Sel_Pos_Idx - 1);
2213 Sel_Position (Last_Sel_Pos) := Min_Coll_Sel_Pos;
2215 for P in 1 .. Last_Sel_Pos - 1 loop
2216 if Min_Coll_Sel_Pos < Sel_Position (P) then
2217 Sel_Position (P + 1 .. Last_Sel_Pos) :=
2218 Sel_Position (P .. Last_Sel_Pos - 1);
2219 Sel_Position (P) := Min_Coll_Sel_Pos;
2224 exit when Min_Collisions = 0;
2226 Build_Identical_Keys_Sets
2227 (Same_Keys_Sets_Table,
2228 Same_Keys_Sets_Last,
2233 Char_Pos_Set_Len := Last_Sel_Pos;
2234 Char_Pos_Set := Allocate (Char_Pos_Set_Len, Char_Pos_Size);
2236 for C in 1 .. Last_Sel_Pos loop
2237 Set_Char_Pos (C - 1, Sel_Position (C));
2239 end Select_Char_Position;
2245 procedure Set_Char_Pos (P : Natural; Item : Natural) is
2246 N : constant Natural := Char_Pos_Set + P;
2249 IT.Table (N) := Item;
2256 procedure Set_Edges (F : Natural; Item : Edge_Type) is
2257 N : constant Natural := Edges + (F * Edge_Size);
2260 IT.Table (N) := Item.X;
2261 IT.Table (N + 1) := Item.Y;
2262 IT.Table (N + 2) := Item.Key;
2269 procedure Set_Graph (F : Natural; Item : Integer) is
2270 N : constant Natural := G + (F * Graph_Item_Size);
2273 IT.Table (N) := Item;
2280 procedure Set_Key (F : Key_Id; Item : Key_Type) is
2281 N : constant Natural := Keys + F * Key_Size;
2284 IT.Table (N) := Item.Edge;
2291 procedure Set_Rand_Tab (T : Integer; X, Y : Natural; Item : Natural) is
2292 N : constant Natural :=
2293 T + ((Y * Rand_Tab_Len_1) + X) * Rand_Tab_Item_Size;
2296 IT.Table (N) := Item;
2303 procedure Set_Used_Char (C : Character; Item : Natural) is
2304 N : constant Natural :=
2305 Used_Char_Set + Character'Pos (C) * Used_Char_Size;
2308 IT.Table (N) := Item;
2315 procedure Set_Vertices (F : Natural; Item : Vertex_Type) is
2316 N : constant Natural := Vertices + (F * Vertex_Size);
2319 IT.Table (N) := Item.First;
2320 IT.Table (N + 1) := Item.Last;
2337 if Opt = CPU_Time then
2338 for J in 0 .. Rand_Tab_Len_1 - 1 loop
2339 exit when Word (J + 1) = ASCII.NUL;
2340 R := Get_Rand_Tab (Table, J, Get_Used_Char (Word (J + 1)));
2341 S := (S + R) mod NV;
2345 for J in 0 .. Rand_Tab_Len_1 - 1 loop
2346 exit when Word (J + 1) = ASCII.NUL;
2347 R := Get_Rand_Tab (Table, J, 0);
2348 S := (S + R * Character'Pos (Word (J + 1))) mod NV;
2359 function Type_Size (L : Natural) return Natural is
2363 elsif L <= 2 ** 16 then
2382 when Character_Position =>
2383 return Get_Char_Pos (J);
2385 when Used_Character_Set =>
2386 return Get_Used_Char (Character'Val (J));
2388 when Function_Table_1 =>
2389 return Get_Rand_Tab (T1, J, K);
2391 when Function_Table_2 =>
2392 return Get_Rand_Tab (T2, J, K);
2395 return Get_Graph (J);
2400 end GNAT.Perfect_Hash_Generators;