OSDN Git Service

merged 3.2 code
[nucleus-jp/nucleus-jp-ancient.git] / euc / nucleus / libs / SEARCH.php
1 <?php\r
2 \r
/**
  * Nucleus: PHP/MySQL Weblog CMS (http://nucleuscms.org/)
  * Copyright (C) 2003-2005 The Nucleus Group
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version 2
  * of the License, or (at your option) any later version.
  * (see nucleus/documentation/index.html#license for more info)
  *
  * SEARCH(querystring) offers different functionality to create an
  * SQL query to find certain items (and comments).
  *
  * Based on code by David Altherr:
  * http://www.evolt.org/article/Boolean_Fulltext_Searching_with_PHP_and_MySQL/18/15665/
  * http://davidaltherr.net/web/php_functions/boolean/funcs.mysql.boolean.txt
  *
  *
  * $Id: SEARCH.php,v 1.5 2005-03-16 08:10:35 kimitake Exp $
  * $NucleusJP: SEARCH.php,v 1.4 2005/03/12 06:19:05 kimitake Exp $
  */
24 \r
25 \r
/**
 * Turns a user-entered search string into SQL fragments.
 *
 * The query may contain the words "and", "or" and "not" (any case) or the
 * short forms " ", "," and "-". Matching is done with LIKE '%term%' on each
 * requested column; the MATCH ... AGAINST scoring of the code this class is
 * based on is not used in this variant.
 *
 * Text is processed byte-wise as a multi-byte encoding whose lead bytes are
 * 0x8E / 0x8F / 0xA1-0xFE (presumably EUC-JP, judging by the byte ranges).
 */
class SEARCH {

    /** Sanitised query text (after space normalisation and character stripping). */
    var $querystring;
    /** Not assigned by this class; kept so existing readers of the property keep working. */
    var $marked;
    /** Query reduced to its non-excluded terms, separated by spaces. */
    var $inclusive;
    /** Integer ids of all blogs flagged bincludesearch=1. */
    var $blogs;
    /** Query in short boolean syntax, consumed by boolean_sql_where(). */
    var $jpmarked;

    /** PCRE fragment: one single-byte (7-bit) character. */
    var $ascii = '[\x00-\x7F]';
    /** PCRE fragment: one two-byte character. */
    var $two = '[\x8E\xA1-\xFE][\xA1-\xFE]';
    /** PCRE fragment: one three-byte character (lead byte 0x8F). */
    var $three = '\x8F[\xA1-\xFE][\xA1-\xFE]';

    /**
     * Parses the query and loads the list of searchable blogs.
     *
     * @param string $text raw query as entered by the user
     */
    function SEARCH($text) {
        // Full-width spaces must become ASCII spaces before any splitting.
        $text = $this->zenspace_replace($text);
        // Drop the characters < > = ? ! # ^ ( ) [ ] : ; % from the query.
        $text = preg_replace ("/[<>=?!#^()[\]:;\\%]/","",$text);

        $this->jpmarked    = $this->boolean_mark_atoms_jp($text);
        $this->querystring = $text;
        $this->inclusive   = $this->boolean_inclusive_atoms($text);
        $this->blogs       = array();

        // Collect every blog that is flagged as searchable.
        $res = sql_query('SELECT bnumber FROM '.sql_table('blog').' WHERE bincludesearch=1 ');
        while ($obj = mysql_fetch_object($res)) {
            $this->blogs[] = intval($obj->bnumber);
        }
    }

    /**
     * Replaces every full-width (zenkaku) space, bytes A1 A1, with an ASCII
     * space while leaving all other characters untouched.
     *
     * The string is walked character by character so that an A1 A1 pair that
     * straddles two neighbouring characters is not mistaken for a space.
     * A multi-byte sequence cut off by the end of the string is copied as-is
     * instead of being padded with NUL bytes.
     *
     * @param string $text
     * @return string
     */
    function zenspace_replace($text){
        $text   = (string) $text;
        $length = strlen($text);
        $out    = '';
        $pos    = 0;

        while ($pos < $length) {
            $byte = ord($text[$pos]);

            if ($byte < 0x8e) {
                // single-byte character
                $out .= $text[$pos];
                $pos++;
                continue;
            }

            // 0x8E and 0xA1..0xFF introduce two bytes, 0x8F..0xA0 three.
            $width = ($byte > 0xa0 || $byte < 0x8f) ? 2 : 3;
            // substr() stops at the end of the string, so a truncated
            // trailing sequence never reads past the input.
            $char = substr($text, $pos, $width);
            if ($char === "\xA1\xA1") {
                $char = "\x20";
            }
            $out .= $char;
            $pos += $width;
        }

        return $out;
    }

    /**
     * Relevance-score expression for the SELECT clause.
     *
     * Scoring is disabled in this variant, so nothing is produced.
     *
     * @param string $match comma-separated column names (unused)
     * @return null
     */
    function boolean_sql_select($match){
        return null;
    }

    /**
     * Reduces a query to the terms that must or may occur, i.e. everything
     * except the terms excluded with "not" / "-".
     *
     * @param string $string query text
     * @return string remaining terms separated by spaces (may carry
     *                leading/trailing spaces where operators were removed)
     */
    function boolean_inclusive_atoms($string){
        $result = trim($string);
        $result = preg_replace("/([[:space:]]{2,})/",' ',$result);

        /* convert normal boolean operators to shortened syntax */
        $result = preg_replace('/ not /i',' -',$result);
        $result = preg_replace('/ and /i',' ',$result);
        $result = preg_replace('/ or /i',',',$result);

        /* drop unnecessary spaces */
        $result = str_replace(' ,',',',$result);
        $result = str_replace(', ',',',$result);
        $result = str_replace('- ','-',$result);
        $result = str_replace('+','',$result);

        /* strip exclusive atoms: "-(a,b)" groups first, then single "-term" */
        $multibyte = $this->two . '|' . $this->three;
        $head      = '([A-Za-z0-9]|' . $multibyte . '){1,}';
        $tail      = '([A-Za-z0-9\-\.\_\,]|' . $multibyte . '){0,}';

        $result = preg_replace('/\-\(' . $head . $tail . '\)/', '', $result);
        $result = preg_replace('/\-' . $head . $tail . '/', '', $result);

        /* whatever grouping and OR syntax is left becomes plain spacing */
        $result = str_replace('(',' ',$result);
        $result = str_replace(')',' ',$result);
        $result = str_replace(',',' ',$result);

        return $result;
    }

    /**
     * Builds the WHERE condition for the query given to the constructor.
     *
     * @param string $match comma-separated column names to search in
     * @return string parenthesised SQL condition
     */
    function boolean_sql_where($match){
        return $this->boolean_sql_where_jp_short($this->jpmarked, $match);
    }

    // there must be a simple way to simply copy a value with backslashes in it through
    // the preg_replace, but I cannot currently find it (karma 2003-12-30)
    function copyvalue($foo) {
        return $foo;
    }

    /**
     * Rewrites a query into the short boolean syntax understood by
     * boolean_sql_where_jp_short(): terms separated by single spaces, where
     * a term starting with "," is an OR alternative and a term starting
     * with "-" is excluded.
     *
     * @param string $string query text
     * @return string
     */
    function boolean_mark_atoms_jp($string){
        $result = trim($string);
        $result = preg_replace("/([[:space:]]{2,})/",' ',$result);

        /* convert normal boolean operators to shortened syntax */
        $result = preg_replace('/ not /i',' -',$result);
        $result = preg_replace('/ and /i',' ',$result);
        $result = preg_replace('/ or /i',',',$result);

        /* strip excessive whitespace */
        $result = str_replace(', ',',',$result);
        $result = str_replace(' ,',',',$result);
        $result = str_replace('- ','-',$result);
        $result = str_replace('+','',$result);

        /* make each OR alternative its own space-separated atom */
        $result = str_replace(',',' ,',$result);

        return $result;
    }

    /**
     * Translates a short-syntax query into a LIKE-based SQL condition.
     *
     * The first atom is always taken literally. Every following atom adds
     * one group: ",term" adds "OR (...)", "-term" adds "AND (NOT ...)" and
     * a plain term adds "AND (...)". Groups are appended without extra
     * nesting, so SQL's normal AND-before-OR precedence applies.
     *
     * @param string $string atoms separated by single spaces
     * @param string $match  comma-separated column names (used with the
     *                       table alias "i")
     * @return string parenthesised SQL condition
     */
    function boolean_sql_where_jp_short($string,$match){
        $columns = explode(',', $match);
        $atoms   = explode(' ', $string);

        $like = '(' . implode(' or ', $this->_like_clauses($columns, $atoms[0], '')) . ')';

        $total = count($atoms);
        for ($n = 1; $n < $total; $n++) {
            $atom   = $atoms[$n];
            $marker = substr($atom, 0, 1);

            if ($marker === ',') {
                $group = implode(' or ', $this->_like_clauses($columns, substr($atom, 1), ' '));
                $like .= ' OR (' . $group . ')';
            } elseif ($marker !== '-') {
                $group = implode(' or ', $this->_like_clauses($columns, $atom, ' '));
                $like .= ' AND (' . $group . ')';
            } else {
                // excluded term: it must be absent from every column
                $group = implode(' and ', $this->_like_clauses($columns, substr($atom, 1), ' NOT'));
                $like .= ' AND (' . $group . ')';
            }
        }

        return '(' . $like . ')';
    }

    /**
     * Builds one "(i.column LIKE '%term%') " clause per column.
     *
     * @param array  $columns column names
     * @param string $term    search term; escaped with addslashes() here
     * @param string $prefix  text placed in front of each clause
     * @return array list of clauses, in column order
     */
    function _like_clauses($columns, $term, $prefix) {
        $needle  = addslashes($term);
        $clauses = array();
        foreach ($columns as $column) {
            $clauses[] = $prefix . "(i.$column LIKE '%" . $needle . "%') ";
        }
        return $clauses;
    }

}
304 ?>\r