SetMatchMode ( SPH_MATCH_EXTENDED2 );
	$cl->SetRankingMode ( SPH_RANK_WORDCOUNT );
	// keep only dictionary words whose "len" attribute is within $delta of $len
	$cl->SetFilterRange ( "len", $len-$delta, $len+$delta );
	// myrank = match weight, reduced by how far the candidate length is from $len
	$cl->SetSelect ( "*, @weight+$delta-abs(len-$len) AS myrank" );
	$cl->SetSortMode ( SPH_SORT_EXTENDED, "myrank DESC, freq DESC" );
	$cl->SetArrayResult ( true );

	// pull top-N best trigram matches and run them through Levenshtein
	$res = $cl->Query ( $query, "suggest", 0, TOP_COUNT );

	// empty() instead of a bare index read: same truth value, but no notice
	// if the result carries no "matches" key at all
	if ( !$res || empty ( $res["matches"] ) )
		return false;

	if ( SUGGEST_DEBUG )
	{
		print "--- DEBUG START ---\n";

		foreach ( $res["matches"] as $match )
		{
			// the candidate word is an attribute, same place the selection loop below reads it from
			$w = $match["attrs"]["keyword"];

			// explicit presence check instead of @-suppression
			$myrank = isset ( $match["attrs"]["myrank"] )
				? ", myrank=" . $match["attrs"]["myrank"]
				: "";

			$levdist = levenshtein ( $keyword, $w );

			// keys inside {...} must be quoted; bare words there are constant lookups
			print "id=$match[id], weight=$match[weight], freq={$match['attrs']['freq']}{$myrank}, word=$w, levdist=$levdist\n";
		}

		print "--- DEBUG END ---\n";
	}

	// further restrict trigram matches with a sane Levenshtein distance limit;
	// matches are visited in the order returned by the query, the first one within the limit wins
	foreach ( $res["matches"] as $match )
	{
		$suggested = $match["attrs"]["keyword"];
		if ( levenshtein ( $keyword, $suggested )<=LEVENSHTEIN_THRESHOLD )
			return $suggested;
	}

	// nothing was close enough, hand the original keyword back
	return $keyword;
}

/// main

// command-line help, shown when the mode or its argument is missing or unknown
$usage = "usage:\n"
	. "php suggest.php --builddict\treads stopwords from stdin, prints SQL dump of the dictionary to stdout\n"
	. "php suggest.php --query WORD\tqueries Sphinx, prints suggestion\n";

if ( $_SERVER["argc"]<2 )
	die ( $usage );

$mode = $_SERVER["argv"][1];

if ( $mode==="--builddict" )
{
	$in = fopen ( "php://stdin", "r" );
	$out = fopen ( "php://stdout", "w+" );
	BuildDictionarySQL ( $out, $in );
}
elseif ( $mode==="--query" )
{
	// --query needs the word to look up; do not read a missing argv slot
	if ( $_SERVER["argc"]<3 )
		die ( $usage );

	// NOTE(review): the mysql_* extension was removed in PHP 7; kept as is because
	// other parts of this file may rely on the implicit connection - confirm before porting to mysqli/PDO
	mysql_connect ( "localhost", "root", "" ) or die ( "mysql_connect() failed: ".mysql_error() );
	mysql_select_db ( "test" ) or die ( "mysql_select_db() failed: ".mysql_error() );

	$keyword = $_SERVER["argv"][2];
	printf ( "keyword: %s\nsuggestion: %s\n", $keyword, MakeSuggestion($keyword) );
}
else
{
	// unknown mode: say so instead of exiting silently
	die ( $usage );
}