// // $Id$ // // // Copyright (c) 2001-2011, Andrew Aksyonoff // Copyright (c) 2008-2011, Sphinx Technologies Inc // All rights reserved // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License. You should have // received a copy of the GPL license along with this program; if you // did not, you can find it at http://www.gnu.org/ // #include "sphinx.h" #include "sphinxutils.h" #include "sphinxint.h" #include #include "py_layer.h" #define CONF_CHECK(_hash,_key,_msg,_add) \ if (!( _hash.Exists ( _key ) )) \ { \ fprintf ( stdout, "ERROR: key '%s' not found " _msg, _key, _add ); \ continue; \ } const char * myctime ( DWORD uStamp ) { static char sBuf[256]; time_t tStamp = uStamp; // for 64-bit strncpy ( sBuf, ctime ( &tStamp ), sizeof(sBuf) ); char * p = sBuf; while ( (*p) && (*p)!='\n' && (*p)!='\r' ) p++; *p = '\0'; return sBuf; } int main ( int argc, char ** argv ) { fprintf ( stdout, SPHINX_BANNER ); if ( argc<=1 ) { fprintf ( stdout, "Usage: search [OPTIONS] \n" "\n" "Options are:\n" "-c, --config \tuse given config file instead of defaults\n" "-i, --index \tsearch given index only (default: all indexes)\n" "-a, --any\t\tmatch any query word (default: match all words)\n" "-b, --boolean\t\tmatch in boolean mode\n" "-p, --phrase\t\tmatch exact phrase\n" "-e, --extended\t\tmatch in extended mode\n" "-f, --filter \tonly match if attribute attr value is v\n" "-s, --sortby \tsort matches by 'CLAUSE' in sort_extended mode\n" "-S, --sortexpr \tsort matches by 'EXPR' DESC in sort_expr mode\n" "-o, --offset \tprint matches starting from this offset (default: 0)\n" "-l, --limit \tprint this many matches (default: 20)\n" "-q, --noinfo\t\tdon't print document info from SQL database\n" "-g, --group \tgroup by attribute named attr\n" "-gs,--groupsort \tsort groups by \n" "--sort=date\t\tsort by date, descending\n" "--rsort=date\t\tsort by date, ascending\n" "--sort=ts\t\tsort by time segments\n" "--stdin\t\t\tread query from stdin\n" "\n" "This program (CLI search) is for testing and debugging purposes only;\n" "it is NOT intended for production use.\n" ); exit ( 0 ); } /////////////////////////////////////////// // get query and other commandline options /////////////////////////////////////////// CSphQuery tQuery; char sQuery [ 1024 ]; sQuery[0] = '\0'; const char * sOptConfig = NULL; const char * sIndex = NULL; bool bNoInfo = false; bool bStdin = false; int iStart = 0; int iLimit = 20; #define OPT(_a1,_a2) else if ( !strcmp(argv[i],_a1) || !strcmp(argv[i],_a2) ) #define OPT1(_a1) else if ( !strcmp(argv[i],_a1) ) int i; for ( i=1; i=argc ) break; OPT ( "-o", "--offset" ) iStart = atoi ( argv[++i] ); OPT ( "-l", "--limit" ) iLimit = atoi ( argv[++i] ); OPT ( "-c", "--config" ) sOptConfig = argv[++i]; OPT ( "-i", "--index" ) sIndex = argv[++i]; OPT ( "-g", "--group" ) { tQuery.m_eGroupFunc = SPH_GROUPBY_ATTR; tQuery.m_sGroupBy = argv[++i]; } OPT ( "-gs","--groupsort" ) { tQuery.m_sGroupSortBy = argv[++i]; } // NOLINT OPT ( "-s", "--sortby" ) { tQuery.m_eSort = SPH_SORT_EXTENDED; tQuery.m_sSortBy = argv[++i]; } OPT ( "-S", "--sortexpr" ) { tQuery.m_eSort = SPH_SORT_EXPR; tQuery.m_sSortBy = argv[++i]; } else if ( (i+2)>=argc ) break; OPT ( "-f", "--filter" ) { DWORD uVal = strtoul ( argv[i+2], NULL, 10 ); tQuery.m_dFilters.Reset (); tQuery.m_dFilters.Resize ( 1 ); tQuery.m_dFilters[0].m_eType = SPH_FILTER_VALUES; tQuery.m_dFilters[0].m_dValues.Reset (); tQuery.m_dFilters[0].m_dValues.Add ( uVal ); tQuery.m_dFilters[0].m_sAttrName = argv[i+1]; i += 2; } else break; // unknown option } else if ( strlen(sQuery) + strlen(argv[i]) + 1 < sizeof(sQuery) ) { // this is a search term strcat ( sQuery, argv[i] ); // NOLINT strcat ( sQuery, " " ); // NOLINT } } iStart = Max ( iStart, 0 ); iLimit = Max ( iLimit, 0 ); if ( i!=argc ) { fprintf ( stdout, "ERROR: malformed or unknown option near '%s'.\n", argv[i] ); return 1; } #undef OPT if ( bStdin ) { int iPos = 0, iLeft = sizeof(sQuery)-1; char sThrowaway [ 256 ]; while ( !feof(stdin) ) { if ( iLeft>0 ) { int iLen = fread ( sQuery, 1, iLeft, stdin ); iPos += iLen; iLeft -= iLen; } else { int iDummy; // to avoid gcc unused result warning iDummy = fread ( sThrowaway, 1, sizeof(sThrowaway), stdin ); } } assert ( iPos<(int)sizeof(sQuery) ); sQuery[iPos] = '\0'; } ///////////// // configure ///////////// tQuery.m_iMaxMatches = Max ( 1000, iStart + iLimit ); CSphConfigParser cp; CSphConfig & hConf = cp.m_tConf; sphLoadConfig ( sOptConfig, false, cp ); ///////////////////// // init python layer //////////////////// if ( hConf("python") && hConf["python"]("python") ) { CSphConfigSection & hPython = hConf["python"]["python"]; #if USE_PYTHON if(!cftInitialize(hPython)) sphDie ( "Python layer's initiation failed."); #else sphDie ( "Python layer defined, but search does Not supports python. used --with-python to recompile."); #endif } ///////////////////// // search each index ///////////////////// hConf["index"].IterateStart (); while ( hConf["index"].IterateNext () ) { const CSphConfigSection & hIndex = hConf["index"].IterateGet (); const char * sIndexName = hConf["index"].IterateGetKey().cstr(); if ( sIndex && strcmp ( sIndex, sIndexName ) ) continue; if ( hIndex("type") && hIndex["type"]=="distributed" ) continue; if ( !hIndex.Exists ( "path" ) ) sphDie ( "key 'path' not found in index '%s'", sIndexName ); CSphString sError; // do we want to show document info from database? #if USE_MYSQL MYSQL tSqlDriver; const char * sQueryInfo = NULL; const char * sQueryInfoPre = NULL; //for set names while ( !bNoInfo ) { if ( !hIndex("source") || !hConf("source") || !hConf["source"]( hIndex["source"] ) ) break; const CSphConfigSection & hSource = hConf["source"][ hIndex["source"] ]; if ( !hSource("type") || hSource["type"]!="mysql" || !hSource("sql_host") || !hSource("sql_user") || !hSource("sql_db") || !hSource("sql_pass") || !hSource("sql_query_info") ) { break; } if (hSource("sql_query_info_pre")) { sQueryInfoPre = hSource["sql_query_info_pre"].cstr(); } sQueryInfo = hSource["sql_query_info"].cstr(); if ( !strstr ( sQueryInfo, "$id" ) ) sphDie ( "'sql_query_info' value must contain '$id'" ); int iPort = 3306; if ( hSource.Exists ( "sql_port" ) && hSource["sql_port"].intval() ) iPort = hSource["sql_port"].intval(); mysql_init ( &tSqlDriver ); if ( !mysql_real_connect ( &tSqlDriver, hSource["sql_host"].cstr(), hSource["sql_user"].cstr(), hSource["sql_pass"].cstr(), hSource["sql_db"].cstr(), iPort, hSource.Exists ( "sql_sock" ) ? hSource["sql_sock"].cstr() : NULL, 0 ) ) { sphDie ( "failed to connect to MySQL (error=%s)", mysql_error ( &tSqlDriver ) ); } // all good break; } #endif ////////// // search ////////// tQuery.m_sQuery = sQuery; CSphQueryResult * pResult = NULL; CSphIndex * pIndex = sphCreateIndexPhrase ( NULL, hIndex["path"].cstr() ); pIndex->m_bEnableStar = ( hIndex.GetInt("enable_star")!=0 ); pIndex->SetWordlistPreload ( hIndex.GetInt("ondisk_dict")==0 ); CSphString sWarning; sError = "could not create index (check that files exist)"; for ( ; pIndex; ) { if ( !pIndex->Prealloc ( false, false, sWarning ) || !pIndex->Preread() ) { sError = pIndex->GetLastError (); break; } const CSphSchema * pSchema = &pIndex->GetMatchSchema(); if ( !sWarning.IsEmpty () ) fprintf ( stdout, "WARNING: index '%s': %s\n", sIndexName, sWarning.cstr () ); // handle older index versions (<9) if ( !sphFixupIndexSettings ( pIndex, hIndex, sError ) ) sphDie ( "index '%s': %s", sIndexName, sError.cstr() ); // lookup first timestamp if needed // FIXME! remove this? if ( tQuery.m_eSort!=SPH_SORT_RELEVANCE && tQuery.m_eSort!=SPH_SORT_EXTENDED && tQuery.m_eSort!=SPH_SORT_EXPR ) { int iTS = -1; for ( int i=0; iGetAttrsCount(); i++ ) if ( pSchema->GetAttr(i).m_eAttrType==SPH_ATTR_TIMESTAMP ) { tQuery.m_sSortBy = pSchema->GetAttr(i).m_sName; iTS = i; break; } if ( iTS<0 ) { fprintf ( stdout, "index '%s': no timestamp attributes found, sorting by relevance.\n", sIndexName ); tQuery.m_eSort = SPH_SORT_RELEVANCE; } } // do querying ISphMatchSorter * pTop = sphCreateQueue ( &tQuery, pIndex->GetMatchSchema(), sError ); if ( !pTop ) { sError.SetSprintf ( "failed to create sorting queue: %s", sError.cstr() ); break; } pResult = new CSphQueryResult(); if ( !pIndex->MultiQuery ( &tQuery, pResult, 1, &pTop, NULL ) ) { // failure; pull that error message sError = pIndex->GetLastError(); SafeDelete ( pResult ); } else { // success; fold them matches pResult->m_dMatches.Reset (); pResult->m_iTotalMatches += pTop->GetTotalCount(); pResult->m_tSchema = pTop->GetSchema(); sphFlattenQueue ( pTop, pResult, 0 ); } SafeDelete ( pTop ); break; } ///////// // print ///////// if ( !pResult ) { fprintf ( stdout, "index '%s': search error: %s.\n", sIndexName, sError.cstr() ); return 1; } fprintf ( stdout, "index '%s': query '%s': returned %d matches of %d total in %d.%03d sec\n", sIndexName, sQuery, pResult->m_dMatches.GetLength(), pResult->m_iTotalMatches, pResult->m_iQueryTime/1000, pResult->m_iQueryTime%1000 ); if ( !pResult->m_sWarning.IsEmpty() ) fprintf ( stdout, "WARNING: %s\n", pResult->m_sWarning.cstr() ); if ( pResult->m_dMatches.GetLength() ) { fprintf ( stdout, "\ndisplaying matches:\n" ); int iMaxIndex = Min ( iStart+iLimit, pResult->m_dMatches.GetLength() ); for ( int i=iStart; im_dMatches[i]; fprintf ( stdout, "%d. document=" DOCID_FMT ", weight=%d", 1+i, tMatch.m_iDocID, tMatch.m_iWeight ); for ( int j=0; jm_tSchema.GetAttrsCount(); j++ ) { const CSphColumnInfo & tAttr = pResult->m_tSchema.GetAttr(j); fprintf ( stdout, ", %s=", tAttr.m_sName.cstr() ); if ( tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_UINT64SET ) { fprintf ( stdout, "(" ); SphAttr_t iIndex = tMatch.GetAttr ( tAttr.m_tLocator ); if ( iIndex ) { const DWORD * pValues = pResult->m_pMva + iIndex; int iValues = *pValues++; if ( tAttr.m_eAttrType==SPH_ATTR_UINT64SET ) { assert ( ( iValues%2 )==0 ); for ( int k=0; km_pStrings + tMatch.GetAttr ( tAttr.m_tLocator ), &pStr ); fwrite ( pStr, 1, iLen, stdout ); break; } default: fprintf ( stdout, "(unknown-type-%d)", tAttr.m_eAttrType ); } } fprintf ( stdout, "\n" ); #if USE_MYSQL if ( sQueryInfo ) { char * sQuery = sphStrMacro ( sQueryInfo, "$id", tMatch.m_iDocID ); const char * sError = NULL; #define LOC_MYSQL_ERROR(_arg) { sError = _arg; break; } if (sQueryInfoPre) { if(mysql_query ( &tSqlDriver, sQueryInfoPre )) LOC_MYSQL_ERROR ( "mysql_query" ); } for ( ;; ) { if ( mysql_query ( &tSqlDriver, sQuery ) ) LOC_MYSQL_ERROR ( "mysql_query" ); MYSQL_RES * pSqlResult = mysql_use_result ( &tSqlDriver ); if ( !pSqlResult ) LOC_MYSQL_ERROR ( "mysql_use_result" ); MYSQL_ROW tRow = mysql_fetch_row ( pSqlResult ); if ( !tRow ) { fprintf ( stdout, "\t(document not found in db)\n" ); break; } for ( int iField=0; iField<(int)pSqlResult->field_count; iField++ ) fprintf ( stdout, "\t%s=%s\n", ( pSqlResult->fields && pSqlResult->fields[iField].name ) ? pSqlResult->fields[iField].name : "(NULL)", tRow[iField] ? tRow[iField] : "(NULL)" ); mysql_free_result ( pSqlResult ); break; } if ( sError ) sphDie ( "sql_query_info: %s: %s", sError, mysql_error ( &tSqlDriver ) ); delete [] sQuery; } #endif } } fprintf ( stdout, "\nwords:\n" ); pResult->m_hWordStats.IterateStart(); int iWord = 1; while ( pResult->m_hWordStats.IterateNext() ) { const CSphQueryResultMeta::WordStat_t & tStat = pResult->m_hWordStats.IterateGet(); fprintf ( stdout, "%d. '%s': %d documents, %d hits\n", iWord, pResult->m_hWordStats.IterateGetKey().cstr(), tStat.m_iDocs, tStat.m_iHits ); iWord++; } fprintf ( stdout, "\n" ); /////////// // cleanup /////////// SafeDelete ( pIndex ); } sphShutdownWordforms (); #if USE_PYTHON cftShutdown(); //clean up #endif } // // $Id$ //