mirror of
https://github.com/moparisthebest/moparscape.org-smf
synced 2024-12-22 07:08:48 -05:00
Add Sphinx SearchAPI
This commit is contained in:
parent
3ae19ba230
commit
8f46702a56
229
Sources/SearchAPI-Sphinx.php
Normal file
229
Sources/SearchAPI-Sphinx.php
Normal file
@ -0,0 +1,229 @@
|
|||||||
|
<?php
|
||||||
|
/**********************************************************************************
|
||||||
|
* SearchAPI-Sphinx.php *
|
||||||
|
***********************************************************************************
|
||||||
|
* SMF: Simple Machines Forum *
|
||||||
|
* Open-Source Project Inspired by Zef Hemel (zef@zefhemel.com) *
|
||||||
|
* =============================================================================== *
|
||||||
|
* Software Version: SMF 2.0 RC1 *
|
||||||
|
* Software by: Simple Machines (http://www.simplemachines.org) *
|
||||||
|
* Copyright 2006-2009 by: Simple Machines LLC (http://www.simplemachines.org) *
|
||||||
|
* 2001-2006 by: Lewis Media (http://www.lewismedia.com) *
|
||||||
|
* Support, News, Updates at: http://www.simplemachines.org *
|
||||||
|
***********************************************************************************
|
||||||
|
* This program is free software; you may redistribute it and/or modify it under *
|
||||||
|
* the terms of the provided license as published by Simple Machines LLC. *
|
||||||
|
* *
|
||||||
|
* This program is distributed in the hope that it is and will be useful, but *
|
||||||
|
* WITHOUT ANY WARRANTIES; without even any implied warranty of MERCHANTABILITY *
|
||||||
|
* or FITNESS FOR A PARTICULAR PURPOSE. *
|
||||||
|
* *
|
||||||
|
* See the "license.txt" file for details of the Simple Machines license. *
|
||||||
|
* The latest version can always be found at http://www.simplemachines.org. *
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
if (!defined('SMF'))
|
||||||
|
die('Hacking attempt...');
|
||||||
|
|
||||||
|
/*
|
||||||
|
int searchSort(string $wordA, string $wordB)
|
||||||
|
- callback function for usort used to sort the fulltext results.
|
||||||
|
- the order of sorting is: large words, small words, large words that
|
||||||
|
are excluded from the search, small words that are excluded.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// latest config: http://www.simplemachines.org/community/index.php?topic=127672.msg1361578#msg1361578
|
||||||
|
// this file from: http://www.simplemachines.org/community/index.php?topic=127672.msg2230464#msg2230464
|
||||||
|
|
||||||
|
class sphinx_search
|
||||||
|
{
|
||||||
|
// This is the last version of SMF that this was tested on, to protect against API changes.
|
||||||
|
public $version_compatible = 'SMF 2.0';
|
||||||
|
// This won't work with versions of SMF less than this.
|
||||||
|
public $min_smf_version = 'SMF 2.0 Beta 2';
|
||||||
|
// Is it supported?
|
||||||
|
public $is_supported = true;
|
||||||
|
|
||||||
|
// What databases support the custom index?
|
||||||
|
protected $supported_databases = array('mysql');
|
||||||
|
|
||||||
|
// Nothing to do...
|
||||||
|
public function __construct()
|
||||||
|
{
|
||||||
|
global $db_type;
|
||||||
|
|
||||||
|
// Is this database supported?
|
||||||
|
if (!in_array($db_type, $this->supported_databases))
|
||||||
|
{
|
||||||
|
$this->is_supported = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check whether the search can be performed by this API.
|
||||||
|
public function supportsMethod($methodName, $query_params = null)
|
||||||
|
{
|
||||||
|
switch ($methodName)
|
||||||
|
{
|
||||||
|
case 'searchSort':
|
||||||
|
case 'prepareIndexes':
|
||||||
|
case 'indexedWordQuery':
|
||||||
|
return true;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'searchQuery':
|
||||||
|
|
||||||
|
// Search can be performed, but not for 'subject only' query.
|
||||||
|
return !$query_params['subject_only'];
|
||||||
|
|
||||||
|
default:
|
||||||
|
|
||||||
|
// All other methods, too bad dunno you.
|
||||||
|
return false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function compares the length of two strings plus a little.
|
||||||
|
public function searchSort($a, $b)
|
||||||
|
{
|
||||||
|
global $modSettings, $excludedWords;
|
||||||
|
|
||||||
|
$x = strlen($a) - (in_array($a, $excludedWords) ? 1000 : 0);
|
||||||
|
$y = strlen($b) - (in_array($b, $excludedWords) ? 1000 : 0);
|
||||||
|
|
||||||
|
return $x < $y ? 1 : ($x > $y ? -1 : 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do we have to do some work with the words we are searching for to prepare them?
|
||||||
|
public function prepareIndexes($word, &$wordsSearch, &$wordsExclude, $isExcluded)
|
||||||
|
{
|
||||||
|
global $modSettings;
|
||||||
|
|
||||||
|
$subwords = text2words($word, null, false);
|
||||||
|
|
||||||
|
$fulltextWord = count($subwords) === 1 ? $word : '"' . $word . '"';
|
||||||
|
$wordsSearch['indexed_words'][] = $fulltextWord;
|
||||||
|
if ($isExcluded)
|
||||||
|
$wordsExclude[] = $fulltextWord;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This has it's own custom search.
|
||||||
|
public function searchQuery($search_params, $search_words, $excluded_words, &$participants, &$search_results)
|
||||||
|
{
|
||||||
|
global $user_info, $context, $sourcedir, $modSettings;
|
||||||
|
|
||||||
|
// Only request the results if they haven't been cached yet.
|
||||||
|
if (($cached_results = cache_get_data('search_results_' . md5($user_info['query_see_board'] . '_' . $context['params']))) === null)
|
||||||
|
{
|
||||||
|
//!!! Should this not be in here?
|
||||||
|
// The API communicating with the search daemon.
|
||||||
|
require_once($sourcedir . '/sphinxapi.php');
|
||||||
|
|
||||||
|
// Create an instance of the sphinx client and set a few options.
|
||||||
|
$mySphinx = new SphinxClient();
|
||||||
|
$mySphinx->SetServer($modSettings['sphinx_searchd_server'], (int) $modSettings['sphinx_searchd_port']);
|
||||||
|
$mySphinx->SetLimits(0, (int) $modSettings['sphinx_max_results']);
|
||||||
|
$mySphinx->SetMatchMode(SPH_MATCH_EXTENDED);
|
||||||
|
|
||||||
|
// Put together a sort string; besides the main column sort (relevance, id_topic, or num_replies), add secondary sorting based on relevance value (if not the main sort method) and age
|
||||||
|
$sphinx_sort = ($search_params['sort'] === 'id_msg' ? 'id_topic' : $search_params['sort']) . ' ' . $search_params['sort_dir'] . ($search_params['sort'] === 'relevance' ? '' : ', relevance desc') . ', poster_time desc';
|
||||||
|
// Grouping by topic id makes it return only one result per topic, so don't set that for in-topic searches
|
||||||
|
if (empty($search_params['topic']))
|
||||||
|
$mySphinx->SetGroupBy('id_topic', SPH_GROUPBY_ATTR, $sphinx_sort);
|
||||||
|
$mySphinx->SetSortMode(SPH_SORT_EXTENDED, $sphinx_sort);
|
||||||
|
|
||||||
|
// Set the limits based on the search parameters.
|
||||||
|
if (!empty($search_params['min_msg_id']) || !empty($search_params['max_msg_id']))
|
||||||
|
$mySphinx->SetIDRange($search_params['min_msg_id'], empty($search_params['max_msg_id']) ? (int) $modSettings['maxMsgID'] : $search_params['max_msg_id']);
|
||||||
|
if (!empty($search_params['topic']))
|
||||||
|
$mySphinx->SetFilter('id_topic', array((int) $search_params['topic']));
|
||||||
|
if (!empty($search_params['brd']))
|
||||||
|
$mySphinx->SetFilter('id_board', $search_params['brd']);
|
||||||
|
if (!empty($search_params['memberlist']))
|
||||||
|
$mySphinx->SetFilter('id_member', $search_params['memberlist']);
|
||||||
|
|
||||||
|
// Construct the (binary mode) query.
|
||||||
|
$orResults = array();
|
||||||
|
$inc_words = array();
|
||||||
|
foreach ($search_words as $orIndex => $words)
|
||||||
|
{
|
||||||
|
$inc_words = array_merge($inc_words, $words['indexed_words']);
|
||||||
|
$andResult = '';
|
||||||
|
foreach ($words['indexed_words'] as $sphinxWord)
|
||||||
|
$andResult .= (in_array($sphinxWord, $excluded_words) ? '-' : '') . smc_clean_word_sphinx($sphinxWord, $mySphinx) . ' & ';
|
||||||
|
$orResults[] = substr($andResult, 0, -3);
|
||||||
|
}
|
||||||
|
// If no search terms are left after comparing against excluded words (i.e. "test -test" or "test last -test -last"), sending that to Sphinx would result in a fatal error
|
||||||
|
if (!count(array_diff($inc_words, $excluded_words)))
|
||||||
|
// Instead, fail gracefully (return "no results")
|
||||||
|
return 0;
|
||||||
|
$query = count($orResults) === 1 ? $orResults[0] : '(' . implode(') | (', $orResults) . ')';
|
||||||
|
|
||||||
|
// Subject only searches need to be specified.
|
||||||
|
if ($search_params['subject_only'])
|
||||||
|
$query = '@(subject) ' . $query;
|
||||||
|
|
||||||
|
// Execute the search query.
|
||||||
|
$request = $mySphinx->Query($query, 'smf_index');
|
||||||
|
|
||||||
|
// Can a connection to the daemon be made?
|
||||||
|
if ($request === false)
|
||||||
|
{
|
||||||
|
// Just log the error.
|
||||||
|
if ($mySphinx->GetLastError())
|
||||||
|
log_error($mySphinx->GetLastError());
|
||||||
|
//echo 'error:'.$mySphinx->GetLastError();
|
||||||
|
fatal_lang_error('error_no_search_daemon');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the relevant information from the search results.
|
||||||
|
$cached_results = array(
|
||||||
|
'matches' => array(),
|
||||||
|
'num_results' => $request['total'],
|
||||||
|
);
|
||||||
|
if (isset($request['matches']))
|
||||||
|
foreach ($request['matches'] as $msgID => $match)
|
||||||
|
$cached_results['matches'][$msgID] = array(
|
||||||
|
'id' => $match['attrs']['id_topic'],
|
||||||
|
'relevance' => round($match['attrs']['relevance'] / 10000, 1) . '%',
|
||||||
|
'num_matches' => empty($search_params['topic']) ? $match['attrs']['@count'] : 0,
|
||||||
|
'matches' => array(),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Store the search results in the cache.
|
||||||
|
cache_put_data('search_results_' . md5($user_info['query_see_board'] . '_' . $context['params']), $cached_results, 600);
|
||||||
|
}
|
||||||
|
|
||||||
|
$participants = array();
|
||||||
|
foreach (array_slice(array_keys($cached_results['matches']), $_REQUEST['start'], $modSettings['search_results_per_page']) as $msgID)
|
||||||
|
{
|
||||||
|
$context['topics'][$msgID] = $cached_results['matches'][$msgID];
|
||||||
|
$participants[$cached_results['matches'][$msgID]['id']] = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sentences need to be broken up in words for proper highlighting.
|
||||||
|
$search_results = array();
|
||||||
|
foreach ($search_words as $orIndex => $words)
|
||||||
|
$search_results = array_merge($search_results, $search_words[$orIndex]['subject_words']);
|
||||||
|
|
||||||
|
return $cached_results['num_results'];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean up a search word/phrase/term for Sphinx
|
||||||
|
function smc_clean_word_sphinx($sphinx_term, $sphinx_client)
|
||||||
|
{
|
||||||
|
// Multiple quotation marks in a row can cause fatal errors, so handle them
|
||||||
|
$sphinx_term = preg_replace('/""+/', '"', $sphinx_term);
|
||||||
|
// Unmatched (i.e. odd number of) quotation marks also cause fatal errors, so handle them
|
||||||
|
if (substr_count($sphinx_term, '"') % 2)
|
||||||
|
// Using preg_replace since it supports limiting the number of replacements
|
||||||
|
$sphinx_term = preg_replace('/"/', '', $sphinx_term, 1);
|
||||||
|
// Use the Sphinx API's built-in EscapeString function to escape special characters
|
||||||
|
$sphinx_term = $sphinx_client->EscapeString($sphinx_term);
|
||||||
|
// Since it escapes quotation marks and we don't want that, unescape them
|
||||||
|
$sphinx_term = str_replace('\"', '"', $sphinx_term);
|
||||||
|
return $sphinx_term;
|
||||||
|
}
|
||||||
|
?>
|
1626
Sources/sphinxapi.php
Normal file
1626
Sources/sphinxapi.php
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user