diff --git a/database/Database.php b/database/Database.php index 32ddab40d583723b7aa09b639196c078012e1ee7..44bc5cccb0728c30669af7513c82bdea01979044 100644 --- a/database/Database.php +++ b/database/Database.php @@ -70,7 +70,9 @@ class Database private function apply_config(\PDOStatement $stmt) { if ($this->enable_ranking) { - $stmt->bindValue(':config_normalization', 4, PDO::PARAM_INT); + if (!($this->backend instanceof SQLiteSmartBackend)) { + $stmt->bindValue(':config_normalization', 4, PDO::PARAM_INT); + } $stmt->bindValue(':weight_content', 14, PDO::PARAM_INT); } @@ -134,8 +136,9 @@ class Database $stmt->bindParam(':ignore_network', $ignore_network, PDO::PARAM_INT); $stmt->bindParam(':ignore_sender', $ignore_sender, PDO::PARAM_INT); - $stmt->execute(); - return $stmt->fetchAll(\PDO::FETCH_ASSOC); + $success = $stmt->execute(); + $result = $stmt->fetchAll(\PDO::FETCH_ASSOC); + return $result; } public function findInBufferMultipleCount(string $query, string $since = null, string $before = null, string $buffer = null, string $network = null, string $sender = null, int $offset = 0, int $limit = 4): array @@ -168,7 +171,6 @@ class Database $success = $stmt->execute(); $result = $stmt->fetchAll(\PDO::FETCH_ASSOC); - return $result; } diff --git a/database/backends/SQLiteSmartBackend.php b/database/backends/SQLiteSmartBackend.php index 611b6a723139f40df39849ad1d218c6a44147b06..f414819c53e2f2f5cae5d5123ffcb214df24effb 100644 --- a/database/backends/SQLiteSmartBackend.php +++ b/database/backends/SQLiteSmartBackend.php @@ -4,15 +4,115 @@ namespace QuasselRestSearch; require_once 'Backend.php'; +/* +-- Check integrity of fts +-- INSERT INTO backlog_fts(backlog_fts) VALUES('integrity-check'); +-- INSERT INTO backlog_fts(backlog_fts, rank) VALUES('integrity-check', 0); +-- INSERT INTO backlog_fts(backlog_fts, rank) VALUES('integrity-check', 1); + +-- Optimize fts indexes +-- INSERT INTO backlog_fts(backlog_fts) VALUES('optimize'); + +-- Custom rank +-- INSERT INTO backlog_fts(backlog_fts, rank) VALUES('rank', 'bm25(10.0, 5.0)'); + +-- Phrase search example +select rowid, * from backlog_fts where backlog_fts match 'message: jeanm*' order by rank; + +-- Phrase highlight example +SELECT highlight(backlog_fts, 0, '<b>', '</b>') FROM backlog_fts WHERE backlog_fts MATCH ('message: ' || 'jeanmuch*') +*/ + class SQLiteSmartBackend implements Backend { private $db; private $options; + private $enable_ranking; + private $fts5_enabled; + private $opening_tag_marker; + private $closing_tag_marker; + private $opening_tag; + private $closing_tag; - function __construct(\PDO $db, array $options, $enable_ranking) + function __construct(\PDO $db, array $options, bool $enable_ranking) { $this->db = $db; + $timeout = $options["timeout"]; + $this->db->exec("PRAGMA busy_timeout = $timeout;"); $this->options = $options; + $this->enable_ranking = $enable_ranking; + $this->fts5_enabled = SQLiteSmartBackend::ensureFullTextSearchableDatabase($db); + $this->opening_tag_marker = chr(2); + $this->closing_tag_marker = chr(3); + $this->opening_tag = $this->opening_tag_marker . 'b' . $this->closing_tag_marker; + $this->closing_tag = $this->opening_tag_marker . '/b' . $this->closing_tag_marker; + } + + private static function ensureFullTextSearchableDatabase(\PDO $db): bool + { + // Check fts5 is supported + $stmt = $db->prepare("SELECT sqlite_compileoption_used('ENABLE_FTS5');"); + + if (!$stmt->execute()) { + return false; + } + + $record = $stmt->fetch(\PDO::FETCH_NUM); + + if (!$record || (int)$record[0] !== 1) { + return false; + } + + // Check if the fts5 table already exists + $stmt = $db->prepare("SELECT rowid FROM sqlite_master WHERE type='table' and name = 'backlog_fts'"); + + if (!$stmt->execute()) { + return false; + } + + $record = $stmt->fetch(\PDO::FETCH_NUM); + + if ($record && $record[0] > 0) { + return true; + } + + // Create the fts5 table, its update triggers and populate the initial indexes + try { + $oldMode = $db->getAttribute(\PDO::ATTR_ERRMODE); + + $db->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); + $db->beginTransaction(); + + $db->exec("CREATE VIRTUAL TABLE backlog_fts USING fts5(message, content='backlog', content_rowid='messageid');"); + + $db->exec(" + CREATE TRIGGER tsvectorupdate_ai AFTER INSERT ON backlog BEGIN + INSERT INTO backlog_fts(rowid, message) VALUES (new.messageid, new.message); + END; + "); + + $db->exec(" + CREATE TRIGGER tsvectorupdate_ad AFTER DELETE ON backlog BEGIN + INSERT INTO backlog_fts(backlog_fts, rowid, message) VALUES('delete', old.messageid, old.message); + END; + "); + + $db->exec(" + CREATE TRIGGER tsvectorupdate_au AFTER UPDATE ON backlog BEGIN + INSERT INTO backlog_fts(backlog_fts, rowid, message) VALUES('delete', old.messageid, old.message); + INSERT INTO backlog_fts(rowid, message) VALUES (new.messageid, new.message); + END; + "); + + $db->exec("INSERT INTO backlog_fts(backlog_fts) VALUES('rebuild');"); + + $db->setAttribute(\PDO::ATTR_ERRMODE, $oldMode); + $db->commit(); + return true; + } catch (\PDOException $e) { + $db->rollBack(); + return false; + } } public function findUser(): \PDOStatement @@ -24,24 +124,186 @@ class SQLiteSmartBackend implements Backend "); } + private function rankingFunction(): string + { + // TODO: Properly port missing support of Pow in sqlite + + if ($this->enable_ranking) { + return "( + (matching_backlog.rank << :weight_content) * + ((CASE + WHEN type IN (1, 4) THEN 1.0 + WHEN type IN (2, 1024, 2048, 4096, 16384) THEN 0.8 + WHEN type IN (32, 64, 128, 256, 512, 32768, 65536) THEN 0.6 + WHEN type IN (8, 16, 8192, 131072) THEN 0.4 + ELSE 0.2 END) << :weight_type) * + ((1 / (CAST(strftime('%s', 'now') AS INT) - (time / 1000))) << :weight_time) + )"; + } else { + return "( + ((CASE + WHEN type IN (1, 4) THEN 1.0 + WHEN type IN (2, 1024, 2048, 4096, 16384) THEN 0.8 + WHEN type IN (32, 64, 128, 256, 512, 32768, 65536) THEN 0.6 + WHEN type IN (8, 16, 8192, 131072) THEN 0.4 + ELSE 0.2 END) << :weight_type) * + ((1 / (CAST(strftime('%s', 'now') AS INT) - (time / 1000))) << :weight_time) + )"; + } + } + public function findInBuffers(): \PDOStatement { - // TODO: Implement findInBuffers() method. + $rankingFunction = $this->rankingFunction(); + return $this->db->prepare(" + WITH matching_backlog AS ( + SELECT rowid, snippet(backlog_fts, 0, '$this->opening_tag', '$this->closing_tag', '...', 64) AS message, rank FROM backlog_fts WHERE backlog_fts MATCH ('message: ' || :query) + ) + SELECT + ranked_messages.bufferid, + ranked_messages.buffername, + ranked_messages.networkname, + ranked_messages.messageid, + ranked_messages.type, + datetime(ranked_messages.time / 1000, 'unixepoch') AS time, + ranked_messages.sender, + replace(replace(replace(replace(ranked_messages.message, '<', '<'), '>', '>'), '$this->opening_tag_marker', '<'), '$this->closing_tag_marker', '>') AS message + FROM + (SELECT + matching_messages.*, + network.networkname, + sender.sender, + rank() + OVER ( + PARTITION BY matching_messages.bufferid + ORDER BY matching_messages.rank_value DESC + ) AS rank, + first_value(rank_value) + OVER ( + PARTITION BY matching_messages.bufferid + ORDER BY matching_messages.rank_value DESC + ) AS max_rank_value + FROM + (SELECT + backlog.messageid, + backlog.bufferid, + buffer.buffername, + buffer.networkid, + backlog.senderid, + backlog.type, + backlog.time, + matching_backlog.message, + $rankingFunction AS rank_value + FROM + backlog + JOIN buffer ON backlog.bufferid = buffer.bufferid + JOIN matching_backlog ON backlog.messageid = matching_backlog.rowid + WHERE buffer.userid = :userid + AND (:ignore_since OR backlog.time > CAST(strftime('%s', strftime('%Y-%m-%d', :since)) AS INT) * 1000) + AND (:ignore_before OR backlog.time < CAST(strftime('%s', strftime('%Y-%m-%d', :before)) AS INT) * 1000) + AND (:ignore_buffer OR buffer.buffername LIKE '%' || :buffer || '%') + AND backlog.type & 23559 > 0 + ) matching_messages + JOIN sender ON matching_messages.senderid = sender.senderid + JOIN network ON matching_messages.networkid = network.networkid + WHERE (:ignore_network OR network.networkname LIKE '%' || :network || '%') + AND (:ignore_sender OR sender.sender LIKE '%' || :sender || '%') + ) ranked_messages + WHERE ranked_messages.rank <= :limit + ORDER BY ranked_messages.max_rank_value DESC, ranked_messages.rank_value DESC; + "); } public function findInBuffersCount(): \PDOStatement { - // TODO: Implement findInBuffersCount() method. + return $this->db->prepare(" + WITH matching_backlog AS ( + SELECT rowid FROM backlog_fts WHERE backlog_fts MATCH ('message: ' || :query) + ) + SELECT + backlog.bufferid, + COUNT(*) > (:limit + :offset) AS hasmore + FROM + backlog + JOIN buffer ON backlog.bufferid = buffer.bufferid + JOIN sender ON backlog.senderid = sender.senderid + JOIN network ON buffer.networkid = network.networkid + JOIN matching_backlog ON backlog.messageid = matching_backlog.rowid + WHERE buffer.userid = :userid + AND (:ignore_since OR backlog.time > CAST(strftime('%s', strftime('%Y-%m-%d', :since)) AS INT) * 1000) + AND (:ignore_before OR backlog.time < CAST(strftime('%s', strftime('%Y-%m-%d', :before)) AS INT) * 1000) + AND (:ignore_buffer OR buffer.buffername LIKE '%' || :buffer || '%') + AND (:ignore_network OR network.networkname LIKE '%' || :network || '%') + AND (:ignore_sender OR sender.sender LIKE '%' || :sender || '%') + AND backlog.type & 23559 > 0 + GROUP BY backlog.bufferid; + "); } public function findInBuffer(): \PDOStatement { - // TODO: Implement findInBuffer() method. + $rankingFunction = $this->rankingFunction(); + return $this->db->prepare(" + WITH matching_backlog AS ( + SELECT rowid, snippet(backlog_fts, 0, '$this->opening_tag', '$this->closing_tag', '...', 64) AS message, rank FROM backlog_fts WHERE backlog_fts MATCH ('message: ' || :query) + ) + SELECT + matching_messages.messageid, + matching_messages.time, + datetime(matching_messages.time / 1000, 'unixepoch') AS time, + sender.sender, + replace(replace(replace(replace(matching_messages.message, '<', '<'), '>', '>'), '$this->opening_tag_marker', '<'), '$this->closing_tag_marker', '>') AS message + FROM + (SELECT + backlog.messageid, + backlog.bufferid, + buffer.buffername, + buffer.networkid, + backlog.senderid, + backlog.type, + backlog.time, + matching_backlog.message, + $rankingFunction AS rank_value + FROM + backlog + JOIN buffer ON backlog.bufferid = buffer.bufferid + JOIN matching_backlog ON backlog.messageid = matching_backlog.rowid + WHERE buffer.userid = :userid + AND buffer.bufferid = :bufferid + AND (:ignore_since OR backlog.time > CAST(strftime('%s', strftime('%Y-%m-%d', :since)) AS INT) * 1000) + AND (:ignore_before OR backlog.time < CAST(strftime('%s', strftime('%Y-%m-%d', :before)) AS INT) * 1000) + AND backlog.type & 23559 > 0 + ) matching_messages + JOIN sender ON matching_messages.senderid = sender.senderid + JOIN network ON matching_messages.networkid = network.networkid + WHERE (:ignore_sender OR sender.sender LIKE '%' || :sender || '%') + ORDER BY matching_messages.rank_value DESC + LIMIT :limit + OFFSET :offset + "); } public function findInBufferCount(): \PDOStatement { - // TODO: Implement findInBufferCount() method. + return $this->db->prepare(" + WITH matching_backlog AS ( + SELECT rowid FROM backlog_fts WHERE backlog_fts MATCH ('message: ' || :query) + ) + SELECT + COUNT(*) > (:limit + :offset) AS hasmore + FROM + backlog + JOIN buffer ON backlog.bufferid = buffer.bufferid + JOIN sender ON backlog.senderid = sender.senderid + JOIN matching_backlog ON backlog.messageid = matching_backlog.rowid + WHERE buffer.userid = :userid + AND backlog.bufferid = :bufferid + AND (:ignore_since OR backlog.time > CAST(strftime('%s', strftime('%Y-%m-%d', :since)) AS INT) * 1000) + AND (:ignore_before OR backlog.time < CAST(strftime('%s', strftime('%Y-%m-%d', :before)) AS INT) * 1000) + AND (:ignore_sender OR sender.sender LIKE '%' || :sender || '%') + AND backlog.type & 23559 > 0 + GROUP BY backlog.bufferid; + "); } public function loadAfter(): \PDOStatement @@ -51,16 +313,17 @@ class SQLiteSmartBackend implements Backend backlog.bufferid, buffer.buffername, sender.sender, - backlog.time, + datetime(backlog.time / 1000, 'unixepoch') AS time, network.networkname, replace(replace(replace(backlog.message, '&', '&'), '<', '<'), '>', '>') AS message - FROM backlog - JOIN sender ON backlog.senderid = sender.senderid - JOIN buffer ON backlog.bufferid = buffer.bufferid - JOIN network ON buffer.networkid = network.networkid + FROM + backlog + JOIN sender ON backlog.senderid = sender.senderid + JOIN buffer ON backlog.bufferid = buffer.bufferid + JOIN network ON buffer.networkid = network.networkid WHERE buffer.userid = :userid AND backlog.bufferid = :bufferid - AND backlog.messageid >= :anchor + AND backlog.messageid > :anchor ORDER BY backlog.messageid ASC LIMIT :limit; "); @@ -69,22 +332,24 @@ class SQLiteSmartBackend implements Backend public function loadBefore(): \PDOStatement { return $this->db->prepare(" - SELECT backlog.messageid, + SELECT * FROM (SELECT backlog.messageid, backlog.bufferid, buffer.buffername, sender.sender, - backlog.time, + datetime(backlog.time / 1000, 'unixepoch') AS time, network.networkname, replace(replace(replace(backlog.message, '&', '&'), '<', '<'), '>', '>') AS message - FROM backlog - JOIN sender ON backlog.senderid = sender.senderid - JOIN buffer ON backlog.bufferid = buffer.bufferid - JOIN network ON buffer.networkid = network.networkid + FROM + backlog + JOIN sender ON backlog.senderid = sender.senderid + JOIN buffer ON backlog.bufferid = buffer.bufferid + JOIN network ON buffer.networkid = network.networkid WHERE buffer.userid = :userid AND backlog.bufferid = :bufferid AND backlog.messageid < :anchor ORDER BY backlog.messageid DESC - LIMIT :limit; + LIMIT :limit) t + ORDER BY messageid ASC; "); } -} \ No newline at end of file +}