Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qhelpsearchindexwriter.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
5#include "qhelp_global.h"
8
9#include <QtTools/qttools-config.h>
10#include <QtCore/qdatastream.h>
11#include <QtCore/qdatetime.h>
12#include <QtCore/qdir.h>
13#include <QtCore/qstringconverter.h>
14#include <QtCore/qtextstream.h>
15#include <QtCore/qurl.h>
16#include <QtCore/qvariant.h>
17#if QT_CONFIG(fullqthelp)
18# include <QtGui/qtextdocument.h>
19#endif
20#include <QtSql/qsqldatabase.h>
21#include <QtSql/qsqldriver.h>
22#include <QtSql/qsqlerror.h>
23#include <QtSql/qsqlquery.h>
24
26
27using namespace Qt::StringLiterals;
28
29namespace fulltextsearch {
30
31const char FTS_DB_NAME[] = "fts";
32
33class Writer
34{
35public:
36 Writer(const QString &path);
38
39 bool tryInit(bool reindex);
40 void flush();
41
42 void removeNamespace(const QString &namespaceName);
43 bool hasNamespace(const QString &namespaceName);
44 void insertDoc(const QString &namespaceName,
45 const QString &attributes,
46 const QString &url,
47 const QString &title,
48 const QString &contents);
51
52private:
53 void init(bool reindex);
54 bool hasDB();
55 void clearLegacyIndex();
56
57 const QString m_dbDir;
58 QString m_uniqueId;
59
60 bool m_needOptimize = false;
61 QSqlDatabase m_db;
62 QVariantList m_namespaces;
63 QVariantList m_attributes;
64 QVariantList m_urls;
65 QVariantList m_titles;
66 QVariantList m_contents;
67};
68
69Writer::Writer(const QString &path)
70 : m_dbDir(path)
71{
72 clearLegacyIndex();
73 QDir().mkpath(m_dbDir);
74 m_uniqueId = QHelpGlobal::uniquifyConnectionName("QHelpWriter"_L1, this);
75 m_db = QSqlDatabase::addDatabase("QSQLITE"_L1, m_uniqueId);
76 const QString dbPath = m_dbDir + u'/' + QLatin1StringView(FTS_DB_NAME);
77 m_db.setDatabaseName(dbPath);
78 if (!m_db.open()) {
79 const QString &error = QHelpSearchIndexWriter::tr(
80 "Cannot open database \"%1\" using connection \"%2\": %3")
81 .arg(dbPath, m_uniqueId, m_db.lastError().text());
82 qWarning("%s", qUtf8Printable(error));
83 m_db = {};
84 QSqlDatabase::removeDatabase(m_uniqueId);
85 m_uniqueId.clear();
86 } else {
88 }
89}
90
91bool Writer::tryInit(bool reindex)
92{
93 if (!m_db.isValid())
94 return true;
95
96 QSqlQuery query(m_db);
97 // HACK: we try to perform any modifying command just to check if
98 // we don't get SQLITE_BUSY code (SQLITE_BUSY is defined to 5 in sqlite driver)
99 if (!query.exec("CREATE TABLE foo ();"_L1) && query.lastError().nativeErrorCode() == "5"_L1) // db is locked
100 return false;
101
102 // HACK: clear what we have created
103 query.exec("DROP TABLE foo;"_L1);
104
105 init(reindex);
106 return true;
107}
108
109bool Writer::hasDB()
110{
111 if (!m_db.isValid())
112 return false;
113
114 QSqlQuery query(m_db);
115 query.prepare("SELECT id FROM info LIMIT 1"_L1);
116 query.exec();
117 return query.next();
118}
119
120void Writer::clearLegacyIndex()
121{
122 // Clear old legacy clucene index.
123 // More important in case of Creator, since
124 // the index folder is common for all Creator versions
125 QDir dir(m_dbDir);
126 if (!dir.exists())
127 return;
128
129 const QStringList &list = dir.entryList(QDir::Files | QDir::Hidden);
130 if (!list.contains(QLatin1StringView(FTS_DB_NAME))) {
131 for (const QString &item : list)
132 dir.remove(item);
133 }
134}
135
136void Writer::init(bool reindex)
137{
138 if (!m_db.isValid())
139 return;
140
141 QSqlQuery query(m_db);
142
143 if (reindex && hasDB()) {
144 m_needOptimize = true;
145
146 query.exec("DROP TABLE titles;"_L1);
147 query.exec("DROP TABLE contents;"_L1);
148 query.exec("DROP TABLE info;"_L1);
149 }
150
151 query.exec("CREATE TABLE info (id INTEGER PRIMARY KEY, namespace, attributes, url, title, data);"_L1);
152
153 query.exec("CREATE VIRTUAL TABLE titles USING fts5("
154 "namespace UNINDEXED, attributes UNINDEXED, "
155 "url UNINDEXED, title, "
156 "tokenize = 'porter unicode61', content = 'info', content_rowid='id');"_L1);
157 query.exec("CREATE TRIGGER titles_insert AFTER INSERT ON info BEGIN "
158 "INSERT INTO titles(rowid, namespace, attributes, url, title) "
159 "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); "
160 "END;"_L1);
161 query.exec("CREATE TRIGGER titles_delete AFTER DELETE ON info BEGIN "
162 "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) "
163 "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); "
164 "END;"_L1);
165 query.exec("CREATE TRIGGER titles_update AFTER UPDATE ON info BEGIN "
166 "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) "
167 "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); "
168 "INSERT INTO titles(rowid, namespace, attributes, url, title) "
169 "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); "
170 "END;"_L1);
171
172 query.exec("CREATE VIRTUAL TABLE contents USING fts5("
173 "namespace UNINDEXED, attributes UNINDEXED, "
174 "url UNINDEXED, title, data, "
175 "tokenize = 'porter unicode61', content = 'info', content_rowid='id');"_L1);
176 query.exec("CREATE TRIGGER contents_insert AFTER INSERT ON info BEGIN "
177 "INSERT INTO contents(rowid, namespace, attributes, url, title, data) "
178 "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); "
179 "END;"_L1);
180 query.exec("CREATE TRIGGER contents_delete AFTER DELETE ON info BEGIN "
181 "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) "
182 "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); "
183 "END;"_L1);
184 query.exec("CREATE TRIGGER contents_update AFTER UPDATE ON info BEGIN "
185 "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) "
186 "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); "
187 "INSERT INTO contents(rowid, namespace, attributes, url, title, data) "
188 "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); "
189 "END;"_L1);
190}
191
193{
194 if (m_db.isValid())
195 m_db.close();
196 m_db = {};
197 if (!m_uniqueId.isEmpty())
198 QSqlDatabase::removeDatabase(m_uniqueId);
199}
200
202{
203 if (!m_db.isValid())
204 return;
205
206 QSqlQuery query(m_db);
207 query.prepare("INSERT INTO info (namespace, attributes, url, title, data) VALUES (?, ?, ?, ?, ?)"_L1);
208 query.addBindValue(m_namespaces);
209 query.addBindValue(m_attributes);
210 query.addBindValue(m_urls);
211 query.addBindValue(m_titles);
212 query.addBindValue(m_contents);
213 query.execBatch();
214
215 m_namespaces.clear();
216 m_attributes.clear();
217 m_urls.clear();
218 m_titles.clear();
219 m_contents.clear();
220}
221
222void Writer::removeNamespace(const QString &namespaceName)
223{
224 if (!m_db.isValid() || !hasNamespace(namespaceName)) // no data to delete
225 return;
226
227 m_needOptimize = true;
228 QSqlQuery query(m_db);
229 query.prepare("DELETE FROM info WHERE namespace = ?"_L1);
230 query.addBindValue(namespaceName);
231 query.exec();
232}
233
234bool Writer::hasNamespace(const QString &namespaceName)
235{
236 if (!m_db.isValid())
237 return false;
238
239 QSqlQuery query(m_db);
240 query.prepare("SELECT id FROM info WHERE namespace = ? LIMIT 1"_L1);
241 query.addBindValue(namespaceName);
242 query.exec();
243 return query.next();
244}
245
246void Writer::insertDoc(const QString &namespaceName,
247 const QString &attributes,
248 const QString &url,
249 const QString &title,
250 const QString &contents)
251{
252 m_namespaces.append(namespaceName);
253 m_attributes.append(attributes);
254 m_urls.append(url);
255 m_titles.append(title);
256 m_contents.append(contents);
257}
258
260{
261 if (!m_db.isValid())
262 return;
263
264 m_needOptimize = false;
265 if (m_db.driver()->hasFeature(QSqlDriver::Transactions))
266 m_db.transaction();
267}
268
270{
271 if (!m_db.isValid())
272 return;
273
274 QSqlQuery query(m_db);
275
276 if (m_needOptimize) {
277 query.exec("INSERT INTO titles(titles) VALUES('rebuild')"_L1);
278 query.exec("INSERT INTO contents(contents) VALUES('rebuild')"_L1);
279 }
280
281 if (m_db.driver()->hasFeature(QSqlDriver::Transactions))
282 m_db.commit();
283
284 if (m_needOptimize)
285 query.exec("VACUUM"_L1);
286}
287
288QHelpSearchIndexWriter::~QHelpSearchIndexWriter()
289{
290 m_mutex.lock();
291 this->m_cancel = true;
292 m_mutex.unlock();
293 wait();
294}
295
297{
298 QMutexLocker lock(&m_mutex);
299 m_cancel = true;
300}
301
302void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile,
303 const QString &indexFilesFolder, bool reindex)
304{
305 wait();
306 QMutexLocker lock(&m_mutex);
307
308 m_cancel = false;
309 m_reindex = reindex;
310 m_collectionFile = collectionFile;
311 m_indexFilesFolder = indexFilesFolder;
312
313 lock.unlock();
314
315 start(QThread::LowestPriority);
316}
317
318static const char IndexedNamespacesKey[] = "FTS5IndexedNamespaces";
319
320static QMap<QString, QDateTime> readIndexMap(const QHelpEngineCore &engine)
321{
322 QMap<QString, QDateTime> indexMap;
323 QDataStream dataStream(
324 engine.customValue(QLatin1StringView(IndexedNamespacesKey)).toByteArray());
325 dataStream >> indexMap;
326 return indexMap;
327}
328
336
341
343{
344 QMutexLocker lock(&m_mutex);
345
346 if (m_cancel)
347 return;
348
349 const bool reindex(m_reindex);
350 const QString collectionFile(m_collectionFile);
351 const QString indexPath(m_indexFilesFolder);
352
353 lock.unlock();
354
355 QHelpEngineCore engine(collectionFile, nullptr);
356 if (!engine.setupData())
357 return;
358
359 if (reindex)
360 clearIndexMap(&engine);
361
362 emit indexingStarted();
363
364 Writer writer(indexPath);
365
366 while (!writer.tryInit(reindex))
367 sleep(1);
368
369 const QStringList &registeredDocs = engine.registeredDocumentations();
370 QMap<QString, QDateTime> indexMap = readIndexMap(engine);
371
372 if (!reindex) {
373 for (const QString &namespaceName : registeredDocs) {
374 const auto it = indexMap.constFind(namespaceName);
375 if (it != indexMap.constEnd()) {
376 const QString path = engine.documentationFileName(namespaceName);
377 if (*it < QFileInfo(path).lastModified()) {
378 // Remove some outdated indexed stuff
379 indexMap.erase(it);
380 writer.removeNamespace(namespaceName);
381 } else if (!writer.hasNamespace(namespaceName)) {
382 // No data in fts db for namespace.
383 // The namespace could have been removed from fts db
384 // or the whole fts db have been removed
385 // without removing it from indexMap.
386 indexMap.erase(it);
387 }
388 } else {
389 // Needed in case namespaceName was removed from indexMap
390 // without removing it from fts db.
391 // May happen when e.g. qch file was removed manually
392 // without removing fts db.
393 writer.removeNamespace(namespaceName);
394 }
395 // TODO: we may also detect if there are any other data
396 // and remove it
397 }
398 } else {
399 indexMap.clear();
400 }
401
402 auto it = indexMap.begin();
403 while (it != indexMap.end()) {
404 if (!registeredDocs.contains(it.key())) {
405 writer.removeNamespace(it.key());
406 it = indexMap.erase(it);
407 } else {
408 ++it;
409 }
410 }
411
412 for (const QString &namespaceName : registeredDocs) {
413 lock.relock();
414 if (m_cancel) {
415 // store what we have done so far
416 writeIndexMap(&engine, indexMap);
417 writer.endTransaction();
418 emit indexingFinished();
419 return;
420 }
421 lock.unlock();
422
423 // if indexed, continue
424 if (indexMap.contains(namespaceName))
425 continue;
426
427 const QString fileName = engine.documentationFileName(namespaceName);
428 QHelpDBReader reader(fileName, QHelpGlobal::uniquifyConnectionName(
429 fileName, this), nullptr);
430 if (!reader.init())
431 continue;
432
433 const QString virtualFolder = reader.virtualFolder();
434
435 const QList<QStringList> &attributeSets =
436 engine.filterAttributeSets(namespaceName);
437
438 for (const QStringList &attributes : attributeSets) {
439 const QString &attributesString = attributes.join(u'|');
440
441 const auto htmlFiles = reader.filesData(attributes, "html"_L1);
442 const auto htmFiles = reader.filesData(attributes, "htm"_L1);
443 const auto txtFiles = reader.filesData(attributes, "txt"_L1);
444
445 auto files = htmlFiles;
446 files.unite(htmFiles);
447 files.unite(txtFiles);
448
449 for (auto it = files.cbegin(), end = files.cend(); it != end ; ++it) {
450 lock.relock();
451 if (m_cancel) {
452 // store what we have done so far
453 writeIndexMap(&engine, indexMap);
454 writer.endTransaction();
455 emit indexingFinished();
456 return;
457 }
458 lock.unlock();
459
460 const QString &file = it.key();
461 const QByteArray &data = it.value();
462
463 if (data.isEmpty())
464 continue;
465
466 QUrl url;
467 url.setScheme("qthelp"_L1);
468 url.setAuthority(namespaceName);
469 url.setPath(u'/' + virtualFolder + u'/' + file);
470
471 if (url.hasFragment())
472 url.setFragment({});
473
474 const QString &fullFileName = url.toString();
475 if (!fullFileName.endsWith(".html"_L1) && !fullFileName.endsWith(".htm"_L1)
476 && !fullFileName.endsWith(".txt"_L1)) {
477 continue;
478 }
479
480 QTextStream s(data);
481 auto encoding = QStringDecoder::encodingForHtml(data);
482 if (encoding)
483 s.setEncoding(*encoding);
484
485 const QString &text = s.readAll();
486 if (text.isEmpty())
487 continue;
488
489 QString title;
490 QString contents;
491 if (fullFileName.endsWith(".txt"_L1)) {
492 title = fullFileName.mid(fullFileName.lastIndexOf(u'/') + 1);
493 contents = text.toHtmlEscaped();
494#if QT_CONFIG(fullqthelp)
495 } else {
496 QTextDocument doc;
497 doc.setHtml(text);
498
499 title = doc.metaInformation(QTextDocument::DocumentTitle).toHtmlEscaped();
500 contents = doc.toPlainText().toHtmlEscaped();
501#endif
502 }
503
504 writer.insertDoc(namespaceName, attributesString, fullFileName, title, contents);
505 }
506 }
507 writer.flush();
508 const QString &path = engine.documentationFileName(namespaceName);
509 indexMap.insert(namespaceName, QFileInfo(path).lastModified());
510 }
511
512 writeIndexMap(&engine, indexMap);
513
514 writer.endTransaction();
515 emit indexingFinished();
516}
517
518} // namespace fulltextsearch
519
520QT_END_NAMESPACE
The QHelpEngineCore class provides the core functionality of the help system.
void updateIndex(const QString &collectionFile, const QString &indexFilesFolder, bool reindex)
void removeNamespace(const QString &namespaceName)
void insertDoc(const QString &namespaceName, const QString &attributes, const QString &url, const QString &title, const QString &contents)
bool hasNamespace(const QString &namespaceName)
static bool writeIndexMap(QHelpEngineCore *engine, const QMap< QString, QDateTime > &indexMap)
static const char IndexedNamespacesKey[]
static bool clearIndexMap(QHelpEngineCore *engine)
static QMap< QString, QDateTime > readIndexMap(const QHelpEngineCore &engine)