Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qpdfsearchmodel.cpp
Go to the documentation of this file.
1// Copyright (C) 2020 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
5#include "qpdflink.h"
9
10#include "third_party/pdfium/public/fpdf_text.h"
11#include "third_party/pdfium/public/fpdfview.h"
12
13#include <QtCore/qelapsedtimer.h>
14#include <QtCore/QMetaEnum>
15
17
18Q_PDF_LOGGING_CATEGORY(qLcS, "qt.pdf.search")
19
20static const int UpdateTimerInterval = 100;
21static const int ContextChars = 64;
22
23/*!
24 \class QPdfSearchModel
25 \since 5.15
26 \inmodule QtPdf
27 \inherits QAbstractListModel
28
29 \brief The QPdfSearchModel class searches for a string in a PDF document
30 and holds the results.
31
32 This is used in the \l {Model/View Programming} paradigm to display
33 a list of search results, to highlight them on the rendered PDF pages,
34 and to iterate through them using the "search forward" / "search backward"
35 buttons and shortcuts that would be found in a typical document-viewing UI:
36
37 \image search-results.png
38*/
39
40/*!
41 \enum QPdfSearchModel::Role
42
43 \value Page The page number where the search result is found (int).
44 \value IndexOnPage The index of the search result on the page (int).
45 \value Location The position of the search result on the page (QPointF).
46 \value ContextBefore The adjacent text on the page, before the search string (QString).
47 \value ContextAfter The adjacent text on the page, after the search string (QString).
48 \omitvalue NRoles
49
50 \sa QPdfLink
51*/
52
53/*!
54 Constructs a new search model with parent object \a parent.
55*/
56QPdfSearchModel::QPdfSearchModel(QObject *parent)
57 : QAbstractListModel(*(new QPdfSearchModelPrivate()), parent)
58{
59 QMetaEnum rolesMetaEnum = metaObject()->enumerator(metaObject()->indexOfEnumerator("Role"));
60 for (int r = Qt::UserRole; r < int(Role::NRoles); ++r) {
61 QByteArray roleName = QByteArray(rolesMetaEnum.valueToKey(r));
62 if (roleName.isEmpty())
63 continue;
64 roleName[0] = QChar::toLower(roleName[0]);
65 m_roleNames.insert(r, roleName);
66 }
67 connect(this, &QAbstractListModel::dataChanged, this, &QPdfSearchModel::countChanged);
68 connect(this, &QAbstractListModel::modelReset, this, &QPdfSearchModel::countChanged);
69 connect(this, &QAbstractListModel::rowsRemoved, this, &QPdfSearchModel::countChanged);
70 connect(this, &QAbstractListModel::rowsInserted, this, &QPdfSearchModel::countChanged);
71}
72
73/*!
74 Destroys the model.
75*/
76QPdfSearchModel::~QPdfSearchModel() {}
77
78/*!
79 \reimp
80*/
81QHash<int, QByteArray> QPdfSearchModel::roleNames() const
82{
83 return m_roleNames;
84}
85
86/*!
87 \reimp
88
89 The number of rows in the model is equal to the number of search results found.
90*/
91int QPdfSearchModel::rowCount(const QModelIndex &parent) const
92{
93 Q_D(const QPdfSearchModel);
94 Q_UNUSED(parent);
95 return d->rowCountSoFar;
96}
97
98/*!
99 \reimp
100*/
101QVariant QPdfSearchModel::data(const QModelIndex &index, int role) const
102{
103 Q_D(const QPdfSearchModel);
104 const auto pi = const_cast<QPdfSearchModelPrivate*>(d)->pageAndIndexForResult(index.row());
105 if (pi.page < 0)
106 return QVariant();
107 switch (Role(role)) {
108 case Role::Page:
109 return pi.page;
110 case Role::IndexOnPage:
111 return pi.index;
112 case Role::Location:
113 return d->searchResults[pi.page][pi.index].location();
114 case Role::ContextBefore:
115 return d->searchResults[pi.page][pi.index].contextBefore();
116 case Role::ContextAfter:
117 return d->searchResults[pi.page][pi.index].contextAfter();
118 case Role::NRoles:
119 break;
120 }
121 if (role == Qt::DisplayRole) {
122 const QString ret = d->searchResults[pi.page][pi.index].contextBefore() +
123 QLatin1String("<b>") + d->searchString + QLatin1String("</b>") +
124 d->searchResults[pi.page][pi.index].contextAfter();
125 return ret;
126 }
127 return QVariant();
128}
129
130/*!
131 \since 6.8
132 \property QPdfSearchModel::count
133 \brief the number of search results found
134*/
135int QPdfSearchModel::count() const
136{
137 return rowCount(QModelIndex());
138}
139
140void QPdfSearchModel::updatePage(int page)
141{
142 Q_D(QPdfSearchModel);
143 d->doSearch(page);
144}
145
146/*!
147 \property QPdfSearchModel::searchString
148 \brief the string to search for
149*/
150QString QPdfSearchModel::searchString() const
151{
152 Q_D(const QPdfSearchModel);
153 return d->searchString;
154}
155
156void QPdfSearchModel::setSearchString(const QString &searchString)
157{
158 Q_D(QPdfSearchModel);
159 if (d->searchString == searchString)
160 return;
161
162 d->searchString = searchString;
163 beginResetModel();
164 d->clearResults();
165 emit searchStringChanged();
166 endResetModel();
167}
168
169/*!
170 Returns the list of all results found on the given \a page.
171*/
172QList<QPdfLink> QPdfSearchModel::resultsOnPage(int page) const
173{
174 Q_D(const QPdfSearchModel);
175 const_cast<QPdfSearchModelPrivate *>(d)->doSearch(page);
176 if (d->searchResults.size() <= page)
177 return {};
178 return d->searchResults[page];
179}
180
181/*!
182 Returns a result found by \a index in the \l document, regardless of the
183 page on which it was found. \a index must be less than \l rowCount.
184*/
185QPdfLink QPdfSearchModel::resultAtIndex(int index) const
186{
187 Q_D(const QPdfSearchModel);
188 const auto pi = const_cast<QPdfSearchModelPrivate*>(d)->pageAndIndexForResult(index);
189 if (pi.page < 0 || index < 0)
190 return {};
191 return d->searchResults[pi.page][pi.index];
192}
193
194/*!
195 \property QPdfSearchModel::document
196 \brief the document to search
197*/
198QPdfDocument *QPdfSearchModel::document() const
199{
200 Q_D(const QPdfSearchModel);
201 return d->document;
202}
203
204void QPdfSearchModel::setDocument(QPdfDocument *document)
205{
206 Q_D(QPdfSearchModel);
207 if (d->document == document)
208 return;
209
210 disconnect(d->documentConnection);
211 d->documentConnection = connect(document, &QPdfDocument::pageCountChanged, this,
212 [this]() { d_func()->clearResults(); });
213
214 d->document = document;
215 d->clearResults();
216 emit documentChanged();
217}
218
219void QPdfSearchModel::timerEvent(QTimerEvent *event)
220{
221 Q_D(QPdfSearchModel);
222 if (event->timerId() != d->updateTimerId)
223 return;
224 if (!d->document || d->nextPageToUpdate >= d->document->pageCount()) {
225 if (d->document)
226 qCDebug(qLcS) << "done updating search results on" << d->searchResults.size() << "pages";
227 killTimer(d->updateTimerId);
228 d->updateTimerId = -1;
229 }
230 d->doSearch(d->nextPageToUpdate++);
231}
232
233QPdfSearchModelPrivate::QPdfSearchModelPrivate() : QAbstractItemModelPrivate()
234{
235}
236
// Body of a QPdfSearchModelPrivate member whose signature line is absent from
// this listing. NOTE(review): presumably clearResults(), which the public class
// calls from setSearchString() and setDocument() — confirm against the real file.
//
// Resets the cached search state: zeroes the row count, empties the per-page
// result and "searched" containers, re-sizes both to the document's page count
// when a document is set, and starts a timer with UpdateTimerInterval whose id
// is stored in updateTimerId.
238{
239 Q_Q(QPdfSearchModel);
240 rowCountSoFar = 0;
241 searchResults.clear();
242 pagesSearched.clear();
// With no document both containers stay empty.
243 if (document) {
244 searchResults.resize(document->pageCount());
245 pagesSearched.resize(document->pageCount());
246 }
// NOTE(review): the listing jumps from original line 246 to 248, so one
// statement is not visible here — check the original source.
248 updateTimerId = q->startTimer(UpdateTimerInterval);
249}
250
// Body of a QPdfSearchModelPrivate member whose signature line is absent from
// this listing. NOTE(review): presumably doSearch(int page), as called from
// updatePage(), resultsOnPage() and timerEvent() — confirm against the real file.
//
// Searches one page for searchString and stores the matches in
// searchResults[page]. Returns false when the page is out of range, the search
// string is empty, or the page / its text cannot be loaded; returns true when
// the page had already been searched or has just been searched.
252{
253 if (page < 0 || page >= pagesSearched.size() || searchString.isEmpty())
254 return false;
// Each page is searched at most once until the results are cleared.
255 if (pagesSearched[page])
256 return true;
257 Q_Q(QPdfSearchModel);
258
259 const QPdfMutexLocker lock;
260 QElapsedTimer timer;
261 timer.start();
262 FPDF_PAGE pdfPage = FPDF_LoadPage(document->d->doc, page);
263 if (!pdfPage) {
264 qWarning() << "failed to load page" << page;
265 return false;
266 }
267 FPDF_TEXTPAGE textPage = FPDFText_LoadPage(pdfPage);
268 if (!textPage) {
269 qWarning() << "failed to load text of page" << page;
// The page was loaded successfully above, so it must be closed on this path.
270 FPDF_ClosePage(pdfPage);
271 return false;
272 }
// NOTE(review): the search handle is used below without a null check.
273 FPDF_SCHHANDLE sh = FPDFText_FindStart(textPage, searchString.utf16(), 0, 0);
274 QList<QPdfLink> newSearchResults;
275 constexpr double CharacterHitTolerance = 6.0;
// One iteration per match found on the page.
276 while (FPDFText_FindNext(sh)) {
277 int idx = FPDFText_GetSchResultIndex(sh);
278 int count = FPDFText_GetSchCount(sh);
279 int rectCount = FPDFText_CountRects(textPage, idx, count);
280 QList<QRectF> rects;
281 int startIndex = -1;
282 int endIndex = -1;
283 for (int r = 0; r < rectCount; ++r) {
284 // get bounding box of search result in page coordinates
285 double left, top, right, bottom;
286 FPDFText_GetRect(textPage, r, &left, &top, &right, &bottom);
287 // deal with any internal PDF transforms and
288 // convert to the 1x (pixels = points) 4th-quadrant coordinate system
289 rects << document->d->mapPageToView(pdfPage, left, top, right, bottom);
// The first rectangle gives the character index where the match starts...
290 if (r == 0) {
291 startIndex = FPDFText_GetCharIndexAtPos(textPage, left, top,
292 CharacterHitTolerance, CharacterHitTolerance);
293 }
// ...and the last rectangle gives the character index where it ends.
294 if (r == rectCount - 1) {
295 endIndex = FPDFText_GetCharIndexAtPos(textPage, right, top,
296 CharacterHitTolerance, CharacterHitTolerance);
297 }
298 qCDebug(qLcS) << rects.last() << "char idx" << startIndex << "->" << endIndex
299 << "from page rect" << left << top << right << bottom;
300 }
301 QString contextBefore, contextAfter;
// Extract up to ContextChars characters on either side of the match.
// NOTE(review): the condition uses '||', so one of the two indices may still
// be -1 when the window below is computed.
302 if (startIndex >= 0 || endIndex >= 0) {
303 startIndex = qMax(0, startIndex - ContextChars);
304 endIndex += ContextChars;
305 int count = endIndex - startIndex + 1;
306 if (count > 0) {
// One extra element leaves room for the terminator.
307 QList<ushort> buf(count + 1);
308 int len = FPDFText_GetText(textPage, startIndex, count, buf.data());
309 Q_ASSERT(len - 1 <= count); // len is number of characters written, including the terminator
310 QString context = QString::fromUtf16(
311 reinterpret_cast<const char16_t *>(buf.constData()), len - 1);
// Newlines become a visible return symbol; carriage returns are dropped.
312 context = context.replace(QLatin1Char('\n'), QStringLiteral("\u23CE"));
313 context = context.remove(QLatin1Char('\r'));
314 // try to find the search string near the middle of the context if possible
315 int si = context.indexOf(searchString, ContextChars - 5, Qt::CaseInsensitive);
// NOTE(review): in this fallback Qt::CaseInsensitive is passed as the second
// argument, which is the 'from' position in the call above — verify that a
// case-insensitive search from position 0 was intended.
316 if (si < 0)
317 si = context.indexOf(searchString, Qt::CaseInsensitive);
// NOTE(review): when si is still negative only a debug message is logged;
// the two mid() calls below run with the negative value.
318 if (si < 0)
319 qCDebug(qLcS) << "search string" << searchString << "not found in context" << context;
320 contextBefore = context.mid(0, si);
321 contextAfter = context.mid(si + searchString.size());
322 }
323 }
// A match without rectangles is not recorded.
324 if (!rects.isEmpty())
325 newSearchResults << QPdfLink(page, rects, contextBefore, contextAfter);
326 }
// Release the handles in reverse order of acquisition.
327 FPDFText_FindClose(sh);
328 FPDFText_ClosePage(textPage);
329 FPDF_ClosePage(pdfPage);
330 qCDebug(qLcS) << searchString << "took" << timer.elapsed() << "ms to find"
331 << newSearchResults.size() << "results on page" << page;
332
333 pagesSearched[page] = true;
334 searchResults[page] = newSearchResults;
335 if (newSearchResults.size() > 0) {
336 int rowsBefore = rowsBeforePage(page);
337 qCDebug(qLcS) << "from row" << rowsBefore << "rowCount" << rowCountSoFar << "increasing by" << newSearchResults.size();
// NOTE(review): searchResults and rowCountSoFar are already updated when
// beginInsertRows() is called, and nothing happens between begin and end —
// check this against the model's row-insertion protocol.
338 rowCountSoFar += newSearchResults.size();
339 q->beginInsertRows(QModelIndex(), rowsBefore, rowsBefore + newSearchResults.size() - 1);
340 q->endInsertRows();
341 }
342 return true;
343}
344
// Body of a QPdfSearchModelPrivate member whose signature line is absent from
// this listing. NOTE(review): presumably pageAndIndexForResult(int resultIndex),
// as called from data() and resultAtIndex() — confirm against the real file.
//
// Maps a document-wide result index to {page, index on that page}. Pages are
// visited in order and searched on demand until the running total of results
// exceeds resultIndex. Returns {-1, -1} when there are no pages to search or
// the index lies beyond the last result.
346{
347 if (pagesSearched.isEmpty())
348 return {-1, -1};
349 const int pageCount = document->pageCount();
350 int totalSoFar = 0;
351 int previousTotalSoFar = 0;
352 for (int page = 0; page < pageCount; ++page) {
// Search pages that have not been searched yet so their count is known.
353 if (!pagesSearched[page])
354 doSearch(page);
355 totalSoFar += searchResults[page].size();
// NOTE(review): a negative resultIndex satisfies this test on the first page
// and produces a negative index-on-page; callers have to rule that out.
356 if (totalSoFar > resultIndex)
357 return {page, resultIndex - previousTotalSoFar};
358 previousTotalSoFar = totalSoFar;
359 }
360 return {-1, -1};
361}
362
// Body of a QPdfSearchModelPrivate member whose signature line is absent from
// this listing. NOTE(review): presumably rowsBeforePage(int page), as called
// when new rows are inserted after a page search — confirm against the real file.
//
// Returns the number of results stored for all pages with an index lower than
// 'page', i.e. the model row at which that page's results start.
364{
365 int ret = 0;
366 for (int i = 0; i < page; ++i)
367 ret += searchResults[i].size();
368 return ret;
369}
370
371QT_END_NAMESPACE
372
373#include "moc_qpdfsearchmodel.cpp"
PageAndIndex pageAndIndexForResult(int resultIndex)
static const int ContextChars
#define Q_PDF_LOGGING_CATEGORY(name,...)