7#include "core/fpdftext/cpdf_textpagefind.h"
13#include "core/fpdftext/cpdf_textpage.h"
14#include "core/fxcrt/fx_extension.h"
15#include "core/fxcrt/fx_string.h"
16#include "core/fxcrt/fx_system.h"
17#include "core/fxcrt/fx_unicode.h"
18#include "core/fxcrt/stl_util.h"
19#include "third_party/base/check.h"
20#include "third_party/base/memory/ptr_util.h"
// Code point 160 (0xA0). The search code in this file tests it together with
// L' ', L'\n' and L'\r' when inspecting the characters around a match.
24constexpr wchar_t kNonBreakingSpace = 160;
// Classifies |curChar| by code point. The visible condition holds for any
// value below 255, for the ranges 0x0600-0x06FF, 0xFE70-0xFEFF, 0xFB50-0xFDFF,
// 0x0400-0x04FF, 0x0500-0x052F, 0xA640-0xA69F, 0x2DE0-0x2DFF and
// 0x2000-0x206F, and for the single value 8467 (0x2113).
// NOTE(review): the statements guarded by this condition and the function's
// return values are not visible in this excerpt - confirm which result each
// branch produces before relying on the function name alone.
26bool IsIgnoreSpaceCharacter(
wchar_t curChar) {
// The first test is a strict `< 255`, so code point 255 itself is not matched
// by it.
27 if (curChar < 255 || (curChar >= 0x0600 && curChar <= 0x06FF) ||
28 (curChar >= 0xFE70 && curChar <= 0xFEFF) ||
29 (curChar >= 0xFB50 && curChar <= 0xFDFF) ||
30 (curChar >= 0x0400 && curChar <= 0x04FF) ||
31 (curChar >= 0x0500 && curChar <= 0x052F) ||
32 (curChar >= 0xA640 && curChar <= 0xA69F) ||
33 (curChar >= 0x2DE0 && curChar <= 0x2DFF) || curChar == 8467 ||
34 (curChar >= 0x2000 && curChar <= 0x206F)) {
// Inspects the span [startPos, endPos] of |csPageText| together with the
// characters on either side of it. Presumably this decides whether the span
// stands alone as a whole word - the return statements are not visible in this
// excerpt, so confirm the polarity of each test against the full file.
40bool IsMatchWholeWord(
const WideString& csPageText,
// Inverted range guard (the guarded statement is not visible here).
43 if (startPos > endPos)
// |char_right| remains 0 when no character follows the span; |char_left|
// starts at 0 as well.
45 wchar_t char_left = 0;
46 wchar_t char_right = 0;
// Inclusive length of the span.
47 size_t char_count = endPos - startPos + 1;
// A one-character span whose code point exceeds 255 is special-cased (outcome
// not visible here).
50 if (char_count == 1 && csPageText[startPos] > 255)
// Character immediately before the span.
// NOTE(review): the guard keeping `startPos - 1` in range is not visible in
// this excerpt - confirm it exists.
53 char_left = csPageText[startPos - 1];
// Character immediately after the span, only when one exists.
54 if (startPos + char_count < csPageText.GetLength())
55 char_right = csPageText[startPos + char_count];
// Neighbour tests using strict comparisons: a neighbour passes when it lies
// strictly between 'A' and 'a', strictly between 'a' and 'z', or strictly
// between 0xfb00 and 0xfb06. 'A', 'a' and 'z' themselves therefore do not
// pass these particular tests.
56 if ((char_left >
'A' && char_left <
'a') ||
57 (char_left >
'a' && char_left <
'z') ||
58 (char_left > 0xfb00 && char_left < 0xfb06) ||
60 (char_right >
'A' && char_right <
'a') ||
61 (char_right >
'a' && char_right <
'z') ||
62 (char_right > 0xfb00 && char_right < 0xfb06) ||
// By De Morgan, this condition is true when at least one neighbour lies in the
// inclusive ranges 'A'..'Z' or 'a'..'z'.
66 if (!((
'A' > char_left || char_left >
'Z') &&
67 (
'a' > char_left || char_left >
'z') &&
68 (
'A' > char_right || char_right >
'Z') &&
69 (
'a' > char_right || char_right >
'z'))) {
// Decimal-digit checks on the first and last characters of the span; the
// other half of each condition is not visible in this excerpt.
74 FXSYS_IsDecimalDigit(csPageText[startPos])) {
78 FXSYS_IsDecimalDigit(csPageText[endPos])) {
// Returns the form of |wsOriginal| to use for comparison, selected by
// |bMatchCase|. Only the copy into |wsLower| is visible in this excerpt;
// presumably the copy is lower-cased when case is not being matched - TODO
// confirm against the full file.
85WideString GetStringCase(
const WideString& wsOriginal,
bool bMatchCase) {
89 WideString wsLower = wsOriginal;
// Extracts one L' '-delimited piece of |lpszFullString|, selected by
// |iSubString|, returning absl::nullopt on the failure paths shown below.
// |lpszFullString| must be non-null (DCHECKed). The remaining parameters are
// not visible in this excerpt.
94absl::optional<WideString> ExtractSubString(
const wchar_t* lpszFullString,
96 DCHECK(lpszFullString);
// Advance to the next L' ' once per requested index; a nullopt return follows
// (presumably when wcschr finds no further separator - the check itself is not
// visible here).
98 while (iSubString--) {
99 lpszFullString = wcschr(lpszFullString, L' ');
101 return absl::nullopt;
// Skip over a run of consecutive separators.
104 while (*lpszFullString == L' ')
// The piece ends at the next separator, or at the terminating NUL if there is
// none.
108 const wchar_t* lpchEnd = wcschr(lpszFullString, L' ');
109 int nLen = lpchEnd ?
static_cast<
int>(lpchEnd - lpszFullString)
110 :
static_cast<
int>(wcslen(lpszFullString));
// Second failure path (its condition is not visible in this excerpt).
112 return absl::nullopt;
114 return WideString(lpszFullString,
static_cast<size_t>(nLen));
// Splits the search string |findwhat| into the list of pieces returned in
// |findwhat_array|. Pieces are obtained with ExtractSubString() and may be
// split further around individual characters, as shown below.
// NOTE(review): several statements of this function are missing from this
// excerpt; comments describe only the visible lines.
117std::vector<WideString> ExtractFindWhat(
const WideString& findwhat) {
118 std::vector<WideString> findwhat_array;
120 size_t len = findwhat.GetLength();
// Scan for a character that is not ' ' (the statement run on a hit is not
// visible here).
122 for (i = 0; i < len; ++i)
123 if (findwhat[i] !=
' ')
// Early exit that returns |findwhat| unsplit as the only entry - presumably
// taken when the scan above found nothing but spaces; confirm.
126 findwhat_array.push_back(findwhat);
127 return findwhat_array;
// Fetch the piece selected by |index|; a missing value ends the processing
// (the exact statement is not visible here).
132 absl::optional<WideString> word = ExtractSubString(findwhat.c_str(), index);
133 if (!word.has_value())
// An empty piece is recorded as an empty string entry.
136 if (word->IsEmpty()) {
137 findwhat_array.push_back(L"");
// Walk the piece one character at a time.
143 while (pos < word->GetLength()) {
144 WideString curStr = word->Substr(pos, 1);
145 wchar_t curChar = word.value()[pos];
// Characters accepted by IsIgnoreSpaceCharacter() take a separate path (body
// not visible here).
146 if (IsIgnoreSpaceCharacter(curChar)) {
// Otherwise emit the text before |pos| and then the character itself as its
// own entry.
152 findwhat_array.push_back(word->First(pos));
153 findwhat_array.push_back(curStr);
// Special handling when |pos| is the last index of the piece (body not
// visible here).
154 if (pos == word->GetLength() - 1) {
// Continue with whatever follows the character just emitted.
158 word.emplace(word->Last(word->GetLength() - pos - 1));
// Whatever remains of the piece becomes a final entry.
165 if (!word->IsEmpty())
166 findwhat_array.push_back(word.value());
169 return findwhat_array;
// Fragment of a factory routine (its signature line is not visible in this
// excerpt). It normalises |findwhat| with GetStringCase() according to
// options.bMatchCase, splits the result with ExtractFindWhat(), and wraps a
// newly created object via pdfium::WrapUnique().
177 const WideString& findwhat,
179 absl::optional<size_t> startPos) {
180 std::vector<WideString> findwhat_array =
181 ExtractFindWhat(GetStringCase(findwhat, options.bMatchCase));
182 auto find = pdfium::WrapUnique(
// Fragment of a constructor (its first line is not visible in this excerpt).
// It stores the text page pointer, caches the page text after passing it
// through GetStringCase() with options.bMatchCase, and copies the list of
// search pieces.
190 const std::vector<WideString>& findwhat_array,
192 absl::optional<size_t> startPos)
193 : m_pTextPage(pTextPage),
194 m_strText(GetStringCase(pTextPage->GetAllPageText(), options.bMatchCase)),
195 m_csFindWhatArray(findwhat_array),
// Start positions are only initialised for non-empty text, which also keeps
// `GetLength() - 1` below from being evaluated on an empty string.
197 if (!m_strText.IsEmpty()) {
198 m_findNextStart = startPos;
// Without an explicit |startPos|, backward searching starts from the last
// index of the text.
199 m_findPreStart = startPos.value_or(m_strText.GetLength() - 1);
// Body of a one-line helper (signature not visible here): forwards |index| to
// the text page's CharIndexFromTextIndex() and returns the result.
206 return m_pTextPage->CharIndexFromTextIndex(index);
// Body of a one-line predicate (signature not visible here): true when the
// cached page text is empty or when there is at least one search piece.
210 return m_strText.IsEmpty() || !m_csFindWhatArray.empty();
// Fragment of the forward search routine (signature and many statements are
// not visible in this excerpt). It walks the entries of m_csFindWhatArray in
// order, locating each one in m_strText from the current start position, and
// on completion records the match in m_resStart / m_resEnd and updates
// m_findNextStart / m_findPreStart.
// Nothing to do without page text or without a forward start position.
214 if (m_strText.IsEmpty() || !m_findNextStart.has_value())
217 const size_t strLen = m_strText.GetLength();
218 size_t nStartPos = m_findNextStart.value();
// Start position at or beyond the end of the text (handling not visible here).
219 if (nStartPos >= strLen) {
223 int nCount = fxcrt::CollectionSize<
int>(m_csFindWhatArray);
224 absl::optional<size_t> nResultPos = 0;
225 bool bSpaceStart =
false;
// One iteration per search piece.
226 for (
int iWord = 0; iWord < nCount; iWord++) {
227 WideString csWord = m_csFindWhatArray[iWord];
// Branch for the last piece: look at the character at |nStartPos| and, if it
// is '\n', ' ', '\r' or the non-breaking space, step the result position past
// it. (The condition that selects this branch over the search below is only
// partly visible.)
229 if (iWord == nCount - 1) {
230 if (nStartPos >= strLen) {
233 wchar_t strInsert = m_strText[nStartPos];
234 if (strInsert == L'\n' || strInsert == L' ' || strInsert == L'\r' ||
235 strInsert == kNonBreakingSpace) {
236 nResultPos = nStartPos + 1;
240 }
else if (iWord == 0) {
// Locate the current piece at or after |nStartPos|; no hit ends the search
// (statement not visible here).
245 nResultPos = m_strText.Find(csWord.AsStringView(), nStartPos);
246 if (!nResultPos.has_value())
// Inclusive index of the last matched character of this piece.
249 size_t endIndex = nResultPos.value() + csWord.GetLength() - 1;
251 m_resStart = nResultPos.value();
// For pieces after the first (when not in the leading-space case), validate
// what lies between the previous piece's end and this hit.
253 if (iWord != 0 && !bSpaceStart) {
254 size_t PreResEndPos = nStartPos;
255 int curChar = csWord
[0
];
256 WideString lastWord = m_csFindWhatArray[iWord - 1];
257 int lastChar = lastWord
.Back();
// Hit starts exactly where the previous piece ended and neither boundary
// character satisfies IsIgnoreSpaceCharacter() (outcome not visible here).
258 if (nStartPos == nResultPos.value() &&
259 !(IsIgnoreSpaceCharacter(lastChar) ||
260 IsIgnoreSpaceCharacter(curChar))) {
// Scan the gap; any character other than '\n', ' ', '\r' or the non-breaking
// space triggers the guarded statement (not visible here).
263 for (size_t d = PreResEndPos; d < nResultPos.value(); d++) {
264 wchar_t strInsert = m_strText[d];
265 if (strInsert != L'\n' && strInsert != L' ' && strInsert != L'\r' &&
266 strInsert != kNonBreakingSpace) {
271 }
else if (bSpaceStart) {
// Leading-space case: inspect the character just before the hit. m_resStart
// is set to the hit position or to one before it; presumably the second
// assignment belongs to an `else` that is not visible here - confirm.
272 if (nResultPos.value() > 0) {
273 wchar_t strInsert = m_strText[nResultPos.value() - 1];
274 if (strInsert != L'\n' && strInsert != L' ' && strInsert != L'\r' &&
275 strInsert != kNonBreakingSpace) {
277 m_resStart = nResultPos.value();
279 m_resStart = nResultPos.value() - 1;
// Optional whole-word filter, applied only to candidates that still match.
283 if (m_options.bMatchWholeWord && bMatch)
284 bMatch = IsMatchWholeWord(m_strText, nResultPos.value(), endIndex);
// Continue after the piece just matched.
287 nStartPos = endIndex + 1;
// Alternative restart position: skip past the piece at index 1 (leading-space
// case) or index 0, measured from m_resStart. The condition choosing between
// this and the line above is not visible here.
290 size_t index = bSpaceStart ? 1 : 0;
291 nStartPos = m_resStart + m_csFindWhatArray[index].GetLength();
// Inclusive end of the overall match, derived from the last piece.
294 m_resEnd = nResultPos.value() + m_csFindWhatArray.back().GetLength() - 1;
// Two alternative updates of the next/previous start positions; the selecting
// condition is not visible in this excerpt.
// NOTE(review): `m_resEnd - 1` / `m_resStart - 1` would wrap if these are
// unsigned and equal to 0 - member types are not visible here; confirm.
296 m_findNextStart = m_resStart + 1;
297 m_findPreStart = m_resEnd - 1;
299 m_findNextStart = m_resEnd + 1;
300 m_findPreStart = m_resStart - 1;
// Fragment of the backward search routine (signature and several statements
// are not visible in this excerpt). It uses a separate |find_engine| object
// and converts the chosen char-index range back to text indices.
// Nothing to do without page text or without a backward start position.
306 if (m_strText.IsEmpty() || !m_findPreStart.has_value())
// Bail out when the helper engine cannot start (statement not visible here).
310 if (!find_engine.FindFirst())
// Candidate end (start order plus match length); checked for being negative
// or lying beyond m_findPreStart + 1. The guarded statement is not visible
// here.
318 int temp = cur_order + cur_match;
319 if (temp < 0 ||
static_cast<size_t>(temp) > m_findPreStart.value() + 1)
// Record the selected match as an inclusive text-index range.
328 m_resStart = m_pTextPage->TextIndexFromCharIndex(order);
329 m_resEnd = m_pTextPage->TextIndexFromCharIndex(order + matches - 1);
// Two alternative updates of the next/previous start positions; the selecting
// condition is not visible in this excerpt.
331 m_findNextStart = m_resStart + 1;
332 m_findPreStart = m_resEnd - 1;
334 m_findNextStart = m_resEnd + 1;
335 m_findPreStart = m_resStart - 1;
// Body of a one-line accessor (signature not visible here): returns the char
// index corresponding to the start of the current match.
341 return GetCharIndex(m_resStart);
// Fragment (signature not visible here): converts the current match's start
// and end to char indices and returns the inclusive count between them.
345 int resStart = GetCharIndex(m_resStart);
346 int resEnd = GetCharIndex(m_resEnd);
347 return resEnd - resStart + 1;
int GetMatchedCount() const
CharType operator[](const size_t index) const
bool FXSYS_IsDecimalDigit(wchar_t c)
constexpr wchar_t kRightSingleQuotationMark