Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qv4regexp.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:significant
4// TODO: verify critical part should be in YARR
5
6#include "qv4regexp_p.h"
7#include "qv4engine_p.h"
9#include <private/qv4mm_p.h>
10#include <runtime/VM.h>
11
12using namespace QV4;
13
// Tuning knobs for RegExp::match(); only needed when the YARR JIT is enabled.
14#if ENABLE(YARR_JIT)
// Subject strings longer than this many characters are treated as "long" by
// RegExp::match().
15static constexpr qsizetype LongStringJitThreshold = 1024;
// Amount added to the interpreter call counter for a match against a long
// string (short strings add 1), so that the JIT kicks in earlier.
16static constexpr int LongStringJitBoost = 3;
17#endif
18
19static JSC::RegExpFlags jscFlags(quint8 flags)
20{
21 JSC::RegExpFlags jscFlags = JSC::NoFlags;
22 if (flags & CompiledData::RegExp::RegExp_Global)
23 jscFlags = static_cast<JSC::RegExpFlags>(jscFlags | JSC::FlagGlobal);
24 if (flags & CompiledData::RegExp::RegExp_IgnoreCase)
25 jscFlags = static_cast<JSC::RegExpFlags>(jscFlags | JSC::FlagIgnoreCase);
26 if (flags & CompiledData::RegExp::RegExp_Multiline)
27 jscFlags = static_cast<JSC::RegExpFlags>(jscFlags | JSC::FlagMultiline);
28 if (flags & CompiledData::RegExp::RegExp_Unicode)
29 jscFlags = static_cast<JSC::RegExpFlags>(jscFlags | JSC::FlagUnicode);
30 if (flags & CompiledData::RegExp::RegExp_Sticky)
31 jscFlags = static_cast<JSC::RegExpFlags>(jscFlags | JSC::FlagSticky);
32 return jscFlags;
33}
34
35RegExpCache::~RegExpCache()
36{
37 for (RegExpCache::Iterator it = begin(), e = end(); it != e; ++it) {
38 if (RegExp *re = it.value().as<RegExp>())
39 re->d()->cache = nullptr;
40 }
41}
42
44
45uint RegExp::match(const QString &string, int start, uint *matchOffsets)
46{
47 if (!isValid())
48 return JSC::Yarr::offsetNoMatch;
49
50#if ENABLE(YARR_JIT)
51 auto *priv = d();
52
53 auto regenerateByteCode = [](Heap::RegExp *regexp) {
54 JSC::Yarr::ErrorCode error = JSC::Yarr::ErrorCode::NoError;
55 JSC::Yarr::YarrPattern yarrPattern(WTF::String(*regexp->pattern), jscFlags(regexp->flags),
56 error);
57
58 // As we successfully parsed the pattern before, we should still be able to.
59 Q_ASSERT(error == JSC::Yarr::ErrorCode::NoError);
60
61 regexp->byteCode = JSC::Yarr::byteCompile(
62 yarrPattern,
63 regexp->internalClass->engine->bumperPointerAllocator).release();
64 };
65
66 auto removeJitCode = [](Heap::RegExp *regexp) {
67 delete regexp->jitCode;
68 regexp->jitCode = nullptr;
69 regexp->jitFailed = true;
70 };
71
72 auto removeByteCode = [](Heap::RegExp *regexp) {
73 delete regexp->byteCode;
74 regexp->byteCode = nullptr;
75 };
76
77 if (!priv->jitCode) {
78
79 // Long strings count as more calls. We want the JIT to run earlier.
80 const bool longString = string.length() > LongStringJitThreshold;
81 if (longString)
82 priv->interpreterCallCount += LongStringJitBoost;
83
84 if (priv->internalClass->engine->canJIT(priv)) {
85 removeByteCode(priv);
86
87 JSC::Yarr::ErrorCode error = JSC::Yarr::ErrorCode::NoError;
88 JSC::Yarr::YarrPattern yarrPattern(
89 WTF::String(*priv->pattern), jscFlags(priv->flags), error);
90 if (!yarrPattern.m_containsBackreferences) {
91 priv->jitCode = new JSC::Yarr::YarrCodeBlock;
92 JSC::VM *vm = static_cast<JSC::VM *>(priv->internalClass->engine);
93 JSC::Yarr::jitCompile(yarrPattern, JSC::Yarr::Char16, vm, *priv->jitCode);
94 }
95
96 if (!priv->hasValidJITCode()) {
97 removeJitCode(priv);
98 regenerateByteCode(priv);
99 }
100 } else if (!longString) {
101 // Short strings do the regular post-increment to honor
102 // QV4_JIT_CALL_THRESHOLD.
103 ++priv->interpreterCallCount;
104 }
105 }
106#endif
107
108 WTF::String s(string);
109
110#if ENABLE(YARR_JIT)
111 if (priv->hasValidJITCode()) {
112 static const uint offsetJITFail = std::numeric_limits<unsigned>::max() - 1;
113 uint ret = JSC::Yarr::offsetNoMatch;
114#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
115 char buffer[8192];
116 ret = uint(priv->jitCode->execute(s.characters16(), start, s.size(),
117 (int*)matchOffsets, buffer, 8192).start);
118#else
119 ret = uint(priv->jitCode->execute(s.characters16(), start, s.length(),
120 (int*)matchOffsets).start);
121#endif
122 if (ret != offsetJITFail)
123 return ret;
124
125 removeJitCode(priv);
126 // JIT failed. We need byteCode to run the interpreter.
127 Q_ASSERT(!priv->byteCode);
128 regenerateByteCode(priv);
129 }
130#endif // ENABLE(YARR_JIT)
131
132 return JSC::Yarr::interpret(byteCode(), s.characters16(), string.size(), start, matchOffsets);
133}
134
135QString RegExp::getSubstitution(const QString &matched, const QString &str, int position, const Value *captures, int nCaptures, const QString &replacement)
136{
137 QString result;
138
139 int matchedLength = matched.size();
140 Q_ASSERT(position >= 0 && position <= str.size());
141 int tailPos = position + matchedLength;
142 int seenDollar = -1;
143 for (int i = 0; i < replacement.size(); ++i) {
144 QChar ch = replacement.at(i);
145 if (seenDollar >= 0) {
146 if (ch.unicode() == '$') {
147 result += QLatin1Char('$');
148 } else if (ch.unicode() == '&') {
149 result += matched;
150 } else if (ch.unicode() == '`') {
151 result += str.left(position);
152 } else if (ch.unicode() == '\'') {
153 result += str.mid(tailPos);
154 } else if (ch.unicode() >= '0' && ch.unicode() <= '9') {
155 int n = ch.unicode() - '0';
156 if (i + 1 < replacement.size()) {
157 ch = replacement.at(i + 1);
158 if (ch.unicode() >= '0' && ch.unicode() <= '9') {
159 n = n*10 + (ch.unicode() - '0');
160 ++i;
161 }
162 }
163 if (n > 0 && n <= nCaptures) {
164 String *s = captures[n].stringValue();
165 if (s)
166 result += s->toQString();
167 } else {
168 for (int j = seenDollar; j <= i; ++j)
169 result += replacement.at(j);
170 }
171 } else {
172 result += QLatin1Char('$');
173 result += ch;
174 }
175 seenDollar = -1;
176 } else {
177 if (ch == QLatin1Char('$')) {
178 seenDollar = i;
179 continue;
180 }
181 result += ch;
182 }
183 }
184 if (seenDollar >= 0)
185 result += QLatin1Char('$');
186 return result;
187}
188
189Heap::RegExp *RegExp::create(
190 ExecutionEngine *engine, const QString &pattern, CompiledData::RegExp::Flags flags)
191{
192 RegExpCacheKey key(pattern, flags);
193
194 RegExpCache *cache = engine->regExpCache;
195 if (!cache)
196 cache = engine->regExpCache = new RegExpCache;
197
198 QV4::WeakValue &cachedValue = (*cache)[key];
199 if (QV4::RegExp *result = cachedValue.as<RegExp>())
200 return result->d();
201
202 Scope scope(engine);
203 Scoped<RegExp> result(scope, engine->memoryManager->alloc<RegExp>(engine, pattern, flags));
204
205 result->d()->cache = cache;
206 cachedValue.set(engine, result);
207
208 return result->d();
209}
210
211void Heap::RegExp::init(ExecutionEngine *engine, const QString &pattern, uint flags)
212{
213 Base::init();
214 this->pattern = new QString(pattern);
215 this->flags = flags;
216
217 JSC::Yarr::ErrorCode error = JSC::Yarr::ErrorCode::NoError;
218 JSC::Yarr::YarrPattern yarrPattern(WTF::String(pattern), jscFlags(flags), error);
219 if (error != JSC::Yarr::ErrorCode::NoError)
220 return;
221 subPatternCount = yarrPattern.m_numSubpatterns;
222 Q_UNUSED(engine);
223 byteCode = JSC::Yarr::byteCompile(yarrPattern, internalClass->engine->bumperPointerAllocator).release();
224 if (byteCode)
225 valid = true;
226}
227
228void Heap::RegExp::destroy()
229{
230 if (cache) {
231 RegExpCacheKey key(this);
232 cache->remove(key);
233 }
234#if ENABLE(YARR_JIT)
235 delete jitCode;
236#endif
237 delete byteCode;
238 delete pattern;
239 Base::destroy();
240}
DEFINE_MANAGED_VTABLE(RegExp)
static JSC::RegExpFlags jscFlags(quint8 flags)
Definition qv4regexp.cpp:19