Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qmimeglobpattern.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:critical reason:data-parser
4
6
7#if QT_CONFIG(regularexpression)
8#include <QRegularExpression>
9#endif
10#include <QStringList>
11#include <QDebug>
12
14
15using namespace Qt::StringLiterals;
16
17/*!
18 \internal
19 \class QMimeGlobMatchResult
20 \inmodule QtCore
21 \brief The QMimeGlobMatchResult class accumulates results from glob matching.
22
23 Handles glob weights, and preferring longer matches over shorter matches.
24*/
25
26void QMimeGlobMatchResult::addMatch(const QString &mimeType, int weight, const QString &pattern,
27 qsizetype knownSuffixLength)
28{
29 if (m_allMatchingMimeTypes.contains(mimeType))
30 return;
31 // Is this a lower-weight pattern than the last match? Skip this match then.
32 if (weight < m_weight) {
33 m_allMatchingMimeTypes.append(mimeType);
34 return;
35 }
36 bool replace = weight > m_weight;
37 if (!replace) {
38 // Compare the length of the match
39 if (pattern.size() < m_matchingPatternLength)
40 return; // too short, ignore
41 else if (pattern.size() > m_matchingPatternLength) {
42 // longer: clear any previous match (like *.bz2, when pattern is *.tar.bz2)
43 replace = true;
44 }
45 }
46 if (replace) {
47 m_matchingMimeTypes.clear();
48 // remember the new "longer" length
49 m_matchingPatternLength = pattern.size();
50 m_weight = weight;
51 }
52 if (!m_matchingMimeTypes.contains(mimeType)) {
53 m_matchingMimeTypes.append(mimeType);
54 if (replace)
55 m_allMatchingMimeTypes.prepend(mimeType); // highest-weight first
56 else
57 m_allMatchingMimeTypes.append(mimeType);
58 m_knownSuffixLength = knownSuffixLength;
59 }
60}
61
62QMimeGlobPattern::PatternType QMimeGlobPattern::detectPatternType(QStringView pattern) const
63{
64 const qsizetype patternLength = pattern.size();
65 if (!patternLength)
66 return OtherPattern;
67
68 const qsizetype starCount = pattern.count(u'*');
69 const bool hasSquareBracket = pattern.indexOf(u'[') != -1;
70 const bool hasQuestionMark = pattern.indexOf(u'?') != -1;
71
72 if (!hasSquareBracket && !hasQuestionMark) {
73 if (starCount == 1) {
74 // Patterns like "*~", "*.extension"
75 if (pattern.at(0) == u'*')
76 return SuffixPattern;
77 // Patterns like "README*" (well this is currently the only one like that...)
78 if (pattern.at(patternLength - 1) == u'*')
79 return PrefixPattern;
80 } else if (starCount == 0) {
81 // Names without any wildcards like "README"
82 return LiteralPattern;
83 }
84 }
85
86 if (pattern == "[0-9][0-9][0-9].vdr"_L1)
87 return VdrPattern;
88
89 if (pattern == "*.anim[1-9j]"_L1)
90 return AnimPattern;
91
92 return OtherPattern;
93}
94
95
96/*!
97 \internal
98 \class QMimeGlobPattern
99 \inmodule QtCore
100 \brief The QMimeGlobPattern class contains the glob pattern for file names for MIME type matching.
101
102 \sa QMimeType, QMimeDatabase, QMimeMagicRuleMatcher, QMimeMagicRule
103*/
104
105bool QMimeGlobPattern::matchFileName(const QString &inputFileName) const
106{
107 // "Applications MUST match globs case-insensitively, except when the case-sensitive
108 // attribute is set to true."
109 // The constructor takes care of putting case-insensitive patterns in lowercase.
110 const QString fileName = m_caseSensitivity == Qt::CaseInsensitive
111 ? inputFileName.toLower() : inputFileName;
112
113 const qsizetype patternLength = m_pattern.size();
114 if (!patternLength)
115 return false;
116 const qsizetype fileNameLength = fileName.size();
117
118 switch (m_patternType) {
119 case SuffixPattern: {
120 if (fileNameLength + 1 < patternLength)
121 return false;
122
123 const QChar *c1 = m_pattern.unicode() + patternLength - 1;
124 const QChar *c2 = fileName.unicode() + fileNameLength - 1;
125 int cnt = 1;
126 while (cnt < patternLength && *c1-- == *c2--)
127 ++cnt;
128 return cnt == patternLength;
129 }
130 case PrefixPattern: {
131 if (fileNameLength + 1 < patternLength)
132 return false;
133
134 const QChar *c1 = m_pattern.unicode();
135 const QChar *c2 = fileName.unicode();
136 int cnt = 1;
137 while (cnt < patternLength && *c1++ == *c2++)
138 ++cnt;
139 return cnt == patternLength;
140 }
141 case LiteralPattern:
142 return (m_pattern == fileName);
143 case VdrPattern: // "[0-9][0-9][0-9].vdr" case
144 return fileNameLength == 7
145 && fileName.at(0).isDigit() && fileName.at(1).isDigit() && fileName.at(2).isDigit()
146 && QStringView{fileName}.mid(3, 4) == ".vdr"_L1;
147 case AnimPattern: { // "*.anim[1-9j]" case
148 if (fileNameLength < 6)
149 return false;
150 const QChar lastChar = fileName.at(fileNameLength - 1);
151 const bool lastCharOK = (lastChar.isDigit() && lastChar != u'0')
152 || lastChar == u'j';
153 return lastCharOK && QStringView{fileName}.mid(fileNameLength - 6, 5) == ".anim"_L1;
154 }
155 case OtherPattern:
156 // Other fallback patterns: slow but correct method
157#if QT_CONFIG(regularexpression)
158 auto rx = QRegularExpression::fromWildcard(m_pattern);
159 return rx.match(fileName).hasMatch();
160#else
161 return false;
162#endif
163 }
164 return false;
165}
166
167static bool isSimplePattern(QStringView pattern)
168{
169 // starts with "*.", has no other '*'
170 return pattern.lastIndexOf(u'*') == 0
171 && pattern.size() > 1
172 && pattern.at(1) == u'.' // (other dots are OK, like *.tar.bz2)
173 // and contains no other special character
174 && !pattern.contains(u'?')
175 && !pattern.contains(u'[')
176 ;
177}
178
179static bool isFastPattern(QStringView pattern)
180{
181 // starts with "*.", has no other '*' and no other '.'
182 return pattern.lastIndexOf(u'*') == 0
183 && pattern.lastIndexOf(u'.') == 1
184 // and contains no other special character
185 && !pattern.contains(u'?')
186 && !pattern.contains(u'[')
187 ;
188}
189
191{
192 const QString &pattern = glob.pattern();
193 Q_ASSERT(!pattern.isEmpty());
194
195 // Store each patterns into either m_fastPatternDict (*.txt, *.html
196 // etc. with default weight 50) or for the rest, like core.*, *.tar.bz2, *~,
197 // into highWeightPatternOffset (>50) or lowWeightPatternOffset (<=50).
198
199 if (glob.weight() == 50 && isFastPattern(pattern) && !glob.isCaseSensitive()) {
200 // The bulk of the patterns is *.foo with weight 50 --> those go into the fast patterns hash.
201 const QString extension = pattern.mid(2).toLower();
202 QStringList &patterns = m_fastPatterns[extension]; // find or create
203 if (!patterns.contains(glob.mimeType()))
204 patterns.append(glob.mimeType());
205 } else {
206 if (glob.weight() > 50) {
207 if (!m_highWeightGlobs.hasPattern(glob.mimeType(), glob.pattern()))
208 m_highWeightGlobs.append(glob);
209 } else {
210 if (!m_lowWeightGlobs.hasPattern(glob.mimeType(), glob.pattern()))
211 m_lowWeightGlobs.append(glob);
212 }
213 }
214}
215
216void QMimeAllGlobPatterns::removeMimeType(const QString &mimeType)
217{
218 for (auto &x : m_fastPatterns)
219 x.removeAll(mimeType);
220 m_highWeightGlobs.removeMimeType(mimeType);
221 m_lowWeightGlobs.removeMimeType(mimeType);
222}
223
224void QMimeGlobPatternList::match(QMimeGlobMatchResult &result, const QString &fileName,
225 const AddMatchFilterFunc &filterFunc) const
226{
227 for (const QMimeGlobPattern &glob : *this) {
228 if (glob.matchFileName(fileName) && filterFunc(glob.mimeType())) {
229 const QString pattern = glob.pattern();
230 const qsizetype suffixLen = isSimplePattern(pattern) ? pattern.size() - strlen("*.") : 0;
231 result.addMatch(glob.mimeType(), glob.weight(), pattern, suffixLen);
232 }
233 }
234}
235
236void QMimeAllGlobPatterns::matchingGlobs(const QString &fileName, QMimeGlobMatchResult &result,
237 const AddMatchFilterFunc &filterFunc) const
238{
239 // First try the high weight matches (>50), if any.
240 m_highWeightGlobs.match(result, fileName, filterFunc);
241
242 // Now use the "fast patterns" dict, for simple *.foo patterns with weight 50
243 // (which is most of them, so this optimization is definitely worth it)
244 const qsizetype lastDot = fileName.lastIndexOf(u'.');
245 if (lastDot != -1) { // if no '.', skip the extension lookup
246 const qsizetype ext_len = fileName.size() - lastDot - 1;
247 const QString simpleExtension = fileName.right(ext_len).toLower();
248 // (toLower because fast patterns are always case-insensitive and saved as lowercase)
249
250 const QStringList matchingMimeTypes = m_fastPatterns.value(simpleExtension);
251 const QString simplePattern = "*."_L1 + simpleExtension;
252 for (const QString &mime : matchingMimeTypes) {
253 if (filterFunc(mime))
254 result.addMatch(mime, 50, simplePattern, simpleExtension.size());
255 }
256 // Can't return yet; *.tar.bz2 has to win over *.bz2, so we need the
257 // low-weight mimetypes anyway, at least those with weight 50.
258 }
259
260 // Finally, try the low weight matches (<=50)
261 m_lowWeightGlobs.match(result, fileName, filterFunc);
262}
263
265{
266 m_fastPatterns.clear();
267 m_highWeightGlobs.clear();
268 m_lowWeightGlobs.clear();
269}
270
271QT_END_NAMESPACE
Result of the globs parsing, as data structures ready for efficient MIME type matching.
void addGlob(const QMimeGlobPattern &glob)
void matchingGlobs(const QString &fileName, QMimeGlobMatchResult &result, const AddMatchFilterFunc &filterFunc) const
void removeMimeType(const QString &mimeType)
QMimeGlobPatternList m_highWeightGlobs
QMimeGlobPatternList m_lowWeightGlobs
void match(QMimeGlobMatchResult &result, const QString &fileName, const AddMatchFilterFunc &filterFunc) const
The QMimeGlobPattern class contains the glob pattern for file names for MIME type matching.
unsigned weight() const
bool matchFileName(const QString &inputFileName) const
static bool isSimplePattern(QStringView pattern)
static bool isFastPattern(QStringView pattern)